//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// This multiclass generates the masking variants from the non-masking
// variant.  It only provides the assembly pieces for the masking variants;
// the custom ISel patterns for masking can be provided as template
// arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = ""> {
  let isCommutable = IsCommutable, Constraints = ClobberConstraint in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
              EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
                               !if(!eq(MaskingConstraint, ""), ClobberConstraint,
                                   !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
    }

  // Zero masking does not add any restrictions to the commute-operands
  // transformation, so it is OK to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
      Constraints = ClobberConstraint in
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                       ZeroMaskingPattern>,
              EVEX_KZ;
}
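
// Illustrative sketch (not part of the definitions): for a hypothetical
// invocation "defm VFOO : AVX512_maskable_custom<...>" with OpcodeStr = "vfoo",
// AttSrcAsm = "$src2, $src1" and IntelSrcAsm = "$src1, $src2", the three
// records above get assembly strings roughly like:
//   VFOO    "vfoo\t{$src2, $src1, $dst|$dst, $src1, $src2}"
//   VFOOk   "vfoo\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"
//   VFOOkz  "vfoo\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"
// i.e. the unmasked, merge-masked (EVEX_K) and zero-masked (EVEX_KZ) forms.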


// Common base class of AVX512_maskable and AVX512_maskable_3src.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDPatternOperator Select = vselect_mask,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = ""> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag MaskRHS,
                           string ClobberConstraint = "",
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                          "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDPatternOperator Select = vselect_mask,
                           string ClobberConstraint = ""> :
   AVX512_maskable_common<O, F, _, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable, ClobberConstraint>;
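
// A minimal illustrative sketch (hypothetical operands, not a real record):
// for an instruction whose unmasked pattern is
//   [(set _.RC:$dst, (OpNode _.RC:$src1, _.RC:$src2))]
// AVX512_maskable produces a merge-masking pattern of the form
//   [(set _.RC:$dst, (vselect_mask _.KRCWM:$mask,
//                                  (OpNode _.RC:$src1, _.RC:$src2),
//                                  _.RC:$src0))]
// with the constraint "$src0 = $dst", and a zero-masking pattern that
// selects against _.ImmAllZerosV instead of $src0.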

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS> :
   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                   RHS, 0, 0, 0, X86selects_mask>;

// Similar to AVX512_maskable, but in this case one of the source operands
// ($src1) is already tied to $dst, so we just use that for the preserved
// vector elements.  NOTE that NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDPatternOperator Select = vselect_mask,
                                bit MaskOnly = 0> :
   AVX512_maskable_common<O, F, _, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          !if(MaskOnly, (null_frag), RHS),
                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                          Select, "", IsCommutable, IsKCommutable>;
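
// Illustrative note (a sketch, not a definition): because $src1 is tied to
// $dst, the merge-masking form here preserves elements from $src1 rather than
// from a separate $src0 operand, i.e. the masked pattern is
//   (Select _.KRCWM:$mask, RHS, _.RC:$src1)
// which is the behavior the FMA-style three-source instructions rely on.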

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common<O, F, OutVT, Outs,
                          !con((ins InVT.RC:$src1), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                          (vselect_mask InVT.KRCWM:$mask, RHS,
                           (bitconvert InVT.RC:$src1)),
                           vselect_mask, "", IsCommutable>;

multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS,
                                     bit IsCommutable = 0,
                                     bit IsKCommutable = 0,
                                     bit MaskOnly = 0> :
   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                        X86selects_mask, MaskOnly>;

multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "$src0 = $dst">;

multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "">;

// Instructions with a mask that put the result in a mask register,
// like "compare" and "vptest".
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  bit IsCommutable = 0> {
    let isCommutable = IsCommutable in {
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
    }
}

multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.KRC:$dst, RHS)],
                         [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag RHS_su, bit IsCommutable = 0> :
   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (and _.KRCWM:$mask, RHS_su), IsCommutable>;

// Used by conversion instructions.
multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst, ZeroMaskingRHS)],
                         "$src0 = $dst">;

multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag NonTiedIns, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, bit IsCommutable,
                               bit IsKCommutable> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
                          [(set _.RC:$dst,
                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
                          "", IsCommutable, IsKCommutable>;

// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}
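
// Illustrative note (a sketch of the intent, not a definition): every
// all-zeros materialization above funnels into the single AVX512_512_SET0
// pseudo, which ExpandPostRAPseudos later turns into a self-XOR such as
//   vxorps %xmm0, %xmm0, %xmm0
// (writing an xmm register with a VEX/EVEX instruction zeroes the rest of
// the zmm register), and ExecutionDomainFix may switch it to the
// integer-domain pxor form.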

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                      (v16i32 immAllOnesV),
                                                      (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                           (v8i64 immAllOnesV),
                                           (v8i64 immAllZerosV)))]>;
}

let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}

// Alias instructions that map fld0 to xorps for SSE or vxorps for AVX.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
                          [(set FR16X:$dst, fp16imm0)]>;
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fp64imm0)]>;
  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                            [(set VR128X:$dst, fp128imm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for masked and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                               (From.VT (From.LdFrag addr:$src2)),
                               (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                               (From.VT (From.LdFrag addr:$src2)),
                               (iPTR imm))>, AVX512AIi8Base, EVEX, VVVV,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
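
// Illustrative sketch (not a definition): instantiating this multiclass, as
// vinsert_for_type does further below, expands roughly as
//   defm VINSERTF32x4Z : vinsert_for_size<0x18, ...>, EVEX_V512;
// producing the register forms VINSERTF32x4Zrr / rrk / rrkz and the
// folded-load forms VINSERTF32x4Zrm / rmk / rmkz, with assembly of the shape
//   "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}".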

// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                       X86VectorVTInfo To, PatFrag vinsert_insert,
                       SDNodeXForm INSERT_get_vinsert_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (From.LdFrag addr:$src2)),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                  To.RC:$src1, addr:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, REX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   EVEX_V256, REX_W;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 REX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   null_frag, vinsert256_insert, sched>,
                                   EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;

// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen patterns with the alternative types: insert VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;


multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT
                                                  (bitconvert
                                                   (From.LdFrag addr:$src2))),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 into XMM
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
      EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          timm:$src3))]>,
      EVEX, VVVV, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
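
// Usage sketch (illustrative only): the 8-bit immediate selects the source
// element and destination position, e.g.
//   vinsertps $0x10, %xmm2, %xmm1, %xmm0
// copies element 0 of %xmm2 into element 1 of %xmm1 and writes the result to
// %xmm0 (AT&T operand order, matching the asm strings above).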

//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---

// Supports two different pattern operators for masked and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, To.KRCWM:$mask,
                                        From.RC:$src1, u8imm:$idx),
                     "vextract" # To.EltTypeName # "x" # To.NumElts #
                          "\t{$idx, $src1, $dst {${mask}}|"
                          "$dst {${mask}}, $src1, $idx}", []>,
                    EVEX_K, EVEX, Sched<[SchedMR]>;
  }
}
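
// Illustrative sketch (not a definition): instantiated further below through
// vextract_for_type, e.g. the VEXTRACTF32x4Z family, this expands to the
// register forms VEXTRACTF32x4Zrr / rrk / rrkz plus the store forms
// VEXTRACTF32x4Zmr and VEXTRACTF32x4Zmrk, with assembly such as
//   vextractf32x4 $1, %zmm1, %xmm0 {%k1}
// (extract 128-bit lane 1 of %zmm1 into %xmm0 under mask %k1).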

// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;

// Codegen pattern for the alternative types
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
     def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
               (To.VT (!cast<Instruction>(InstrStr#"rr")
                          From.RC:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
     def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                              (iPTR imm))), addr:$dst),
               (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
                (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}

multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vextract256_extract, SchedRR, SchedMR>,
                                       REX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                    EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     REX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;

// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: extract VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;


// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
          (v8f16 (VEXTRACTF128rr
                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
          (v8f16 (VEXTRACTF32x4Z256rr
                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}
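
// Illustrative note (a sketch of the intent, not additional patterns): for
// (v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))), i.e. bits
// [255:128] of the zmm source, the patterns above first take the ymm
// subregister and then extract 128-bit lane 1 of it, so without VLX the
// result can be emitted as the VEX-encoded
//   vextracti128 $1, %ymm0, %xmm0
// instead of an EVEX-only 512-bit extract.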
880
881
882// Additional patterns for handling a bitcast between the vselect and the
883// extract_subvector.
884multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
885                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
886                                  PatFrag vextract_extract,
887                                  SDNodeXForm EXTRACT_get_vextract_imm,
888                                  list<Predicate> p> {
889let Predicates = p in {
890  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
891                                   (bitconvert
892                                    (To.VT (vextract_extract:$ext
893                                            (From.VT From.RC:$src), (iPTR imm)))),
894                                   To.RC:$src0)),
895            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
896                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
897                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
898
899  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
900                                   (bitconvert
901                                    (To.VT (vextract_extract:$ext
902                                            (From.VT From.RC:$src), (iPTR imm)))),
903                                   Cast.ImmAllZerosV)),
904            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
905                      Cast.KRCWM:$mask, From.RC:$src,
906                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
907}
908}
909
910defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
911                              v4f32x_info, vextract128_extract,
912                              EXTRACT_get_vextract128_imm, [HasVLX]>;
913defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
914                              v2f64x_info, vextract128_extract,
915                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
916
917defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
918                              v4i32x_info, vextract128_extract,
919                              EXTRACT_get_vextract128_imm, [HasVLX]>;
920defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
921                              v4i32x_info, vextract128_extract,
922                              EXTRACT_get_vextract128_imm, [HasVLX]>;
923defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
924                              v4i32x_info, vextract128_extract,
925                              EXTRACT_get_vextract128_imm, [HasVLX]>;
926defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
927                              v2i64x_info, vextract128_extract,
928                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
929defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
930                              v2i64x_info, vextract128_extract,
931                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
932defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
933                              v2i64x_info, vextract128_extract,
934                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
935
936defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
937                              v4f32x_info, vextract128_extract,
938                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
939defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
940                              v2f64x_info, vextract128_extract,
941                              EXTRACT_get_vextract128_imm, [HasDQI]>;
942
943defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
944                              v4i32x_info, vextract128_extract,
945                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
946defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
947                              v4i32x_info, vextract128_extract,
948                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
949defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
950                              v4i32x_info, vextract128_extract,
951                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
952defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
953                              v2i64x_info, vextract128_extract,
954                              EXTRACT_get_vextract128_imm, [HasDQI]>;
955defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
956                              v2i64x_info, vextract128_extract,
957                              EXTRACT_get_vextract128_imm, [HasDQI]>;
958defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
959                              v2i64x_info, vextract128_extract,
960                              EXTRACT_get_vextract128_imm, [HasDQI]>;
961
962defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
963                              v8f32x_info, vextract256_extract,
964                              EXTRACT_get_vextract256_imm, [HasDQI]>;
965defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
966                              v4f64x_info, vextract256_extract,
967                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
968
969defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
970                              v8i32x_info, vextract256_extract,
971                              EXTRACT_get_vextract256_imm, [HasDQI]>;
972defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
973                              v8i32x_info, vextract256_extract,
974                              EXTRACT_get_vextract256_imm, [HasDQI]>;
975defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
976                              v8i32x_info, vextract256_extract,
977                              EXTRACT_get_vextract256_imm, [HasDQI]>;
978defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
979                              v4i64x_info, vextract256_extract,
980                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
981defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
982                              v4i64x_info, vextract256_extract,
983                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
984defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
985                              v4i64x_info, vextract256_extract,
986                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
987
988// vextractps - extract a 32-bit element from an XMM register into a GPR or memory.
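// For illustration (registers and immediate chosen arbitrarily, not taken from
// this file), the two definitions below accept assembly such as:
//   vextractps $1, %xmm2, %eax
//   vextractps $1, %xmm2, (%rdi)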
989def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
990      (ins VR128X:$src1, u8imm:$src2),
991      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
992      [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
993      EVEX, WIG, Sched<[WriteVecExtract]>;
994
995def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
996      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
997      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
998      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
999                          addr:$dst)]>,
1000      EVEX, WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1001
1002//===----------------------------------------------------------------------===//
1003// AVX-512 BROADCAST
1004//---
1005// Broadcast patterns with a scalar register argument.
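// For example (illustrative), a broadcast of an FR32 value selects to the
// corresponding rr/rrk/rrkz instruction after the scalar is copied into
// VR128X with COPY_TO_REGCLASS, as the patterns below show.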
1006multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
1007                                   X86VectorVTInfo SrcInfo> {
1008  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1009            (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
1010             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1011  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1012                                       (X86VBroadcast SrcInfo.FRC:$src),
1013                                       DestInfo.RC:$src0)),
1014            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
1015             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1016             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1017  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1018                                       (X86VBroadcast SrcInfo.FRC:$src),
1019                                       DestInfo.ImmAllZerosV)),
1020            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
1021             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1022}
1023
1024// Split version to allow mask and broadcast node to be different types. This
1025// helps support the 32x2 broadcasts.
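// For example (illustrative), VBROADCASTI32X2 broadcasts a 64-bit chunk of
// two dwords but is masked per 32-bit element, e.g.
//   vbroadcasti32x2 (%rdi), %ymm0 {%k1}
// so the mask type (MaskInfo) and the broadcast type (DestInfo) must be
// allowed to differ.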
1026multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1027                                     SchedWrite SchedRR, SchedWrite SchedRM,
1028                                     X86VectorVTInfo MaskInfo,
1029                                     X86VectorVTInfo DestInfo,
1030                                     X86VectorVTInfo SrcInfo,
1031                                     bit IsConvertibleToThreeAddress,
1032                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
1033                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
1034  let hasSideEffects = 0 in
1035  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1036                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1037                    [(set MaskInfo.RC:$dst,
1038                      (MaskInfo.VT
1039                       (bitconvert
1040                        (DestInfo.VT
1041                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1042                    DestInfo.ExeDomain>, T8, PD, EVEX, Sched<[SchedRR]>;
1043  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1044                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1045                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1046                       "${dst} {${mask}} {z}, $src}"),
1047                       [(set MaskInfo.RC:$dst,
1048                         (vselect_mask MaskInfo.KRCWM:$mask,
1049                          (MaskInfo.VT
1050                           (bitconvert
1051                            (DestInfo.VT
1052                             (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1053                          MaskInfo.ImmAllZerosV))],
1054                       DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
1055  let Constraints = "$src0 = $dst" in
1056  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1057                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1058                          SrcInfo.RC:$src),
1059                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1060                     "${dst} {${mask}}, $src}"),
1061                     [(set MaskInfo.RC:$dst,
1062                       (vselect_mask MaskInfo.KRCWM:$mask,
1063                        (MaskInfo.VT
1064                         (bitconvert
1065                          (DestInfo.VT
1066                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1067                        MaskInfo.RC:$src0))],
1068                      DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K, Sched<[SchedRR]>;
1069
1070  let hasSideEffects = 0, mayLoad = 1 in
1071  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1072                    (ins SrcInfo.ScalarMemOp:$src),
1073                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1074                    [(set MaskInfo.RC:$dst,
1075                      (MaskInfo.VT
1076                       (bitconvert
1077                        (DestInfo.VT
1078                         (UnmaskedBcastOp addr:$src)))))],
1079                    DestInfo.ExeDomain>, T8, PD, EVEX,
1080                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1081
1082  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1083                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1084                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1085                       "${dst} {${mask}} {z}, $src}"),
1086                       [(set MaskInfo.RC:$dst,
1087                         (vselect_mask MaskInfo.KRCWM:$mask,
1088                          (MaskInfo.VT
1089                           (bitconvert
1090                            (DestInfo.VT
1091                             (SrcInfo.BroadcastLdFrag addr:$src)))),
1092                          MaskInfo.ImmAllZerosV))],
1093                       DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ,
1094                       EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1095
1096  let Constraints = "$src0 = $dst",
1097      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1098  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1099                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1100                          SrcInfo.ScalarMemOp:$src),
1101                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1102                     "${dst} {${mask}}, $src}"),
1103                     [(set MaskInfo.RC:$dst,
1104                       (vselect_mask MaskInfo.KRCWM:$mask,
1105                        (MaskInfo.VT
1106                         (bitconvert
1107                          (DestInfo.VT
1108                           (SrcInfo.BroadcastLdFrag addr:$src)))),
1109                        MaskInfo.RC:$src0))],
1110                      DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K,
1111                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1112}
1113
1114// Helper multiclass to force the mask and broadcast result to the same type.
1115multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
1116                               SchedWrite SchedRR, SchedWrite SchedRM,
1117                               X86VectorVTInfo DestInfo,
1118                               X86VectorVTInfo SrcInfo,
1119                               bit IsConvertibleToThreeAddress> :
1120  avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
1121                            DestInfo, DestInfo, SrcInfo,
1122                            IsConvertibleToThreeAddress>;
1123
1124multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1125                                  AVX512VLVectorVTInfo _> {
1126  let Predicates = [HasAVX512] in {
1127    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1128                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
1129              avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1130              EVEX_V512;
1131  }
1132
1133  let Predicates = [HasVLX] in {
1134    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1135                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
1136                 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1137                 EVEX_V256;
1138  }
1139}
1140
1141multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1142                                  AVX512VLVectorVTInfo _> {
1143  let Predicates = [HasAVX512] in {
1144    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1145                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
1146              avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1147              EVEX_V512;
1148  }
1149
1150  let Predicates = [HasVLX] in {
1151    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1152                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
1153                 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1154                 EVEX_V256;
1155    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1156                                     WriteFShuffle256Ld, _.info128, _.info128, 1>,
1157                 avx512_broadcast_scalar<NAME, _.info128, _.info128>,
1158                 EVEX_V128;
1159  }
1160}
1161defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1162                                       avx512vl_f32_info>;
1163defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1164                                       avx512vl_f64_info>, REX_W;
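// Illustrative assembly accepted by the definitions above (registers chosen
// arbitrarily):
//   vbroadcastss %xmm1, %zmm0 {%k1} {z}
//   vbroadcastsd (%rdi), %ymm2 {%k2}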
1165
1166multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1167                                    X86VectorVTInfo _, SDPatternOperator OpNode,
1168                                    RegisterClass SrcRC> {
1169  // Fold with a mask even if the broadcast has multiple uses, since it is cheap.
1170  let ExeDomain = _.ExeDomain in
1171  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1172                          (ins SrcRC:$src),
1173                          "vpbroadcast"#_.Suffix, "$src", "$src",
1174                          (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
1175                          /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
1176                          T8, PD, EVEX, Sched<[SchedRR]>;
1177}
1178
1179multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1180                                    X86VectorVTInfo _, SDPatternOperator OpNode,
1181                                    RegisterClass SrcRC, SubRegIndex Subreg> {
1182  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1183  defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
1184                         (outs _.RC:$dst), (ins GR32:$src),
1185                         !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1186                         !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1187                         "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
1188                         "$src0 = $dst">, T8, PD, EVEX, Sched<[SchedRR]>;
1189
1190  def : Pat <(_.VT (OpNode SrcRC:$src)),
1191             (!cast<Instruction>(Name#rr)
1192              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1193
1194  // Fold with a mask even if the broadcast has multiple uses, since it is cheap.
1195  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1196             (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
1197              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1198
1199  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1200             (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
1201              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1202}
1203
1204multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1205                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1206                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1207  let Predicates = [prd] in
1208    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1209              OpNode, SrcRC, Subreg>, EVEX_V512;
1210  let Predicates = [prd, HasVLX] in {
1211    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1212              _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1213    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1214              _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1215  }
1216}
1217
1218multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1219                                       SDPatternOperator OpNode,
1220                                       RegisterClass SrcRC, Predicate prd> {
1221  let Predicates = [prd] in
1222    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1223                                      SrcRC>, EVEX_V512;
1224  let Predicates = [prd, HasVLX] in {
1225    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1226                                         SrcRC>, EVEX_V256;
1227    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1228                                         SrcRC>, EVEX_V128;
1229  }
1230}
1231
1232defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1233                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1234defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1235                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1236                       HasBWI>;
1237defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1238                                                 X86VBroadcast, GR32, HasAVX512>;
1239defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1240                                                 X86VBroadcast, GR64, HasAVX512>, REX_W;
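// Illustrative GPR-source forms produced by the definitions above (the GR8
// and GR16 sources are first inserted into a GR32, as the patterns show;
// registers chosen arbitrarily):
//   vpbroadcastb %eax, %xmm0
//   vpbroadcastd %eax, %zmm1 {%k1} {z}
//   vpbroadcastq %rax, %ymm2 {%k2}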
1241
1242multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1243                                      AVX512VLVectorVTInfo _, Predicate prd,
1244                                      bit IsConvertibleToThreeAddress> {
1245  let Predicates = [prd] in {
1246    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1247                                   WriteShuffle256Ld, _.info512, _.info128,
1248                                   IsConvertibleToThreeAddress>,
1249                                  EVEX_V512;
1250  }
1251  let Predicates = [prd, HasVLX] in {
1252    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1253                                    WriteShuffle256Ld, _.info256, _.info128,
1254                                    IsConvertibleToThreeAddress>,
1255                                 EVEX_V256;
1256    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
1257                                    WriteShuffleXLd, _.info128, _.info128,
1258                                    IsConvertibleToThreeAddress>,
1259                                 EVEX_V128;
1260  }
1261}
1262
1263defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1264                                           avx512vl_i8_info, HasBWI, 0>;
1265defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1266                                           avx512vl_i16_info, HasBWI, 0>;
1267defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1268                                           avx512vl_i32_info, HasAVX512, 1>;
1269defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1270                                           avx512vl_i64_info, HasAVX512, 1>, REX_W;
1271
1272multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1273                                      SDPatternOperator OpNode,
1274                                      X86VectorVTInfo _Dst,
1275                                      X86VectorVTInfo _Src> {
1276  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1277                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1278                           (_Dst.VT (OpNode addr:$src))>,
1279                           Sched<[SchedWriteShuffle.YMM.Folded]>,
1280                           AVX5128IBase, EVEX;
1281}
1282
1283// This should be used for the AVX512DQ broadcast instructions. It disables
1284// the unmasked patterns so that we only use the DQ instructions when masking
1285// is requested.
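// For example (illustrative), an unmasked 128-bit subvector broadcast still
// selects the AVX512F encoding (e.g. vbroadcasti32x4), while a DQ form such as
//   vbroadcasti64x2 (%rdi), %zmm0 {%k1}
// is only produced when a 64-bit element mask is actually present.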
1286multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1287                                         SDPatternOperator OpNode,
1288                                         X86VectorVTInfo _Dst,
1289                                         X86VectorVTInfo _Src> {
1290  let hasSideEffects = 0, mayLoad = 1 in
1291  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1292                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1293                           (null_frag),
1294                           (_Dst.VT (OpNode addr:$src))>,
1295                           Sched<[SchedWriteShuffle.YMM.Folded]>,
1296                           AVX5128IBase, EVEX;
1297}
1298let Predicates = [HasBWI] in {
1299  def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
1300            (VPBROADCASTWZrm addr:$src)>;
1301
1302  def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
1303            (VPBROADCASTWZrr VR128X:$src)>;
1304  def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
1305            (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1306}
1307let Predicates = [HasVLX, HasBWI] in {
1308  def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
1309            (VPBROADCASTWZ128rm addr:$src)>;
1310  def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
1311            (VPBROADCASTWZ256rm addr:$src)>;
1312
1313  def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
1314            (VPBROADCASTWZ128rr VR128X:$src)>;
1315  def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
1316            (VPBROADCASTWZ256rr VR128X:$src)>;
1317
1318  def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
1319            (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1320  def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
1321            (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1322}
1323
1324//===----------------------------------------------------------------------===//
1325// AVX-512 BROADCAST SUBVECTORS
1326//
1327
1328defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1329                       X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
1330                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
1331defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1332                       X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
1333                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
1334defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1335                       X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W,
1336                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1337defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1338                       X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W,
1339                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
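// Illustrative assembly for the 512-bit subvector broadcasts defined above
// (these forms are load-only; registers chosen arbitrarily):
//   vbroadcasti32x4 (%rdi), %zmm0 {%k1} {z}
//   vbroadcastf64x4 (%rsi), %zmm1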
1340
1341let Predicates = [HasAVX512] in {
1342def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
1343          (VBROADCASTF64X4rm addr:$src)>;
1344def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
1345          (VBROADCASTF64X4rm addr:$src)>;
1346def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
1347          (VBROADCASTF64X4rm addr:$src)>;
1348def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
1349          (VBROADCASTI64X4rm addr:$src)>;
1350def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
1351          (VBROADCASTI64X4rm addr:$src)>;
1352def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
1353          (VBROADCASTI64X4rm addr:$src)>;
1354def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
1355          (VBROADCASTI64X4rm addr:$src)>;
1356
1357def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
1358          (VBROADCASTF32X4rm addr:$src)>;
1359def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
1360          (VBROADCASTF32X4rm addr:$src)>;
1361def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
1362          (VBROADCASTF32X4rm addr:$src)>;
1363def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
1364          (VBROADCASTI32X4rm addr:$src)>;
1365def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
1366          (VBROADCASTI32X4rm addr:$src)>;
1367def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
1368          (VBROADCASTI32X4rm addr:$src)>;
1369def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
1370          (VBROADCASTI32X4rm addr:$src)>;
1371
1372// Patterns for selects of bitcasted operations.
1373def : Pat<(vselect_mask VK16WM:$mask,
1374                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1375                        (v16f32 immAllZerosV)),
1376          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1377def : Pat<(vselect_mask VK16WM:$mask,
1378                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1379                        VR512:$src0),
1380          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1381def : Pat<(vselect_mask VK16WM:$mask,
1382                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1383                        (v16i32 immAllZerosV)),
1384          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1385def : Pat<(vselect_mask VK16WM:$mask,
1386                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1387                        VR512:$src0),
1388          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1389
1390def : Pat<(vselect_mask VK8WM:$mask,
1391                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1392                        (v8f64 immAllZerosV)),
1393          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1394def : Pat<(vselect_mask VK8WM:$mask,
1395                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1396                        VR512:$src0),
1397          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1398def : Pat<(vselect_mask VK8WM:$mask,
1399                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1400                        (v8i64 immAllZerosV)),
1401          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1402def : Pat<(vselect_mask VK8WM:$mask,
1403                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1404                        VR512:$src0),
1405          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1406}
1407
1408let Predicates = [HasVLX] in {
1409defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1410                           X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
1411                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
1412defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1413                           X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
1414                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
1415
1416def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
1417          (VBROADCASTF32X4Z256rm addr:$src)>;
1418def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
1419          (VBROADCASTF32X4Z256rm addr:$src)>;
1420def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
1421          (VBROADCASTF32X4Z256rm addr:$src)>;
1422def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
1423          (VBROADCASTI32X4Z256rm addr:$src)>;
1424def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
1425          (VBROADCASTI32X4Z256rm addr:$src)>;
1426def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
1427          (VBROADCASTI32X4Z256rm addr:$src)>;
1428def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
1429          (VBROADCASTI32X4Z256rm addr:$src)>;
1430
1431// Patterns for selects of bitcasted operations.
1432def : Pat<(vselect_mask VK8WM:$mask,
1433                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1434                        (v8f32 immAllZerosV)),
1435          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1436def : Pat<(vselect_mask VK8WM:$mask,
1437                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1438                        VR256X:$src0),
1439          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1440def : Pat<(vselect_mask VK8WM:$mask,
1441                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1442                        (v8i32 immAllZerosV)),
1443          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1444def : Pat<(vselect_mask VK8WM:$mask,
1445                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1446                        VR256X:$src0),
1447          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1448}
1449
1450let Predicates = [HasBF16] in {
1451  def : Pat<(v32bf16 (X86SubVBroadcastld256 addr:$src)),
1452            (VBROADCASTF64X4rm addr:$src)>;
1453  def : Pat<(v32bf16 (X86SubVBroadcastld128 addr:$src)),
1454            (VBROADCASTF32X4rm addr:$src)>;
1455}
1456
1457let Predicates = [HasBF16, HasVLX] in
1458  def : Pat<(v16bf16 (X86SubVBroadcastld128 addr:$src)),
1459            (VBROADCASTF32X4Z256rm addr:$src)>;
1460
1461let Predicates = [HasVLX, HasDQI] in {
1462defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1463                           X86SubVBroadcastld128, v4i64x_info, v2i64x_info>,
1464                           EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
1465defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1466                           X86SubVBroadcastld128, v4f64x_info, v2f64x_info>,
1467                           EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
1468
1469// Patterns for selects of bitcasted operations.
1470def : Pat<(vselect_mask VK4WM:$mask,
1471                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1472                        (v4f64 immAllZerosV)),
1473          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1474def : Pat<(vselect_mask VK4WM:$mask,
1475                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1476                        VR256X:$src0),
1477          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1478def : Pat<(vselect_mask VK4WM:$mask,
1479                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1480                        (v4i64 immAllZerosV)),
1481          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1482def : Pat<(vselect_mask VK4WM:$mask,
1483                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1484                        VR256X:$src0),
1485          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1486}
1487
1488let Predicates = [HasDQI] in {
1489defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1490                       X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W,
1491                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
1492defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1493                       X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
1494                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
1495defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1496                       X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W,
1497                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
1498defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1499                       X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
1500                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
1501
1502// Patterns for selects of bitcasted operations.
1503def : Pat<(vselect_mask VK16WM:$mask,
1504                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1505                        (v16f32 immAllZerosV)),
1506          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1507def : Pat<(vselect_mask VK16WM:$mask,
1508                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1509                        VR512:$src0),
1510          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1511def : Pat<(vselect_mask VK16WM:$mask,
1512                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1513                        (v16i32 immAllZerosV)),
1514          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1515def : Pat<(vselect_mask VK16WM:$mask,
1516                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1517                        VR512:$src0),
1518          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1519
1520def : Pat<(vselect_mask VK8WM:$mask,
1521                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1522                        (v8f64 immAllZerosV)),
1523          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1524def : Pat<(vselect_mask VK8WM:$mask,
1525                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1526                        VR512:$src0),
1527          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1528def : Pat<(vselect_mask VK8WM:$mask,
1529                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1530                        (v8i64 immAllZerosV)),
1531          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1532def : Pat<(vselect_mask VK8WM:$mask,
1533                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1534                        VR512:$src0),
1535          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1536}
1537
1538multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1539                                        AVX512VLVectorVTInfo _Dst,
1540                                        AVX512VLVectorVTInfo _Src> {
1541  let Predicates = [HasDQI] in
1542    defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1543                                          WriteShuffle256Ld, _Dst.info512,
1544                                          _Src.info512, _Src.info128, 0, null_frag, null_frag>,
1545                                          EVEX_V512;
1546  let Predicates = [HasDQI, HasVLX] in
1547    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1548                                          WriteShuffle256Ld, _Dst.info256,
1549                                          _Src.info256, _Src.info128, 0, null_frag, null_frag>,
1550                                          EVEX_V256;
1551}
1552
1553multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1554                                         AVX512VLVectorVTInfo _Dst,
1555                                         AVX512VLVectorVTInfo _Src> :
1556  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1557
1558  let Predicates = [HasDQI, HasVLX] in
1559    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
1560                                          WriteShuffleXLd, _Dst.info128,
1561                                          _Src.info128, _Src.info128, 0, null_frag, null_frag>,
1562                                          EVEX_V128;
1563}
1564
1565defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1566                                          avx512vl_i32_info, avx512vl_i64_info>;
1567defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1568                                          avx512vl_f32_info, avx512vl_f64_info>;
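// Illustrative assembly for the 32x2 broadcasts defined above (registers
// chosen arbitrarily):
//   vbroadcasti32x2 %xmm1, %zmm0 {%k1}
//   vbroadcastf32x2 (%rdi), %ymm2 {%k2} {z}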
1569
1570//===----------------------------------------------------------------------===//
1571// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1572//---
1573multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1574                                  X86VectorVTInfo _, RegisterClass KRC> {
1575  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1576                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1577                  [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1578                  EVEX, Sched<[WriteShuffle]>;
1579}
1580
1581multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1582                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1583  let Predicates = [HasCDI] in
1584    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1585  let Predicates = [HasCDI, HasVLX] in {
1586    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1587    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1588  }
1589}
1590
1591defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1592                                               avx512vl_i32_info, VK16>;
1593defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1594                                               avx512vl_i64_info, VK8>, REX_W;
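// Illustrative assembly for the mask-to-vector broadcasts defined above (the
// zero-extended mask register value is replicated into every element):
//   vpbroadcastmw2d %k1, %zmm0
//   vpbroadcastmb2q %k2, %ymm1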
1595
1596//===----------------------------------------------------------------------===//
1597// -- VPERMI2 - 3-source-operand form --
1598multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1599                         X86FoldableSchedWrite sched,
1600                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1601let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1602    hasSideEffects = 0 in {
1603  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1604          (ins _.RC:$src2, _.RC:$src3),
1605          OpcodeStr, "$src3, $src2", "$src2, $src3",
1606          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1607          EVEX, VVVV, AVX5128IBase, Sched<[sched]>;
1608
1609  let mayLoad = 1 in
1610  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1611            (ins _.RC:$src2, _.MemOp:$src3),
1612            OpcodeStr, "$src3, $src2", "$src2, $src3",
1613            (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1614                   (_.VT (_.LdFrag addr:$src3)))), 1>,
1615            EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1616  }
1617}
1618
1619multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1620                            X86FoldableSchedWrite sched,
1621                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1622  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1623      hasSideEffects = 0, mayLoad = 1 in
1624  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1625              (ins _.RC:$src2, _.ScalarMemOp:$src3),
1626              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1627              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1628              (_.VT (X86VPermt2 _.RC:$src2,
1629               IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1630              AVX5128IBase, EVEX, VVVV, EVEX_B,
1631              Sched<[sched.Folded, sched.ReadAfterFold]>;
1632}
1633
1634multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1635                               X86FoldableSchedWrite sched,
1636                               AVX512VLVectorVTInfo VTInfo,
1637                               AVX512VLVectorVTInfo ShuffleMask> {
1638  defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1639                             ShuffleMask.info512>,
1640               avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1641                                ShuffleMask.info512>, EVEX_V512;
1642  let Predicates = [HasVLX] in {
1643  defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1644                                ShuffleMask.info128>,
1645                  avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1646                                   ShuffleMask.info128>, EVEX_V128;
1647  defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1648                                ShuffleMask.info256>,
1649                  avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1650                                   ShuffleMask.info256>, EVEX_V256;
1651  }
1652}
1653
1654multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1655                                  X86FoldableSchedWrite sched,
1656                                  AVX512VLVectorVTInfo VTInfo,
1657                                  AVX512VLVectorVTInfo Idx,
1658                                  Predicate Prd> {
1659  let Predicates = [Prd] in
1660  defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1661                             Idx.info512>, EVEX_V512;
1662  let Predicates = [Prd, HasVLX] in {
1663  defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1664                                Idx.info128>, EVEX_V128;
1665  defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1666                                Idx.info256>,  EVEX_V256;
1667  }
1668}
1669
1670defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1671                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1672defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1673                  avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1674defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1675                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1676                  REX_W, EVEX_CD8<16, CD8VF>;
1677defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1678                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1679                  EVEX_CD8<8, CD8VF>;
1680defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1681                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1682defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1683                  avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
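// Illustrative VPERMI2 form (registers chosen arbitrarily): in
//   vpermi2d %zmm2, %zmm1, %zmm0 {%k1}
// %zmm0 is both the index vector and the destination, matching the tied
// "$src1 = $dst" constraint used above.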
1684
1685// Extra patterns to handle the additional bitcasts that arise because the
1686// passthru and index operands have different types on the FP versions.
1687multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1688                                  X86VectorVTInfo IdxVT,
1689                                  X86VectorVTInfo CastVT> {
1690  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1691                                (X86VPermt2 (_.VT _.RC:$src2),
1692                                            (IdxVT.VT (bitconvert
1693                                                       (CastVT.VT _.RC:$src1))),
1694                                            _.RC:$src3),
1695                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1696            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1697                                                _.RC:$src2, _.RC:$src3)>;
1698  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1699                                (X86VPermt2 _.RC:$src2,
1700                                            (IdxVT.VT (bitconvert
1701                                                       (CastVT.VT _.RC:$src1))),
1702                                            (_.LdFrag addr:$src3)),
1703                                (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1704            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1705                                                _.RC:$src2, addr:$src3)>;
1706  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1707                                 (X86VPermt2 _.RC:$src2,
1708                                             (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1709                                             (_.BroadcastLdFrag addr:$src3)),
1710                                 (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1711            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1712                                                 _.RC:$src2, addr:$src3)>;
1713}
1714
1715// TODO: Should we add more casts? The vXi64 case is common due to ABI.
1716defm : avx512_perm_i_lowering<"VPERMI2PSZ", v16f32_info, v16i32_info, v8i64_info>;
1717defm : avx512_perm_i_lowering<"VPERMI2PSZ256", v8f32x_info, v8i32x_info, v4i64x_info>;
1718defm : avx512_perm_i_lowering<"VPERMI2PSZ128", v4f32x_info, v4i32x_info, v2i64x_info>;
1719
1720// VPERMT2
1721multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1722                         X86FoldableSchedWrite sched,
1723                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1724let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1725  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1726          (ins IdxVT.RC:$src2, _.RC:$src3),
1727          OpcodeStr, "$src3, $src2", "$src2, $src3",
1728          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1729          EVEX, VVVV, AVX5128IBase, Sched<[sched]>;
1730
1731  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1732            (ins IdxVT.RC:$src2, _.MemOp:$src3),
1733            OpcodeStr, "$src3, $src2", "$src2, $src3",
1734            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1735                   (_.LdFrag addr:$src3))), 1>,
1736            EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1737  }
1738}
1739multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1740                            X86FoldableSchedWrite sched,
1741                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1742  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1743  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1744              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1745              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1746              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1747              (_.VT (X86VPermt2 _.RC:$src1,
1748               IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1749              AVX5128IBase, EVEX, VVVV, EVEX_B,
1750              Sched<[sched.Folded, sched.ReadAfterFold]>;
1751}
1752
1753multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1754                               X86FoldableSchedWrite sched,
1755                               AVX512VLVectorVTInfo VTInfo,
1756                               AVX512VLVectorVTInfo ShuffleMask> {
1757  defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1758                             ShuffleMask.info512>,
1759               avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1760                                ShuffleMask.info512>, EVEX_V512;
1761  let Predicates = [HasVLX] in {
1762  defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1763                                ShuffleMask.info128>,
1764                  avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1765                                   ShuffleMask.info128>, EVEX_V128;
1766  defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1767                                ShuffleMask.info256>,
1768                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1769                                    ShuffleMask.info256>, EVEX_V256;
1770  }
1771}
1772
1773multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1774                                  X86FoldableSchedWrite sched,
1775                                  AVX512VLVectorVTInfo VTInfo,
1776                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
1777  let Predicates = [Prd] in
1778  defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1779                             Idx.info512>, EVEX_V512;
1780  let Predicates = [Prd, HasVLX] in {
1781  defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1782                                Idx.info128>, EVEX_V128;
1783  defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1784                                Idx.info256>, EVEX_V256;
1785  }
1786}
1787
1788defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1789                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1790defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1791                  avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1792defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1793                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1794                  REX_W, EVEX_CD8<16, CD8VF>;
1795defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1796                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1797                  EVEX_CD8<8, CD8VF>;
1798defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1799                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1800defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1801                  avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
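// Illustrative VPERMT2 form (registers chosen arbitrarily): in
//   vpermt2d %zmm2, %zmm1, %zmm0 {%k1}
// %zmm0 is both the first data source and the destination, and %zmm1 supplies
// the indices, again matching the tied "$src1 = $dst" constraint.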
1802
1803//===----------------------------------------------------------------------===//
1804// AVX-512 - BLEND using mask
1805//
1806
1807multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1808                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1809  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1810  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1811             (ins _.RC:$src1, _.RC:$src2),
1812             !strconcat(OpcodeStr,
1813             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1814             EVEX, VVVV, Sched<[sched]>;
1815  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1816             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1817             !strconcat(OpcodeStr,
1818             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1819             []>, EVEX, VVVV, EVEX_K, Sched<[sched]>;
1820  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1821             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1822             !strconcat(OpcodeStr,
1823             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1824             []>, EVEX, VVVV, EVEX_KZ, Sched<[sched]>;
1825  let mayLoad = 1 in {
1826  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1827             (ins _.RC:$src1, _.MemOp:$src2),
1828             !strconcat(OpcodeStr,
1829             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1830             []>, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
1831             Sched<[sched.Folded, sched.ReadAfterFold]>;
1832  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1833             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1834             !strconcat(OpcodeStr,
1835             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1836             []>, EVEX, VVVV, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1837             Sched<[sched.Folded, sched.ReadAfterFold]>;
1838  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1839             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1840             !strconcat(OpcodeStr,
1841             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1842             []>, EVEX, VVVV, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1843             Sched<[sched.Folded, sched.ReadAfterFold]>;
1844  }
1845  }
1846}
1847multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1848                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1849  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
1850  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1851      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1852       !strconcat(OpcodeStr,
1853            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1854            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1855      EVEX, VVVV, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1856      Sched<[sched.Folded, sched.ReadAfterFold]>;
1857
1858  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1859      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1860       !strconcat(OpcodeStr,
1861            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
1862            "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1863      EVEX, VVVV, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1864      Sched<[sched.Folded, sched.ReadAfterFold]>;
1865
1866  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1867      (ins _.RC:$src1, _.ScalarMemOp:$src2),
1868       !strconcat(OpcodeStr,
1869            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1870            "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1871      EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1872      Sched<[sched.Folded, sched.ReadAfterFold]>;
1873  }
1874}
1875
1876multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1877                        AVX512VLVectorVTInfo VTInfo> {
1878  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1879           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1880                                 EVEX_V512;
1881
1882  let Predicates = [HasVLX] in {
1883    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1884                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1885                                      EVEX_V256;
1886    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1887                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1888                                      EVEX_V128;
1889  }
1890}
1891
1892multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1893                        AVX512VLVectorVTInfo VTInfo> {
1894  let Predicates = [HasBWI] in
1895    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1896                               EVEX_V512;
1897
1898  let Predicates = [HasBWI, HasVLX] in {
1899    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1900                                  EVEX_V256;
1901    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1902                                  EVEX_V128;
1903  }
1904}
1905
1906defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
1907                              avx512vl_f32_info>;
1908defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
1909                              avx512vl_f64_info>, REX_W;
1910defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
1911                              avx512vl_i32_info>;
1912defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
1913                              avx512vl_i64_info>, REX_W;
1914defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
1915                              avx512vl_i8_info>;
1916defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
1917                              avx512vl_i16_info>, REX_W;
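// Illustrative assembly for the mask blends defined above (each mask bit
// selects between the corresponding elements of the two vector sources;
// registers chosen arbitrarily):
//   vblendmps %zmm2, %zmm1, %zmm0 {%k1}
//   vpblendmb (%rdi), %ymm1, %ymm2 {%k2} {z}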
1918
1919//===----------------------------------------------------------------------===//
1920// Compare Instructions
1921//===----------------------------------------------------------------------===//
1922
1923// avx512_cmp_scalar - AVX512 CMPSS, CMPSD and CMPSH
1924
1925multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
1926                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
1927                             X86FoldableSchedWrite sched> {
1928  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1929                      (outs _.KRC:$dst),
1930                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1931                      "vcmp"#_.Suffix,
1932                      "$cc, $src2, $src1", "$src1, $src2, $cc",
1933                      (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
1934                      (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1935                                 timm:$cc)>, EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
1936  let mayLoad = 1 in
1937  defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
1938                    (outs _.KRC:$dst),
1939                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
1940                    "vcmp"#_.Suffix,
1941                    "$cc, $src2, $src1", "$src1, $src2, $cc",
1942                    (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
1943                        timm:$cc),
1944                    (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
1945                        timm:$cc)>, EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
1946                    Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
1947
1948  let Uses = [MXCSR] in
1949  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1950                     (outs _.KRC:$dst),
1951                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1952                     "vcmp"#_.Suffix,
1953                     "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
1954                     (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1955                                timm:$cc),
1956                     (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1957                                   timm:$cc)>,
1958                     EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
1959
1960  let isCodeGenOnly = 1 in {
1961    let isCommutable = 1 in
1962    def rr : AVX512Ii8<0xC2, MRMSrcReg,
1963                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
1964                !strconcat("vcmp", _.Suffix,
1965                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
1966                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1967                                          _.FRC:$src2,
1968                                          timm:$cc))]>,
1969                EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
1970    def rm : AVX512Ii8<0xC2, MRMSrcMem,
1971              (outs _.KRC:$dst),
1972              (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
1973              !strconcat("vcmp", _.Suffix,
1974                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
1975              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1976                                        (_.ScalarLdFrag addr:$src2),
1977                                        timm:$cc))]>,
1978              EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
1979              Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
1980  }
1981}
1982
1983let Predicates = [HasAVX512] in {
1984  let ExeDomain = SSEPackedSingle in
1985  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
1986                                   X86cmpms_su, X86cmpmsSAE_su,
1987                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
1988  let ExeDomain = SSEPackedDouble in
1989  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
1990                                   X86cmpms_su, X86cmpmsSAE_su,
1991                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, REX_W;
1992}
1993let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
1994  defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
1995                                   X86cmpms_su, X86cmpmsSAE_su,
1996                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;
1997
1998multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
1999                              X86FoldableSchedWrite sched,
2000                              X86VectorVTInfo _, bit IsCommutable> {
2001  let isCommutable = IsCommutable, hasSideEffects = 0 in
2002  def rr : AVX512BI<opc, MRMSrcReg,
2003             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2004             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2005             []>, EVEX, VVVV, Sched<[sched]>;
2006  let mayLoad = 1, hasSideEffects = 0 in
2007  def rm : AVX512BI<opc, MRMSrcMem,
2008             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2009             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2010             []>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
2011  let isCommutable = IsCommutable, hasSideEffects = 0 in
2012  def rrk : AVX512BI<opc, MRMSrcReg,
2013              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2014              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2015                          "$dst {${mask}}, $src1, $src2}"),
2016              []>, EVEX, VVVV, EVEX_K, Sched<[sched]>;
2017  let mayLoad = 1, hasSideEffects = 0 in
2018  def rmk : AVX512BI<opc, MRMSrcMem,
2019              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2020              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2021                          "$dst {${mask}}, $src1, $src2}"),
2022              []>, EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2023}
2024
2025multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2026                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2027                                  bit IsCommutable> :
2028           avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2029  let mayLoad = 1, hasSideEffects = 0 in {
2030  def rmb : AVX512BI<opc, MRMSrcMem,
2031              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2032              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2033                                    "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2034              []>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2035  def rmbk : AVX512BI<opc, MRMSrcMem,
2036               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2037                                       _.ScalarMemOp:$src2),
2038               !strconcat(OpcodeStr,
2039                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2040                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2041               []>, EVEX, VVVV, EVEX_K, EVEX_B,
2042               Sched<[sched.Folded, sched.ReadAfterFold]>;
2043  }
2044}
2045
2046multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2047                                 X86SchedWriteWidths sched,
2048                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
2049                                 bit IsCommutable = 0> {
2050  let Predicates = [prd] in
2051  defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2052                              VTInfo.info512, IsCommutable>, EVEX_V512;
2053
2054  let Predicates = [prd, HasVLX] in {
2055    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2056                                   VTInfo.info256, IsCommutable>, EVEX_V256;
2057    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2058                                   VTInfo.info128, IsCommutable>, EVEX_V128;
2059  }
2060}
2061
2062multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2063                                     X86SchedWriteWidths sched,
2064                                     AVX512VLVectorVTInfo VTInfo,
2065                                     Predicate prd, bit IsCommutable = 0> {
2066  let Predicates = [prd] in
2067  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2068                                  VTInfo.info512, IsCommutable>, EVEX_V512;
2069
2070  let Predicates = [prd, HasVLX] in {
2071    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2072                                       VTInfo.info256, IsCommutable>, EVEX_V256;
2073    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2074                                       VTInfo.info128, IsCommutable>, EVEX_V128;
2075  }
2076}
2077
2078// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2079// increase the pattern complexity the way an immediate would.
2080let AddedComplexity = 2 in {
2081// FIXME: Is there a better scheduler class for VPCMP?
2082defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2083                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2084                EVEX_CD8<8, CD8VF>, WIG;
2085
2086defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2087                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2088                EVEX_CD8<16, CD8VF>, WIG;
2089
2090defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2091                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2092                EVEX_CD8<32, CD8VF>;
2093
2094defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2095                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2096                T8, REX_W, EVEX_CD8<64, CD8VF>;
2097
2098defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2099                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2100                EVEX_CD8<8, CD8VF>, WIG;
2101
2102defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2103                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2104                EVEX_CD8<16, CD8VF>, WIG;
2105
2106defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2107                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2108                EVEX_CD8<32, CD8VF>;
2109
2110defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2111                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2112                T8, REX_W, EVEX_CD8<64, CD8VF>;
2113}
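// For example, the 512-bit forms above produce compares such as
//   vpcmpeqd %zmm1, %zmm0, %k1            k1[i] = (zmm0[i] == zmm1[i])
// and the rmb variants additionally accept an embedded broadcast operand,
// e.g. (%rdi){1to16} for dword elements.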
2114
2115multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2116                          PatFrag Frag_su,
2117                          X86FoldableSchedWrite sched,
2118                          X86VectorVTInfo _, string Name> {
2119  let isCommutable = 1 in
2120  def rri : AVX512AIi8<opc, MRMSrcReg,
2121             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2122             !strconcat("vpcmp", Suffix,
2123                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2124             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2125                                                (_.VT _.RC:$src2),
2126                                                cond)))]>,
2127             EVEX, VVVV, Sched<[sched]>;
2128  def rmi : AVX512AIi8<opc, MRMSrcMem,
2129             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2130             !strconcat("vpcmp", Suffix,
2131                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2132             [(set _.KRC:$dst, (_.KVT
2133                                (Frag:$cc
2134                                 (_.VT _.RC:$src1),
2135                                 (_.VT (_.LdFrag addr:$src2)),
2136                                 cond)))]>,
2137             EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
2138  let isCommutable = 1 in
2139  def rrik : AVX512AIi8<opc, MRMSrcReg,
2140              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2141                                      u8imm:$cc),
2142              !strconcat("vpcmp", Suffix,
2143                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2144                         "$dst {${mask}}, $src1, $src2, $cc}"),
2145              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2146                                     (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2147                                                         (_.VT _.RC:$src2),
2148                                                         cond))))]>,
2149              EVEX, VVVV, EVEX_K, Sched<[sched]>;
2150  def rmik : AVX512AIi8<opc, MRMSrcMem,
2151              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2152                                    u8imm:$cc),
2153              !strconcat("vpcmp", Suffix,
2154                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2155                         "$dst {${mask}}, $src1, $src2, $cc}"),
2156              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2157                                     (_.KVT
2158                                      (Frag_su:$cc
2159                                       (_.VT _.RC:$src1),
2160                                       (_.VT (_.LdFrag addr:$src2)),
2161                                       cond))))]>,
2162              EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2163
2164  def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
2165                             (_.VT _.RC:$src1), cond)),
2166            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2167             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2168
2169  def : Pat<(and _.KRCWM:$mask,
2170                 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
2171                                     (_.VT _.RC:$src1), cond))),
2172            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2173             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2174             (X86pcmpm_imm_commute $cc))>;
2175}
2176
2177multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2178                              PatFrag Frag_su, X86FoldableSchedWrite sched,
2179                              X86VectorVTInfo _, string Name> :
2180           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
2181  def rmib : AVX512AIi8<opc, MRMSrcMem,
2182             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2183                                     u8imm:$cc),
2184             !strconcat("vpcmp", Suffix,
2185                        "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2186                        "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2187             [(set _.KRC:$dst, (_.KVT (Frag:$cc
2188                                       (_.VT _.RC:$src1),
2189                                       (_.BroadcastLdFrag addr:$src2),
2190                                       cond)))]>,
2191             EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2192  def rmibk : AVX512AIi8<opc, MRMSrcMem,
2193              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2194                                       _.ScalarMemOp:$src2, u8imm:$cc),
2195              !strconcat("vpcmp", Suffix,
2196                  "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2197                  "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2198              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2199                                     (_.KVT (Frag_su:$cc
2200                                             (_.VT _.RC:$src1),
2201                                             (_.BroadcastLdFrag addr:$src2),
2202                                             cond))))]>,
2203              EVEX, VVVV, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2204
2205  def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
2206                    (_.VT _.RC:$src1), cond)),
2207            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2208             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2209
2210  def : Pat<(and _.KRCWM:$mask,
2211                 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
2212                                     (_.VT _.RC:$src1), cond))),
2213            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2214             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2215             (X86pcmpm_imm_commute $cc))>;
2216}
2217
2218multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2219                             PatFrag Frag_su, X86SchedWriteWidths sched,
2220                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2221  let Predicates = [prd] in
2222  defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2223                          sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2224
2225  let Predicates = [prd, HasVLX] in {
2226    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2227                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2228    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2229                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2230  }
2231}
2232
2233multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2234                                 PatFrag Frag_su, X86SchedWriteWidths sched,
2235                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2236  let Predicates = [prd] in
2237  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2238                              sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2239
2240  let Predicates = [prd, HasVLX] in {
2241    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2242                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2243    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2244                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2245  }
2246}
2247
2248// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
2249defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2250                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2251                                EVEX_CD8<8, CD8VF>;
2252defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2253                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2254                                 EVEX_CD8<8, CD8VF>;
2255
2256defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2257                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2258                                REX_W, EVEX_CD8<16, CD8VF>;
2259defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2260                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2261                                 REX_W, EVEX_CD8<16, CD8VF>;
2262
2263defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2264                                    SchedWriteVecALU, avx512vl_i32_info,
2265                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
2266defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2267                                     SchedWriteVecALU, avx512vl_i32_info,
2268                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
2269
2270defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2271                                    SchedWriteVecALU, avx512vl_i64_info,
2272                                    HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
2273defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2274                                     SchedWriteVecALU, avx512vl_i64_info,
2275                                     HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
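// The immediate selects the predicate (0=EQ, 1=LT, 2=LE, 4=NE, 5=NLT, 6=NLE
// for the signed forms, with unsigned equivalents for VPCMPU*); for example
//   vpcmpd $2, %zmm1, %zmm0, %k1          k1[i] = (zmm0[i] <= zmm1[i]), signed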
2276
2277multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2278                              string Name> {
2279let Uses = [MXCSR], mayRaiseFPException = 1 in {
2280  defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2281                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2282                   "vcmp"#_.Suffix,
2283                   "$cc, $src2, $src1", "$src1, $src2, $cc",
2284                   (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2285                   (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2286                   1>, Sched<[sched]>;
2287
2288  defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2289                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2290                "vcmp"#_.Suffix,
2291                "$cc, $src2, $src1", "$src1, $src2, $cc",
2292                (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2293                             timm:$cc),
2294                (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2295                            timm:$cc)>,
2296                Sched<[sched.Folded, sched.ReadAfterFold]>;
2297
2298  defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2299                (outs _.KRC:$dst),
2300                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2301                "vcmp"#_.Suffix,
2302                "$cc, ${src2}"#_.BroadcastStr#", $src1",
2303                "$src1, ${src2}"#_.BroadcastStr#", $cc",
2304                (X86any_cmpm (_.VT _.RC:$src1),
2305                             (_.VT (_.BroadcastLdFrag addr:$src2)),
2306                             timm:$cc),
2307                (X86cmpm_su (_.VT _.RC:$src1),
2308                            (_.VT (_.BroadcastLdFrag addr:$src2)),
2309                            timm:$cc)>,
2310                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2311  }
2312
2313  // Patterns for selecting with loads in other operand.
2314  def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2315                         timm:$cc),
2316            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2317                                                      (X86cmpm_imm_commute timm:$cc))>;
2318
2319  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2320                                            (_.VT _.RC:$src1),
2321                                            timm:$cc)),
2322            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2323                                                       _.RC:$src1, addr:$src2,
2324                                                       (X86cmpm_imm_commute timm:$cc))>;
2325
2326  def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
2327                         (_.VT _.RC:$src1), timm:$cc),
2328            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2329                                                       (X86cmpm_imm_commute timm:$cc))>;
2330
2331  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2332                                            (_.VT _.RC:$src1),
2333                                            timm:$cc)),
2334            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2335                                                        _.RC:$src1, addr:$src2,
2336                                                        (X86cmpm_imm_commute timm:$cc))>;
2337
2338  // Patterns for mask intrinsics.
2339  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
2340                      (_.KVT immAllOnesV)),
2341            (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;
2342
2343  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
2344            (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
2345                                                       _.RC:$src2, timm:$cc)>;
2346
2347  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2348                      (_.KVT immAllOnesV)),
2349            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;
2350
2351  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2352                      _.KRCWM:$mask),
2353            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
2354                                                       addr:$src2, timm:$cc)>;
2355
2356  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2357                      (_.KVT immAllOnesV)),
2358            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;
2359
2360  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2361                      _.KRCWM:$mask),
2362            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
2363                                                        addr:$src2, timm:$cc)>;
2364
2365  // Patterns for mask intrinsics with loads in other operand.
2366  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2367                      (_.KVT immAllOnesV)),
2368            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2369                                                      (X86cmpm_imm_commute timm:$cc))>;
2370
2371  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2372                      _.KRCWM:$mask),
2373            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2374                                                       _.RC:$src1, addr:$src2,
2375                                                       (X86cmpm_imm_commute timm:$cc))>;
2376
2377  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2378                      (_.KVT immAllOnesV)),
2379            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2380                                                       (X86cmpm_imm_commute timm:$cc))>;
2381
2382  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2383                      _.KRCWM:$mask),
2384            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2385                                                        _.RC:$src1, addr:$src2,
2386                                                        (X86cmpm_imm_commute  timm:$cc))>;
2387}
2388
2389multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2390  // Comparison-code form (VCMP[EQ/LT/LE/...]) with {sae}.
2391  let Uses = [MXCSR] in
2392  defm  rrib  : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
2393                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2394                     (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
2395                     "vcmp"#_.Suffix,
2396                     "$cc, {sae}, $src2, $src1",
2397                     "$src1, $src2, {sae}, $cc",
2398                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2399                                        (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
2400                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2401                                        (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
2402                     EVEX_B, Sched<[sched]>;
2403}
2404
2405multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
2406                       Predicate Pred = HasAVX512> {
2407  let Predicates = [Pred] in {
2408    defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2409                avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2410
2411  }
2412  let Predicates = [Pred,HasVLX] in {
2413   defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2414   defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2415  }
2416}
2417
2418defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2419                          AVX512PDIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
2420defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2421                          AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
2422defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
2423                          AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<16, CD8VF>, TA;
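// For example, the packed forms take the comparison code as an immediate and
// can fold full loads or embedded broadcasts:
//   vcmpps $1, %zmm1, %zmm0, %k1              k1[i] = (zmm0[i] < zmm1[i])
//   vcmpps $1, (%rdi){1to16}, %zmm0, %k1      compare against a broadcast f32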
2424
2425// Patterns to select fp compares with load as first operand.
2426let Predicates = [HasAVX512] in {
2427  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
2428            (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2429
2430  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
2431            (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2432}
2433
2434let Predicates = [HasFP16] in {
2435  def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
2436            (VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2437}
2438
2439// ----------------------------------------------------------------
2440// FPClass
2441
2442// Handle the scalar fpclass instruction:  mask = op(reg_scalar, imm)
2443//                                                 op(mem_scalar, imm)
2444multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2445                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2446                                 Predicate prd> {
2447  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2448      def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2449                      (ins _.RC:$src1, i32u8imm:$src2),
2450                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2451                      [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2452                              (i32 timm:$src2)))]>,
2453                      Sched<[sched]>;
2454      def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2455                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2456                      OpcodeStr#_.Suffix#
2457                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2458                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2459                                      (X86Vfpclasss_su (_.VT _.RC:$src1),
2460                                      (i32 timm:$src2))))]>,
2461                      EVEX_K, Sched<[sched]>;
2462    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2463                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2464                    OpcodeStr#_.Suffix#
2465                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2466                    [(set _.KRC:$dst,
2467                          (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
2468                                        (i32 timm:$src2)))]>,
2469                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2470    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2471                    (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2472                    OpcodeStr#_.Suffix#
2473                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2474                    [(set _.KRC:$dst,(and _.KRCWM:$mask,
2475                        (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
2476                            (i32 timm:$src2))))]>,
2477                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2478  }
2479}
2480
2481// Handle the vector fpclass instruction:  mask = fpclass(reg_vec, imm)
2482//                                                fpclass(mem_vec, imm)
2483//                                                fpclass(broadcast(eltVT), imm)
2484multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2485                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2486                                 string mem>{
2487  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2488  def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2489                      (ins _.RC:$src1, i32u8imm:$src2),
2490                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2491                      [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2492                                       (i32 timm:$src2)))]>,
2493                      Sched<[sched]>;
2494  def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2495                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2496                      OpcodeStr#_.Suffix#
2497                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2498                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2499                                       (X86Vfpclass_su (_.VT _.RC:$src1),
2500                                       (i32 timm:$src2))))]>,
2501                      EVEX_K, Sched<[sched]>;
2502  def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2503                    (ins _.MemOp:$src1, i32u8imm:$src2),
2504                    OpcodeStr#_.Suffix#"{"#mem#"}"#
2505                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2506                    [(set _.KRC:$dst,(X86Vfpclass
2507                                     (_.VT (_.LdFrag addr:$src1)),
2508                                     (i32 timm:$src2)))]>,
2509                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2510  def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2511                    (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2512                    OpcodeStr#_.Suffix#"{"#mem#"}"#
2513                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2514                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2515                                  (_.VT (_.LdFrag addr:$src1)),
2516                                  (i32 timm:$src2))))]>,
2517                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2518  def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2519                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2520                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2521                                      _.BroadcastStr#", $dst|$dst, ${src1}"
2522                                                  #_.BroadcastStr#", $src2}",
2523                    [(set _.KRC:$dst,(X86Vfpclass
2524                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
2525                                     (i32 timm:$src2)))]>,
2526                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2527  def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2528                    (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2529                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2530                          _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
2531                                                   _.BroadcastStr#", $src2}",
2532                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2533                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
2534                                     (i32 timm:$src2))))]>,
2535                    EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
2536  }
2537
2538  // Also accept the x/y/z suffix (normally used to disambiguate the memory
2539  // form) on the register and broadcast forms.
2540  def : InstAlias<OpcodeStr#_.Suffix#mem#
2541                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2542                  (!cast<Instruction>(NAME#"rr")
2543                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2544  def : InstAlias<OpcodeStr#_.Suffix#mem#
2545                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2546                  (!cast<Instruction>(NAME#"rrk")
2547                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2548  def : InstAlias<OpcodeStr#_.Suffix#mem#
2549                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2550                  _.BroadcastStr#", $src2}",
2551                  (!cast<Instruction>(NAME#"rmb")
2552                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2553  def : InstAlias<OpcodeStr#_.Suffix#mem#
2554                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2555                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2556                  (!cast<Instruction>(NAME#"rmbk")
2557                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2558}
2559
2560multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2561                                     bits<8> opc, X86SchedWriteWidths sched,
2562                                     Predicate prd>{
2563  let Predicates = [prd] in {
2564    defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2565                                      _.info512, "z">, EVEX_V512;
2566  }
2567  let Predicates = [prd, HasVLX] in {
2568    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2569                                      _.info128, "x">, EVEX_V128;
2570    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2571                                      _.info256, "y">, EVEX_V256;
2572  }
2573}
2574
2575multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2576                                 bits<8> opcScalar, X86SchedWriteWidths sched> {
2577  defm PH : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f16_info, opcVec,
2578                                      sched, HasFP16>,
2579                                      EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
2580  defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2581                                   sched.Scl, f16x_info, HasFP16>,
2582                                   EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
2583  defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
2584                                      sched, HasDQI>,
2585                                      EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
2586  defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
2587                                      sched, HasDQI>,
2588                                      EVEX_CD8<64, CD8VF>, AVX512AIi8Base, REX_W;
2589  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2590                                   sched.Scl, f32x_info, HasDQI>, VEX_LIG,
2591                                   EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
2592  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2593                                   sched.Scl, f64x_info, HasDQI>, VEX_LIG,
2594                                   EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, REX_W;
2595}
2596
2597defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;
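// VFPCLASS tests each element against the FP classes selected by the
// immediate (QNaN, +/-0, +/-Inf, denormal, negative finite, SNaN, as defined
// in the ISA reference); for example
//   vfpclassps $0x18, %zmm0, %k1          sets k1[i] when zmm0[i] is +/-Inf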
2598
2599//-----------------------------------------------------------------
2600// Mask register copy, including
2601// - copy between mask registers
2602// - load/store mask registers
2603// - copy from GPR to mask register and vice versa
2604//
2605multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2606                          string OpcodeStr, RegisterClass KRC, ValueType vvt,
2607                          X86MemOperand x86memop, string Suffix = ""> {
2608  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove],
2609      explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in
2610  def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2611                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2612                  Sched<[WriteMove]>;
2613  def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2614                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2615                    [(set KRC:$dst, (vvt (load addr:$src)))]>,
2616                  Sched<[WriteLoad]>;
2617  def mk#Suffix : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2618                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2619                    [(store KRC:$src, addr:$dst)]>,
2620                  Sched<[WriteStore]>;
2621}
2622
2623multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2624                               string OpcodeStr, RegisterClass KRC,
2625                               RegisterClass GRC, string Suffix = ""> {
2626  let hasSideEffects = 0 in {
2627    def kr#Suffix : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2628                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2629                    Sched<[WriteMove]>;
2630    def rk#Suffix : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2631                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2632                    Sched<[WriteMove]>;
2633  }
2634}
2635
2636let Predicates = [HasDQI, NoEGPR] in
2637  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2638               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2639               VEX, TB, PD;
2640let Predicates = [HasDQI, HasEGPR, In64BitMode] in
2641  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem, "_EVEX">,
2642               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32, "_EVEX">,
2643               EVEX, TB, PD;
2644
2645let Predicates = [HasAVX512, NoEGPR] in
2646  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2647               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2648               VEX, TB;
2649let Predicates = [HasAVX512, HasEGPR, In64BitMode] in
2650  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem, "_EVEX">,
2651               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32, "_EVEX">,
2652               EVEX, TB;
2653
2654let Predicates = [HasBWI, NoEGPR] in {
2655  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2656               VEX, TB, PD, REX_W;
2657  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2658               VEX, TB, XD;
2659  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2660               VEX, TB, REX_W;
2661  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2662               VEX, TB, XD, REX_W;
2663}
2664let Predicates = [HasBWI, HasEGPR, In64BitMode] in {
2665  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem, "_EVEX">,
2666               EVEX, TB, PD, REX_W;
2667  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32, "_EVEX">,
2668               EVEX, TB, XD;
2669  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem, "_EVEX">,
2670               EVEX, TB, REX_W;
2671  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64, "_EVEX">,
2672               EVEX, TB, XD, REX_W;
2673}
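// For example, the definitions above cover moves such as
//   kmovw %k1, %eax       kmovw %edi, %k1       kmovw (%rsi), %k2
// The "_EVEX" variants are the EVEX-encoded forms selected when the APX
// extended GPRs are available.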
2674
2675// GR from/to mask register
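// KMOVW/KMOVB only move to and from 32-bit GPRs, so the i8/i16 cases below go
// through a 32-bit register via INSERT_SUBREG/EXTRACT_SUBREG.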
2676def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2677          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2678def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2679          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2680def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
2681          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;
2682
2683def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2684          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2685def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2686          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
2687
2688def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2689          (KMOVWrk VK16:$src)>;
2690def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2691          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2692def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2693          (COPY_TO_REGCLASS VK16:$src, GR32)>;
2694def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2695          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
2696
2697def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2698          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2699def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2700          (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2701def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2702          (COPY_TO_REGCLASS VK8:$src, GR32)>;
2703def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2704          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
2705
2706def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2707          (COPY_TO_REGCLASS GR32:$src, VK32)>;
2708def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2709          (COPY_TO_REGCLASS VK32:$src, GR32)>;
2710def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2711          (COPY_TO_REGCLASS GR64:$src, VK64)>;
2712def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2713          (COPY_TO_REGCLASS VK64:$src, GR64)>;
2714
2715// Load/store kreg
2716let Predicates = [HasDQI] in {
2717  def : Pat<(v1i1 (load addr:$src)),
2718            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2719  def : Pat<(v2i1 (load addr:$src)),
2720            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2721  def : Pat<(v4i1 (load addr:$src)),
2722            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2723}
2724
2725let Predicates = [HasAVX512] in {
2726  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2727            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2728  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2729            (KMOVWkm addr:$src)>;
2730}
2731
2732def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2733                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2734                                              SDTCVecEltisVT<1, i1>,
2735                                              SDTCisPtrTy<2>]>>;
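// X86kextract is EXTRACT_VECTOR_ELT of an i1 vector yielding an i8; the
// patterns below handle the index-0 case by copying the mask to a GPR and
// taking its low byte.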
2736
2737let Predicates = [HasAVX512] in {
2738  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2739    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2740              (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2741
2742    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2743              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2744
2745    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2746              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2747
2748    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2749              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2750  }
2751
2752  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
2753  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
2754  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
2755  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
2756  defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
2757  defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
2758  defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;
2759
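  // A single GR8-derived bit inserted into an all-zeros v16i1: mask the GPR
  // down to bit 0 before moving it into a k-register so bits 1-15 stay zero.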
2760  def : Pat<(insert_subvector (v16i1 immAllZerosV),
2761                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2762            (KMOVWkr (AND32ri
2763                      (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2764                      (i32 1)))>;
2765}
2766
2767// Mask unary operation
2768// - KNOT
2769multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2770                            RegisterClass KRC, SDPatternOperator OpNode,
2771                            X86FoldableSchedWrite sched, Predicate prd> {
2772  let Predicates = [prd] in
2773    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2774               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2775               [(set KRC:$dst, (OpNode KRC:$src))]>,
2776               Sched<[sched]>;
2777}
2778
2779multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2780                                SDPatternOperator OpNode,
2781                                X86FoldableSchedWrite sched> {
2782  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2783                            sched, HasDQI>, VEX, TB, PD;
2784  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2785                            sched, HasAVX512>, VEX, TB;
2786  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2787                            sched, HasBWI>, VEX, TB, PD, REX_W;
2788  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2789                            sched, HasBWI>, VEX, TB, REX_W;
2790}
2791
2792// TODO - do we need a X86SchedWriteWidths::KMASK type?
2793defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
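// For example, knotw %k1, %k2 complements all 16 bits of k1 into k2.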
2794
2795// KNL (no DQI) lacks KNOTB/KMOVB; an 8-bit mask is promoted to 16-bit and uses KNOTW.
2796let Predicates = [HasAVX512, NoDQI] in
2797def : Pat<(vnot VK8:$src),
2798          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
2799
2800def : Pat<(vnot VK4:$src),
2801          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2802def : Pat<(vnot VK2:$src),
2803          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2804def : Pat<(vnot VK1:$src),
2805          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;
2806
2807// Mask binary operation
2808// - KAND, KANDN, KOR, KXNOR, KXOR
2809multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2810                           RegisterClass KRC, SDPatternOperator OpNode,
2811                           X86FoldableSchedWrite sched, Predicate prd,
2812                           bit IsCommutable> {
2813  let Predicates = [prd], isCommutable = IsCommutable in
2814    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2815               !strconcat(OpcodeStr,
2816                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2817               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
2818               Sched<[sched]>;
2819}
2820
2821multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2822                                 SDPatternOperator OpNode,
2823                                 X86FoldableSchedWrite sched, bit IsCommutable,
2824                                 Predicate prdW = HasAVX512> {
2825  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2826                             sched, HasDQI, IsCommutable>, VEX, VVVV, VEX_L, TB, PD;
2827  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2828                             sched, prdW, IsCommutable>, VEX, VVVV, VEX_L, TB;
2829  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2830                             sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB, PD;
2831  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2832                             sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB;
2833}
2834
2835// TODO - do we need a X86SchedWriteWidths::KMASK type?
2836defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
2837defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
2838defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
2839defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
2840defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
2841defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
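// For example, kandw %k2, %k1, %k3 computes k3 = k1 & k2, and kaddb (DQI)
// adds two masks as 8-bit integers.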
2842
2843multiclass avx512_binop_pat<SDPatternOperator VOpNode,
2844                            Instruction Inst> {
2845  // With plain AVX512F an 8-bit mask is promoted to a 16-bit mask; with DQI
2846  // the 8-bit type is legal and the KxxxB instructions are used directly.
2847  let Predicates = [NoDQI] in
2848  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
2849            (COPY_TO_REGCLASS
2850              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
2851                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
2852
2853  // All types smaller than 8 bits require conversion anyway
2854  def : Pat<(VOpNode VK1:$src1, VK1:$src2),
2855        (COPY_TO_REGCLASS (Inst
2856                           (COPY_TO_REGCLASS VK1:$src1, VK16),
2857                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
2858  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
2859        (COPY_TO_REGCLASS (Inst
2860                           (COPY_TO_REGCLASS VK2:$src1, VK16),
2861                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
2862  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
2863        (COPY_TO_REGCLASS (Inst
2864                           (COPY_TO_REGCLASS VK4:$src1, VK16),
2865                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
2866}
2867
2868defm : avx512_binop_pat<and,   KANDWrr>;
2869defm : avx512_binop_pat<vandn, KANDNWrr>;
2870defm : avx512_binop_pat<or,    KORWrr>;
2871defm : avx512_binop_pat<vxnor, KXNORWrr>;
2872defm : avx512_binop_pat<xor,   KXORWrr>;
2873
2874// Mask unpacking
2875multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
2876                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
2877                             Predicate prd> {
2878  let Predicates = [prd] in {
2879    let hasSideEffects = 0 in
2880    def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
2881               (ins Src.KRC:$src1, Src.KRC:$src2),
2882               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
2883               VEX, VVVV, VEX_L, Sched<[sched]>;
2884
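    // KUNPCK places $src2 in the low half of the result, while concat_vectors
    // puts its first operand in the low elements, hence the swapped operands
    // in the pattern below.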
2885    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
2886              (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
2887  }
2888}
2889
2890defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, TB, PD;
2891defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, TB;
2892defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, TB, REX_W;
2893
2894// Mask bit testing
2895multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2896                              SDNode OpNode, X86FoldableSchedWrite sched,
2897                              Predicate prd> {
2898  let Predicates = [prd], Defs = [EFLAGS] in
2899    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
2900               !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
2901               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
2902               Sched<[sched]>;
2903}
2904
2905multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
2906                                X86FoldableSchedWrite sched,
2907                                Predicate prdW = HasAVX512> {
2908  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
2909                                                                VEX, TB, PD;
2910  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
2911                                                                VEX, TB;
2912  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
2913                                                                VEX, TB, REX_W;
2914  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
2915                                                                VEX, TB, PD, REX_W;
2916}
2917
2918// TODO - do we need a X86SchedWriteWidths::KMASK type?
2919defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
2920defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
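// For example, kortestw %k1, %k2 sets ZF when (k1 | k2) is all zeros and CF
// when it is all ones.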
2921
2922// Mask shift
2923multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2924                               SDNode OpNode, X86FoldableSchedWrite sched> {
2925  let Predicates = [HasAVX512] in
2926    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
2927                 !strconcat(OpcodeStr,
2928                            "\t{$imm, $src, $dst|$dst, $src, $imm}"),
2929                            [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
2930                 Sched<[sched]>;
2931}
2932
2933multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
2934                                 SDNode OpNode, X86FoldableSchedWrite sched> {
2935  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2936                               sched>, VEX, TA, PD, REX_W;
2937  let Predicates = [HasDQI] in
2938  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2939                               sched>, VEX, TA, PD;
2940  let Predicates = [HasBWI] in {
2941  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2942                               sched>, VEX, TA, PD, REX_W;
2943  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2944                               sched>, VEX, TA, PD;
2945  }
2946}
2947
2948defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
2949defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
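// For example, kshiftlw $4, %k1, %k2 shifts the 16-bit mask left by four bit
// positions, filling with zeros from the bottom.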
2950
2951// Patterns for comparing 128/256-bit integer vectors with the 512-bit instructions when VLX is unavailable.
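// The narrow sources are widened by inserting them into undef 512-bit
// registers, the ZMM compare is used, and the result is copied back to the
// narrow mask register class.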
2952multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
2953                                                 string InstStr,
2954                                                 X86VectorVTInfo Narrow,
2955                                                 X86VectorVTInfo Wide> {
2956def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
2957                                (Narrow.VT Narrow.RC:$src2), cond)),
2958          (COPY_TO_REGCLASS
2959           (!cast<Instruction>(InstStr#"Zrri")
2960            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2961            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
2962            (X86pcmpm_imm $cc)), Narrow.KRC)>;
2963
2964def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
2965                           (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
2966                                                    (Narrow.VT Narrow.RC:$src2),
2967                                                    cond)))),
2968          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
2969           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
2970           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2971           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
2972           (X86pcmpm_imm $cc)), Narrow.KRC)>;
2973}
2974
2975multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
2976                                                     string InstStr,
2977                                                     X86VectorVTInfo Narrow,
2978                                                     X86VectorVTInfo Wide> {
2979// Broadcast load.
2980def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
2981                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
2982          (COPY_TO_REGCLASS
2983           (!cast<Instruction>(InstStr#"Zrmib")
2984            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2985            addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
2986
2987def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
2988                           (Narrow.KVT
2989                            (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
2990                                         (Narrow.BroadcastLdFrag addr:$src2),
2991                                         cond)))),
2992          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
2993           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
2994           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2995           addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
2996
2997// Commuted with broadcast load.
2998def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
2999                                (Narrow.VT Narrow.RC:$src1),
3000                                cond)),
3001          (COPY_TO_REGCLASS
3002           (!cast<Instruction>(InstStr#"Zrmib")
3003            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3004            addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3005
3006def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3007                           (Narrow.KVT
3008                            (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3009                                         (Narrow.VT Narrow.RC:$src1),
3010                                         cond)))),
3011          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3012           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3013           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3014           addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3015}
3016
3017// Same as above, but for fp types, which don't use PatFrags.
3018multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3019                                                X86VectorVTInfo Narrow,
3020                                                X86VectorVTInfo Wide> {
3021def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3022                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3023          (COPY_TO_REGCLASS
3024           (!cast<Instruction>(InstStr#"Zrri")
3025            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3026            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3027            timm:$cc), Narrow.KRC)>;
3028
3029def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3030                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3031                                       (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3032          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3033           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3034           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3035           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3036           timm:$cc), Narrow.KRC)>;
3037
3038// Broadcast load.
3039def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3040                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3041          (COPY_TO_REGCLASS
3042           (!cast<Instruction>(InstStr#"Zrmbi")
3043            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3044            addr:$src2, timm:$cc), Narrow.KRC)>;
3045
3046def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3047                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3048                                       (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3049          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3050           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3051           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3052           addr:$src2, timm:$cc), Narrow.KRC)>;
3053
3054// Commuted with broadcast load.
3055def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3056                               (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3057          (COPY_TO_REGCLASS
3058           (!cast<Instruction>(InstStr#"Zrmbi")
3059            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3060            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3061
3062def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3063                           (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3064                                       (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3065          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3066           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3067           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3068           addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3069}
3070
3071let Predicates = [HasAVX512, NoVLX] in {
3072  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3073  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3074
3075  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3076  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3077
3078  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3079  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3080
3081  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3082  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3083
3084  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3085  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3086
3087  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3088  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3089
3090  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3091  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3092
3093  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3094  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3095
3096  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3097  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3098  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3099  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
3100}
3101
3102let Predicates = [HasBWI, NoVLX] in {
3103  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3104  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3105
3106  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3107  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3108
3109  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3110  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3111
3112  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3113  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3114}
3115
3116// Mask setting all 0s or 1s
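// These are pseudos; the intent (see expandPostRAPseudos in X86InstrInfo.cpp)
// is that KSET0* is later expanded to a kxor of a mask register with itself
// and KSET1* to the corresponding kxnor, e.g. KSET0W -> KXORWrr and
// KSET1W -> KXNORWrr. This note is orientation only.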
3117multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
3118  let Predicates = [HasAVX512] in
3119    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3120        SchedRW = [WriteZero] in
3121      def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3122                     [(set KRC:$dst, (VT Val))]>;
3123}
3124
3125multiclass avx512_mask_setop_w<SDPatternOperator Val> {
3126  defm W : avx512_mask_setop<VK16, v16i1, Val>;
3127  defm D : avx512_mask_setop<VK32, v32i1, Val>;
3128  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3129}
3130
3131defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3132defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3133
3134// With AVX-512 only, 8-bit and narrower mask constants are promoted to a 16-bit mask.
3135let Predicates = [HasAVX512] in {
3136  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3137  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3138  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3139  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3140  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
3141  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
3142  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
3143  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
3144}
3145
3146// Patterns for kmask insert_subvector/extract_subvector to/from index=0
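// All mask register classes alias the same physical k-registers, so an
// index-0 insert/extract needs only a register-class copy. For example,
//   (v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0)))
// maps to (COPY_TO_REGCLASS VK16:$src, VK8), which normally coalesces away.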
3147multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3148                                             RegisterClass RC, ValueType VT> {
3149  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3150            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3151
3152  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3153            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3154}
3155defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
3156defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
3157defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
3158defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
3159defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
3160defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;
3161
3162defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
3163defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
3164defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
3165defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
3166defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;
3167
3168defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
3169defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
3170defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
3171defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;
3172
3173defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
3174defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
3175defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;
3176
3177defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3178defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3179
3180defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3181
3182//===----------------------------------------------------------------------===//
3183// AVX-512 - Aligned and unaligned load and store
3184//
3185
3186multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3187                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3188                       X86SchedWriteMoveLS Sched, bit NoRMPattern = 0,
3189                       SDPatternOperator SelectOprr = vselect> {
3190  let hasSideEffects = 0 in {
3191  let isMoveReg = 1 in
3192  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3193                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3194                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>;
3195  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3196                      (ins _.KRCWM:$mask,  _.RC:$src),
3197                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3198                       "${dst} {${mask}} {z}, $src}"),
3199                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3200                                           (_.VT _.RC:$src),
3201                                           _.ImmAllZerosV)))], _.ExeDomain>,
3202                       EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3203
3204  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3205  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3206                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3207                    !if(NoRMPattern, [],
3208                        [(set _.RC:$dst,
3209                          (_.VT (ld_frag addr:$src)))]),
3210                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>;
3211
3212  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3213    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3214                      (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3215                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3216                      "${dst} {${mask}}, $src1}"),
3217                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3218                                          (_.VT _.RC:$src1),
3219                                          (_.VT _.RC:$src0))))], _.ExeDomain>,
3220                       EVEX, EVEX_K, Sched<[Sched.RR]>;
3221    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3222                     (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3223                     !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3224                      "${dst} {${mask}}, $src1}"),
3225                     [(set _.RC:$dst, (_.VT
3226                         (vselect_mask _.KRCWM:$mask,
3227                          (_.VT (ld_frag addr:$src1)),
3228                           (_.VT _.RC:$src0))))], _.ExeDomain>,
3229                     EVEX, EVEX_K, Sched<[Sched.RM]>;
3230  }
3231  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3232                  (ins _.KRCWM:$mask, _.MemOp:$src),
3233                  OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3234                                "${dst} {${mask}} {z}, $src}",
3235                  [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
3236                    (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3237                  _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3238  }
3239  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3240            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3241
3242  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3243            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3244
3245  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3246            (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
3247             _.KRCWM:$mask, addr:$ptr)>;
3248}
3249
3250multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3251                                 AVX512VLVectorVTInfo _, Predicate prd,
3252                                 X86SchedWriteMoveLSWidths Sched,
3253                                 bit NoRMPattern = 0> {
3254  let Predicates = [prd] in
3255  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3256                       _.info512.AlignedLdFrag, masked_load_aligned,
3257                       Sched.ZMM, NoRMPattern>, EVEX_V512;
3258
3259  let Predicates = [prd, HasVLX] in {
3260  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3261                          _.info256.AlignedLdFrag, masked_load_aligned,
3262                          Sched.YMM, NoRMPattern>, EVEX_V256;
3263  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3264                          _.info128.AlignedLdFrag, masked_load_aligned,
3265                          Sched.XMM, NoRMPattern>, EVEX_V128;
3266  }
3267}
3268
3269multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3270                          AVX512VLVectorVTInfo _, Predicate prd,
3271                          X86SchedWriteMoveLSWidths Sched,
3272                          bit NoRMPattern = 0,
3273                          SDPatternOperator SelectOprr = vselect> {
3274  let Predicates = [prd] in
3275  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3276                       masked_load, Sched.ZMM, NoRMPattern, SelectOprr>, EVEX_V512;
3277
3278  let Predicates = [prd, HasVLX] in {
3279  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3280                         masked_load, Sched.YMM, NoRMPattern, SelectOprr>, EVEX_V256;
3281  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3282                         masked_load, Sched.XMM, NoRMPattern, SelectOprr>, EVEX_V128;
3283  }
3284}
3285
3286multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3287                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3288                        X86SchedWriteMoveLS Sched, bit NoMRPattern = 0> {
3289  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3290  let isMoveReg = 1 in
3291  def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3292                         OpcodeStr # "\t{$src, $dst|$dst, $src}",
3293                         [], _.ExeDomain>, EVEX,
3294                         Sched<[Sched.RR]>;
3295  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3296                         (ins _.KRCWM:$mask, _.RC:$src),
3297                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3298                         "${dst} {${mask}}, $src}",
3299                         [], _.ExeDomain>,  EVEX, EVEX_K,
3300                         Sched<[Sched.RR]>;
3301  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3302                          (ins _.KRCWM:$mask, _.RC:$src),
3303                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3304                          "${dst} {${mask}} {z}, $src}",
3305                          [], _.ExeDomain>, EVEX, EVEX_KZ,
3306                          Sched<[Sched.RR]>;
3307  }
3308
3309  let hasSideEffects = 0, mayStore = 1 in
3310  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3311                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3312                    !if(NoMRPattern, [],
3313                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3314                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>;
3315  def mrk : AVX512PI<opc, MRMDestMem, (outs),
3316                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3317              OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3318               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>;
3319
3320  def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3321           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3322                                                        _.KRCWM:$mask, _.RC:$src)>;
3323
3324  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3325                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3326                   _.RC:$dst, _.RC:$src), 0>;
3327  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3328                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3329                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3330  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3331                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3332                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3333}
3334
3335multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3336                            AVX512VLVectorVTInfo _, Predicate prd,
3337                            X86SchedWriteMoveLSWidths Sched,
3338                            bit NoMRPattern = 0> {
3339  let Predicates = [prd] in
3340  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3341                        masked_store, Sched.ZMM, NoMRPattern>, EVEX_V512;
3342  let Predicates = [prd, HasVLX] in {
3343    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3344                             masked_store, Sched.YMM, NoMRPattern>, EVEX_V256;
3345    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3346                             masked_store, Sched.XMM, NoMRPattern>, EVEX_V128;
3347  }
3348}
3349
3350multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3351                                  AVX512VLVectorVTInfo _, Predicate prd,
3352                                  X86SchedWriteMoveLSWidths Sched,
3353                                  bit NoMRPattern = 0> {
3354  let Predicates = [prd] in
3355  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3356                        masked_store_aligned, Sched.ZMM, NoMRPattern>, EVEX_V512;
3357
3358  let Predicates = [prd, HasVLX] in {
3359    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3360                             masked_store_aligned, Sched.YMM, NoMRPattern>, EVEX_V256;
3361    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3362                             masked_store_aligned, Sched.XMM, NoMRPattern>, EVEX_V128;
3363  }
3364}
3365
3366defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3367                                     HasAVX512, SchedWriteFMoveLS>,
3368               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3369                                      HasAVX512, SchedWriteFMoveLS>,
3370               TB, EVEX_CD8<32, CD8VF>;
3371
3372defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3373                                     HasAVX512, SchedWriteFMoveLS>,
3374               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3375                                      HasAVX512, SchedWriteFMoveLS>,
3376               TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
3377
3378defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3379                              SchedWriteFMoveLS, 0, null_frag>,
3380               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3381                               SchedWriteFMoveLS>,
3382                               TB, EVEX_CD8<32, CD8VF>;
3383
3384defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3385                              SchedWriteFMoveLS, 0, null_frag>,
3386               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3387                               SchedWriteFMoveLS>,
3388               TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
3389
3390defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3391                                       HasAVX512, SchedWriteVecMoveLS, 1>,
3392                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3393                                        HasAVX512, SchedWriteVecMoveLS, 1>,
3394                 TB, PD, EVEX_CD8<32, CD8VF>;
3395
3396defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3397                                       HasAVX512, SchedWriteVecMoveLS>,
3398                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3399                                        HasAVX512, SchedWriteVecMoveLS>,
3400                 TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
3401
3402defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3403                               SchedWriteVecMoveLS, 1>,
3404                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3405                                SchedWriteVecMoveLS, 1>,
3406                TB, XD, EVEX_CD8<8, CD8VF>;
3407
3408defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3409                                SchedWriteVecMoveLS, 1>,
3410                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3411                                 SchedWriteVecMoveLS, 1>,
3412                 TB, XD, REX_W, EVEX_CD8<16, CD8VF>;
3413
3414defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3415                                SchedWriteVecMoveLS, 1, null_frag>,
3416                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3417                                 SchedWriteVecMoveLS, 1>,
3418                 TB, XS, EVEX_CD8<32, CD8VF>;
3419
3420defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3421                                SchedWriteVecMoveLS, 0, null_frag>,
3422                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3423                                 SchedWriteVecMoveLS>,
3424                 TB, XS, REX_W, EVEX_CD8<64, CD8VF>;
3425
3426// Special instructions to help with spilling when we don't have VLX. We need
3427// to load into or store from a ZMM register instead. These are converted in
3428// expandPostRAPseudos.
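// Orientation-only sketch: a reload such as
//   %xmm1 = VMOVAPSZ128rm_NOVLX <fi#0>
// is rewritten after register allocation so the memory access is done with a
// move that is legal without VLX, going through the register's wider
// super-register when needed; the exact rewrite lives in expandPostRAPseudos.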
3429let isReMaterializable = 1, canFoldAsLoad = 1,
3430    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3431def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3432                            "", []>, Sched<[WriteFLoadX]>;
3433def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3434                            "", []>, Sched<[WriteFLoadY]>;
3435def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3436                            "", []>, Sched<[WriteFLoadX]>;
3437def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3438                            "", []>, Sched<[WriteFLoadY]>;
3439}
3440
3441let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3442def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3443                            "", []>, Sched<[WriteFStoreX]>;
3444def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3445                            "", []>, Sched<[WriteFStoreY]>;
3446def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3447                            "", []>, Sched<[WriteFStoreX]>;
3448def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3449                            "", []>, Sched<[WriteFStoreY]>;
3450}
3451
3452def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3453                          (v8i64 VR512:$src))),
3454   (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3455                                              VK8), VR512:$src)>;
3456
3457def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3458                           (v16i32 VR512:$src))),
3459                  (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3460
3461// These patterns exist to prevent the above patterns from introducing a second
3462// mask inversion when one already exists.
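// For example, (vselect (vnot VK8:$mask), 0, VR512:$src) already carries the
// inversion, so it maps directly to (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)
// rather than KNOT'ing an already-inverted mask a second time.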
3463def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
3464                          (v8i64 immAllZerosV),
3465                          (v8i64 VR512:$src))),
3466                 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3467def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
3468                           (v16i32 immAllZerosV),
3469                           (v16i32 VR512:$src))),
3470                  (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3471
3472multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3473                              X86VectorVTInfo Wide> {
3474 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3475                               Narrow.RC:$src1, Narrow.RC:$src0)),
3476           (EXTRACT_SUBREG
3477            (Wide.VT
3478             (!cast<Instruction>(InstrStr#"rrk")
3479              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3480              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3481              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3482            Narrow.SubRegIdx)>;
3483
3484 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3485                               Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3486           (EXTRACT_SUBREG
3487            (Wide.VT
3488             (!cast<Instruction>(InstrStr#"rrkz")
3489              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3490              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3491            Narrow.SubRegIdx)>;
3492}
3493
3494// Patterns for handling selects of 128/256-bit vectors when VLX isn't
3495// available. Use a 512-bit operation and extract.
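//
// Rough sketch (illustrative names): a masked v8f32 blend without VLX is
// selected along the lines of
//   %w0  = INSERT_SUBREG IMPLICIT_DEF, %src0, sub_ymm
//   %w1  = INSERT_SUBREG IMPLICIT_DEF, %src1, sub_ymm
//   %kw  = COPY_TO_REGCLASS %mask, VK16WM
//   %res = EXTRACT_SUBREG (VMOVAPSZrrk %w0, %kw, %w1), sub_ymm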
3496let Predicates = [HasAVX512, NoVLX] in {
3497  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3498  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3499  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3500  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3501
3502  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3503  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3504  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3505  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3506}
3507
3508let Predicates = [HasBWI, NoVLX] in {
3509  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3510  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3511
3512  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3513  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3514
3515  defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>;
3516  defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>;
3517
3518  defm : mask_move_lowering<"VMOVDQU16Z", v8bf16x_info, v32bf16_info>;
3519  defm : mask_move_lowering<"VMOVDQU16Z", v16bf16x_info, v32bf16_info>;
3520}
3521
3522let Predicates = [HasAVX512] in {
3523  // 512-bit load.
3524  def : Pat<(alignedloadv16i32 addr:$src),
3525            (VMOVDQA64Zrm addr:$src)>;
3526  def : Pat<(alignedloadv32i16 addr:$src),
3527            (VMOVDQA64Zrm addr:$src)>;
3528  def : Pat<(alignedloadv32f16 addr:$src),
3529            (VMOVAPSZrm addr:$src)>;
3530  def : Pat<(alignedloadv32bf16 addr:$src),
3531            (VMOVAPSZrm addr:$src)>;
3532  def : Pat<(alignedloadv64i8 addr:$src),
3533            (VMOVDQA64Zrm addr:$src)>;
3534  def : Pat<(loadv16i32 addr:$src),
3535            (VMOVDQU64Zrm addr:$src)>;
3536  def : Pat<(loadv32i16 addr:$src),
3537            (VMOVDQU64Zrm addr:$src)>;
3538  def : Pat<(loadv32f16 addr:$src),
3539            (VMOVUPSZrm addr:$src)>;
3540  def : Pat<(loadv32bf16 addr:$src),
3541            (VMOVUPSZrm addr:$src)>;
3542  def : Pat<(loadv64i8 addr:$src),
3543            (VMOVDQU64Zrm addr:$src)>;
3544
3545  // 512-bit store.
3546  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3547            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3548  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3549            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3550  def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
3551            (VMOVAPSZmr addr:$dst, VR512:$src)>;
3552  def : Pat<(alignedstore (v32bf16 VR512:$src), addr:$dst),
3553            (VMOVAPSZmr addr:$dst, VR512:$src)>;
3554  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3555            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3556  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3557            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3558  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3559            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3560  def : Pat<(store (v32f16 VR512:$src), addr:$dst),
3561            (VMOVUPSZmr addr:$dst, VR512:$src)>;
3562  def : Pat<(store (v32bf16 VR512:$src), addr:$dst),
3563            (VMOVUPSZmr addr:$dst, VR512:$src)>;
3564  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3565            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3566}
3567
3568let Predicates = [HasVLX] in {
3569  // 128-bit load.
3570  def : Pat<(alignedloadv4i32 addr:$src),
3571            (VMOVDQA64Z128rm addr:$src)>;
3572  def : Pat<(alignedloadv8i16 addr:$src),
3573            (VMOVDQA64Z128rm addr:$src)>;
3574  def : Pat<(alignedloadv8f16 addr:$src),
3575            (VMOVAPSZ128rm addr:$src)>;
3576  def : Pat<(alignedloadv8bf16 addr:$src),
3577            (VMOVAPSZ128rm addr:$src)>;
3578  def : Pat<(alignedloadv16i8 addr:$src),
3579            (VMOVDQA64Z128rm addr:$src)>;
3580  def : Pat<(loadv4i32 addr:$src),
3581            (VMOVDQU64Z128rm addr:$src)>;
3582  def : Pat<(loadv8i16 addr:$src),
3583            (VMOVDQU64Z128rm addr:$src)>;
3584  def : Pat<(loadv8f16 addr:$src),
3585            (VMOVUPSZ128rm addr:$src)>;
3586  def : Pat<(loadv8bf16 addr:$src),
3587            (VMOVUPSZ128rm addr:$src)>;
3588  def : Pat<(loadv16i8 addr:$src),
3589            (VMOVDQU64Z128rm addr:$src)>;
3590
3591  // 128-bit store.
3592  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3593            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3594  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3595            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3596  def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
3597            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3598  def : Pat<(alignedstore (v8bf16 VR128X:$src), addr:$dst),
3599            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3600  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3601            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3602  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3603            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3604  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3605            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3606  def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
3607            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3608  def : Pat<(store (v8bf16 VR128X:$src), addr:$dst),
3609            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3610  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3611            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3612
3613  // 256-bit load.
3614  def : Pat<(alignedloadv8i32 addr:$src),
3615            (VMOVDQA64Z256rm addr:$src)>;
3616  def : Pat<(alignedloadv16i16 addr:$src),
3617            (VMOVDQA64Z256rm addr:$src)>;
3618  def : Pat<(alignedloadv16f16 addr:$src),
3619            (VMOVAPSZ256rm addr:$src)>;
3620  def : Pat<(alignedloadv16bf16 addr:$src),
3621            (VMOVAPSZ256rm addr:$src)>;
3622  def : Pat<(alignedloadv32i8 addr:$src),
3623            (VMOVDQA64Z256rm addr:$src)>;
3624  def : Pat<(loadv8i32 addr:$src),
3625            (VMOVDQU64Z256rm addr:$src)>;
3626  def : Pat<(loadv16i16 addr:$src),
3627            (VMOVDQU64Z256rm addr:$src)>;
3628  def : Pat<(loadv16f16 addr:$src),
3629            (VMOVUPSZ256rm addr:$src)>;
3630  def : Pat<(loadv16bf16 addr:$src),
3631            (VMOVUPSZ256rm addr:$src)>;
3632  def : Pat<(loadv32i8 addr:$src),
3633            (VMOVDQU64Z256rm addr:$src)>;
3634
3635  // 256-bit store.
3636  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3637            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3638  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3639            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3640  def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
3641            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3642  def : Pat<(alignedstore (v16bf16 VR256X:$src), addr:$dst),
3643            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3644  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3645            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3646  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3647            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3648  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3649            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3650  def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
3651            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3652  def : Pat<(store (v16bf16 VR256X:$src), addr:$dst),
3653            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3654  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3655            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3656}
3657
3658multiclass mask_move_lowering_f16_bf16<AVX512VLVectorVTInfo _> {
3659let Predicates = [HasBWI] in {
3660  def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), (_.info512.VT VR512:$src0))),
3661            (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
3662  def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), _.info512.ImmAllZerosV)),
3663            (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
3664  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3665                     (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), (_.info512.VT VR512:$src0))),
3666            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3667  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3668                     (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), _.info512.ImmAllZerosV)),
3669            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3670  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3671                     (_.info512.VT (_.info512.LdFrag addr:$src)), (_.info512.VT VR512:$src0))),
3672            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3673  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3674                     (_.info512.VT (_.info512.LdFrag addr:$src)), _.info512.ImmAllZerosV)),
3675            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3676  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, (_.info512.VT VR512:$src0))),
3677            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3678  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, undef)),
3679            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3680  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, _.info512.ImmAllZerosV)),
3681            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3682
3683  def : Pat<(masked_store (_.info512.VT VR512:$src), addr:$dst, VK32WM:$mask),
3684            (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
3685}
3686let Predicates = [HasBWI, HasVLX] in {
3687  def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src0))),
3688            (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
3689  def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), _.info256.ImmAllZerosV)),
3690            (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
3691  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3692                     (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
3693            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3694  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3695                     (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), _.info256.ImmAllZerosV)),
3696            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3697  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3698                     (_.info256.VT (_.info256.LdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
3699            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3700  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3701                     (_.info256.VT (_.info256.LdFrag addr:$src)), _.info256.ImmAllZerosV)),
3702            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3703  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, (_.info256.VT VR256X:$src0))),
3704            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3705  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, undef)),
3706            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3707  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, _.info256.ImmAllZerosV)),
3708            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3709
3710  def : Pat<(masked_store (_.info256.VT VR256X:$src), addr:$dst, VK16WM:$mask),
3711            (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;
3712
3713  def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), (_.info128.VT VR128X:$src0))),
3714            (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
3715  def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), _.info128.ImmAllZerosV)),
3716            (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
3717  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3718                     (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
3719            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3720  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3721                     (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), _.info128.ImmAllZerosV)),
3722            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3723  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3724                     (_.info128.VT (_.info128.LdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
3725            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3726  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3727                     (_.info128.VT (_.info128.LdFrag addr:$src)), _.info128.ImmAllZerosV)),
3728            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3729  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, (_.info128.VT VR128X:$src0))),
3730            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3731  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, undef)),
3732            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3733  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, _.info128.ImmAllZerosV)),
3734            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3735
3736  def : Pat<(masked_store (_.info128.VT VR128X:$src), addr:$dst, VK8WM:$mask),
3737            (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
3738}
3739}
3740
3741defm : mask_move_lowering_f16_bf16<avx512vl_f16_info>;
3742defm : mask_move_lowering_f16_bf16<avx512vl_bf16_info>;
3743
3744// Move Int Doubleword to Packed Double Int
3745//
3746let ExeDomain = SSEPackedInt in {
3747def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3748                      "vmovd\t{$src, $dst|$dst, $src}",
3749                      [(set VR128X:$dst,
3750                        (v4i32 (scalar_to_vector GR32:$src)))]>,
3751                        EVEX, Sched<[WriteVecMoveFromGpr]>;
3752def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3753                      "vmovd\t{$src, $dst|$dst, $src}",
3754                      [(set VR128X:$dst,
3755                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3756                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
3757def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3758                      "vmovq\t{$src, $dst|$dst, $src}",
3759                        [(set VR128X:$dst,
3760                          (v2i64 (scalar_to_vector GR64:$src)))]>,
3761                      EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
3762let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3763def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3764                      (ins i64mem:$src),
3765                      "vmovq\t{$src, $dst|$dst, $src}", []>,
3766                      EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
3767let isCodeGenOnly = 1 in {
3768def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3769                       "vmovq\t{$src, $dst|$dst, $src}",
3770                       [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3771                       EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
3772def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3773                         "vmovq\t{$src, $dst|$dst, $src}",
3774                         [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3775                         EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
3776}
3777} // ExeDomain = SSEPackedInt
3778
3779// Move Int Doubleword to Single Scalar
3780//
3781let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3782def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3783                      "vmovd\t{$src, $dst|$dst, $src}",
3784                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3785                      EVEX, Sched<[WriteVecMoveFromGpr]>;
3786} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3787
3788// Move doubleword from xmm register to r/m32
3789//
3790let ExeDomain = SSEPackedInt in {
3791def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3792                       "vmovd\t{$src, $dst|$dst, $src}",
3793                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3794                                        (iPTR 0)))]>,
3795                       EVEX, Sched<[WriteVecMoveToGpr]>;
3796def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
3797                       (ins i32mem:$dst, VR128X:$src),
3798                       "vmovd\t{$src, $dst|$dst, $src}",
3799                       [(store (i32 (extractelt (v4i32 VR128X:$src),
3800                                     (iPTR 0))), addr:$dst)]>,
3801                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3802} // ExeDomain = SSEPackedInt
3803
3804// Move quadword from xmm1 register to r/m64
3805//
3806let ExeDomain = SSEPackedInt in {
3807def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3808                      "vmovq\t{$src, $dst|$dst, $src}",
3809                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3810                                                   (iPTR 0)))]>,
3811                      TB, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>,
3812                      Requires<[HasAVX512]>;
3813
3814let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3815def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3816                      "vmovq\t{$src, $dst|$dst, $src}", []>, TB, PD,
3817                      EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>,
3818                      Requires<[HasAVX512, In64BitMode]>;
3819
3820def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3821                      (ins i64mem:$dst, VR128X:$src),
3822                      "vmovq\t{$src, $dst|$dst, $src}",
3823                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3824                              addr:$dst)]>,
3825                      EVEX, TB, PD, REX_W, EVEX_CD8<64, CD8VT1>,
3826                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
3827
3828let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3829def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3830                             (ins VR128X:$src),
3831                             "vmovq\t{$src, $dst|$dst, $src}", []>,
3832                             EVEX, REX_W, Sched<[SchedWriteVecLogic.XMM]>;
3833} // ExeDomain = SSEPackedInt
3834
3835def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3836                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3837
3838let Predicates = [HasAVX512] in {
3839  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
3840            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
3841}
3842
3843// Move Scalar Single to Double Int
3844//
3845let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3846def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3847                      (ins FR32X:$src),
3848                      "vmovd\t{$src, $dst|$dst, $src}",
3849                      [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3850                      EVEX, Sched<[WriteVecMoveToGpr]>;
3851} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3852
3853// Move Quadword Int to Packed Quadword Int
3854//
3855let ExeDomain = SSEPackedInt in {
3856def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3857                      (ins i64mem:$src),
3858                      "vmovq\t{$src, $dst|$dst, $src}",
3859                      [(set VR128X:$dst,
3860                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3861                      EVEX, REX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3862} // ExeDomain = SSEPackedInt
3863
3864// Allow "vmovd" but print "vmovq".
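// For example, "vmovd %rax, %xmm0" is accepted by the assembler (selecting the
// 64-bit GPR form) but is printed and disassembled as "vmovq %rax, %xmm0".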
3865def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3866                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3867def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3868                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3869
3870// Conversions between masks and scalar fp.
3871def : Pat<(v32i1 (bitconvert FR32X:$src)),
3872          (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
3873def : Pat<(f32 (bitconvert VK32:$src)),
3874          (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
3875
3876def : Pat<(v64i1 (bitconvert FR64X:$src)),
3877          (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
3878def : Pat<(f64 (bitconvert VK64:$src)),
3879          (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
3880
3881//===----------------------------------------------------------------------===//
3882// AVX-512  MOVSH, MOVSS, MOVSD
3883//===----------------------------------------------------------------------===//
3884
3885multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
3886                              X86VectorVTInfo _, Predicate prd = HasAVX512> {
3887  let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in
3888  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3889             (ins _.RC:$src1, _.RC:$src2),
3890             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3891             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3892             _.ExeDomain>, EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;
3893  let Predicates = [prd] in {
3894  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3895              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3896              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3897              "$dst {${mask}} {z}, $src1, $src2}"),
3898              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3899                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3900                                      _.ImmAllZerosV)))],
3901              _.ExeDomain>, EVEX, VVVV, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
3902  let Constraints = "$src0 = $dst"  in
3903  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3904             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3905             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3906             "$dst {${mask}}, $src1, $src2}"),
3907             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3908                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3909                                     (_.VT _.RC:$src0))))],
3910             _.ExeDomain>, EVEX, VVVV, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
3911  let canFoldAsLoad = 1, isReMaterializable = 1 in {
3912  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
3913             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3914             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
3915             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3916  // _alt version uses FR32/FR64 register class.
3917  let isCodeGenOnly = 1 in
3918  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3919                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3920                 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3921                 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3922  }
3923  let mayLoad = 1, hasSideEffects = 0 in {
3924    let Constraints = "$src0 = $dst" in
3925    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3926               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3927               !strconcat(asm, "\t{$src, $dst {${mask}}|",
3928               "$dst {${mask}}, $src}"),
3929               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3930    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3931               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3932               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3933               "$dst {${mask}} {z}, $src}"),
3934               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
3935  }
3936  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3937             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3938             [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
3939             EVEX, Sched<[WriteFStore]>;
3940  let mayStore = 1, hasSideEffects = 0 in
3941  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3942              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
3943              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3944              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>;
3945  }
3946}
3947
3948defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
3949                                  VEX_LIG, TB, XS, EVEX_CD8<32, CD8VT1>;
3950
3951defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
3952                                  VEX_LIG, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
3953
3954defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
3955                                  HasFP16>,
3956                                  VEX_LIG, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
3957
3958multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
3959                                       PatLeaf ZeroFP, X86VectorVTInfo _> {
3960
3961def : Pat<(_.VT (OpNode _.RC:$src0,
3962                        (_.VT (scalar_to_vector
3963                                  (_.EltVT (X86selects VK1WM:$mask,
3964                                                       (_.EltVT _.FRC:$src1),
3965                                                       (_.EltVT _.FRC:$src2))))))),
3966          (!cast<Instruction>(InstrStr#rrk)
3967                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
3968                        VK1WM:$mask,
3969                        (_.VT _.RC:$src0),
3970                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
3971
3972def : Pat<(_.VT (OpNode _.RC:$src0,
3973                        (_.VT (scalar_to_vector
3974                                  (_.EltVT (X86selects VK1WM:$mask,
3975                                                       (_.EltVT _.FRC:$src1),
3976                                                       (_.EltVT ZeroFP))))))),
3977          (!cast<Instruction>(InstrStr#rrkz)
3978                        VK1WM:$mask,
3979                        (_.VT _.RC:$src0),
3980                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
3981}
3982
3983multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3984                                        dag Mask, RegisterClass MaskRC> {
3985
3986def : Pat<(masked_store
3987             (_.info512.VT (insert_subvector undef,
3988                               (_.info128.VT _.info128.RC:$src),
3989                               (iPTR 0))), addr:$dst, Mask),
3990          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3991                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3992                      _.info128.RC:$src)>;
3993
3994}
3995
3996multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
3997                                               AVX512VLVectorVTInfo _,
3998                                               dag Mask, RegisterClass MaskRC,
3999                                               SubRegIndex subreg> {
4000
4001def : Pat<(masked_store
4002             (_.info512.VT (insert_subvector undef,
4003                               (_.info128.VT _.info128.RC:$src),
4004                               (iPTR 0))), addr:$dst, Mask),
4005          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4006                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4007                      _.info128.RC:$src)>;
4008
4009}
4010
4011// This matches the more recent codegen from clang that avoids emitting a
4012// 512-bit masked store directly. Codegen will widen a 128-bit masked store to
4013// 512 bits on AVX512F-only targets.
4014multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4015                                               AVX512VLVectorVTInfo _,
4016                                               dag Mask512, dag Mask128,
4017                                               RegisterClass MaskRC,
4018                                               SubRegIndex subreg> {
4019
4020// AVX512F pattern.
4021def : Pat<(masked_store
4022             (_.info512.VT (insert_subvector undef,
4023                               (_.info128.VT _.info128.RC:$src),
4024                               (iPTR 0))), addr:$dst, Mask512),
4025          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4026                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4027                      _.info128.RC:$src)>;
4028
4029// AVX512VL pattern.
4030def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4031          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4032                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4033                      _.info128.RC:$src)>;
4034}
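// Note: the !cast<Instruction>(InstrStr#mrk) selections in the store-lowering
// multiclasses above resolve to the EVEX_K masked scalar-store "mrk" form
// defined by avx512_move_scalar, e.g. VMOVSSZmrk when InstrStr is "VMOVSSZ".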
4035
4036multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4037                                       dag Mask, RegisterClass MaskRC> {
4038
4039def : Pat<(_.info128.VT (extract_subvector
4040                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
4041                                        _.info512.ImmAllZerosV)),
4042                           (iPTR 0))),
4043          (!cast<Instruction>(InstrStr#rmkz)
4044                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4045                      addr:$srcAddr)>;
4046
4047def : Pat<(_.info128.VT (extract_subvector
4048                (_.info512.VT (masked_load addr:$srcAddr, Mask,
4049                      (_.info512.VT (insert_subvector undef,
4050                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4051                            (iPTR 0))))),
4052                (iPTR 0))),
4053          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4054                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4055                      addr:$srcAddr)>;
4056
4057}
4058
4059multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4060                                              AVX512VLVectorVTInfo _,
4061                                              dag Mask, RegisterClass MaskRC,
4062                                              SubRegIndex subreg> {
4063
4064def : Pat<(_.info128.VT (extract_subvector
4065                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
4066                                        _.info512.ImmAllZerosV)),
4067                           (iPTR 0))),
4068          (!cast<Instruction>(InstrStr#rmkz)
4069                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4070                      addr:$srcAddr)>;
4071
4072def : Pat<(_.info128.VT (extract_subvector
4073                (_.info512.VT (masked_load addr:$srcAddr, Mask,
4074                      (_.info512.VT (insert_subvector undef,
4075                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4076                            (iPTR 0))))),
4077                (iPTR 0))),
4078          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4079                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4080                      addr:$srcAddr)>;
4081
4082}
4083
4084// This matches the more recent codegen from clang that avoids emitting a
4085// 512-bit masked load directly. Codegen will widen a 128-bit masked load to
4086// 512 bits on AVX512F-only targets.
4087multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4088                                              AVX512VLVectorVTInfo _,
4089                                              dag Mask512, dag Mask128,
4090                                              RegisterClass MaskRC,
4091                                              SubRegIndex subreg> {
4092// AVX512F patterns.
4093def : Pat<(_.info128.VT (extract_subvector
4094                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4095                                        _.info512.ImmAllZerosV)),
4096                           (iPTR 0))),
4097          (!cast<Instruction>(InstrStr#rmkz)
4098                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4099                      addr:$srcAddr)>;
4100
4101def : Pat<(_.info128.VT (extract_subvector
4102                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4103                      (_.info512.VT (insert_subvector undef,
4104                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4105                            (iPTR 0))))),
4106                (iPTR 0))),
4107          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4108                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4109                      addr:$srcAddr)>;
4110
4111// AVX512VL patterns.
4112def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4113                         _.info128.ImmAllZerosV)),
4114          (!cast<Instruction>(InstrStr#rmkz)
4115                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4116                      addr:$srcAddr)>;
4117
4118def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4119                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4120          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4121                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4122                      addr:$srcAddr)>;
4123}
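// Note: InstrStr#rmkz and InstrStr#rmk likewise resolve to the zero-masked and
// merge-masked scalar-load forms, e.g. VMOVSSZrmkz and VMOVSSZrmk when
// InstrStr is "VMOVSSZ".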
4124
4125defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4126defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4127
4128defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4129                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4130defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4131                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4132defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4133                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4134
4135let Predicates = [HasFP16] in {
4136defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
4137defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4138                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4139defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4140                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4141defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4142                   (v32i1 (insert_subvector
4143                           (v32i1 immAllZerosV),
4144                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4145                           (iPTR 0))),
4146                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4147                   GR8, sub_8bit>;
4148
4149defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4150                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4151defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4152                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4153defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4154                   (v32i1 (insert_subvector
4155                           (v32i1 immAllZerosV),
4156                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4157                           (iPTR 0))),
4158                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4159                   GR8, sub_8bit>;
4160
4161def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
4162          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
4163           (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
4164           VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4165           (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4166
4167def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
4168          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4169           (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4170}
4171
4172defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4173                   (v16i1 (insert_subvector
4174                           (v16i1 immAllZerosV),
4175                           (v4i1 (extract_subvector
4176                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4177                                  (iPTR 0))),
4178                           (iPTR 0))),
4179                   (v4i1 (extract_subvector
4180                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4181                          (iPTR 0))), GR8, sub_8bit>;
4182defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4183                   (v8i1
4184                    (extract_subvector
4185                     (v16i1
4186                      (insert_subvector
4187                       (v16i1 immAllZerosV),
4188                       (v2i1 (extract_subvector
4189                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4190                              (iPTR 0))),
4191                       (iPTR 0))),
4192                     (iPTR 0))),
4193                   (v2i1 (extract_subvector
4194                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4195                          (iPTR 0))), GR8, sub_8bit>;
4196
4197defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4198                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4199defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4200                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4201defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4202                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4203
4204defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4205                   (v16i1 (insert_subvector
4206                           (v16i1 immAllZerosV),
4207                           (v4i1 (extract_subvector
4208                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4209                                  (iPTR 0))),
4210                           (iPTR 0))),
4211                   (v4i1 (extract_subvector
4212                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4213                          (iPTR 0))), GR8, sub_8bit>;
4214defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4215                   (v8i1
4216                    (extract_subvector
4217                     (v16i1
4218                      (insert_subvector
4219                       (v16i1 immAllZerosV),
4220                       (v2i1 (extract_subvector
4221                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4222                              (iPTR 0))),
4223                       (iPTR 0))),
4224                     (iPTR 0))),
4225                   (v2i1 (extract_subvector
4226                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4227                          (iPTR 0))), GR8, sub_8bit>;
4228
4229def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4230          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4231           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4232           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4233           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4234
4235def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4236          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4237           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4238
4239def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4240          (COPY_TO_REGCLASS
4241           (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4242                                                       VK1WM:$mask, addr:$src)),
4243           FR32X)>;
4244def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4245          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4246
4247def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4248          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4249           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4250           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4251           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4252
4253def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4254          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4255           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4256
4257def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4258          (COPY_TO_REGCLASS
4259           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4260                                                       VK1WM:$mask, addr:$src)),
4261           FR64X)>;
4262def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4263          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
4264
4265
4266def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
4267          (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4268def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
4269          (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4270
4271def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
4272          (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4273def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
4274          (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4275
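// The *_REV definitions below reuse the store-form opcode (0x11, MRMDestReg)
// for register-register moves. They carry no patterns and exist only so the
// assembler and disassembler can round-trip this alternate encoding; it is
// reachable in assembly through the ".s" mnemonic aliases defined after this
// block.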
4276let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4277  let Predicates = [HasFP16] in {
4278    def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4279        (ins VR128X:$src1, VR128X:$src2),
4280        "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4281        []>, T_MAP5, XS, EVEX, VVVV, VEX_LIG,
4282        Sched<[SchedWriteFShuffle.XMM]>;
4283
4284    let Constraints = "$src0 = $dst" in
4285    def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4286        (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
4287         VR128X:$src1, VR128X:$src2),
4288        "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
4289          "$dst {${mask}}, $src1, $src2}",
4290        []>, T_MAP5, XS, EVEX_K, EVEX, VVVV, VEX_LIG,
4291        Sched<[SchedWriteFShuffle.XMM]>;
4292
4293    def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4294        (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4295        "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
4296          "$dst {${mask}} {z}, $src1, $src2}",
4297        []>, EVEX_KZ, T_MAP5, XS, EVEX, VVVV, VEX_LIG,
4298        Sched<[SchedWriteFShuffle.XMM]>;
4299  }
4300  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4301                           (ins VR128X:$src1, VR128X:$src2),
4302                           "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4303                           []>, TB, XS, EVEX, VVVV, VEX_LIG,
4304                           Sched<[SchedWriteFShuffle.XMM]>;
4305
4306  let Constraints = "$src0 = $dst" in
4307  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4308                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4309                                                   VR128X:$src1, VR128X:$src2),
4310                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4311                                        "$dst {${mask}}, $src1, $src2}",
4312                             []>, EVEX_K, TB, XS, EVEX, VVVV, VEX_LIG,
4313                             Sched<[SchedWriteFShuffle.XMM]>;
4314
4315  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4316                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4317                         "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4318                                    "$dst {${mask}} {z}, $src1, $src2}",
4319                         []>, EVEX_KZ, TB, XS, EVEX, VVVV, VEX_LIG,
4320                         Sched<[SchedWriteFShuffle.XMM]>;
4321
4322  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4323                           (ins VR128X:$src1, VR128X:$src2),
4324                           "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4325                           []>, TB, XD, EVEX, VVVV, VEX_LIG, REX_W,
4326                           Sched<[SchedWriteFShuffle.XMM]>;
4327
4328  let Constraints = "$src0 = $dst" in
4329  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4330                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4331                                                   VR128X:$src1, VR128X:$src2),
4332                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4333                                        "$dst {${mask}}, $src1, $src2}",
4334                             []>, EVEX_K, TB, XD, EVEX, VVVV, VEX_LIG,
4335                             REX_W, Sched<[SchedWriteFShuffle.XMM]>;
4336
4337  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4338                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4339                                                          VR128X:$src2),
4340                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4341                                         "$dst {${mask}} {z}, $src1, $src2}",
4342                              []>, EVEX_KZ, TB, XD, EVEX, VVVV, VEX_LIG,
4343                              REX_W, Sched<[SchedWriteFShuffle.XMM]>;
4344}
4345
4346def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4347                (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4348def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
4349                             "$dst {${mask}}, $src1, $src2}",
4350                (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
4351                                VR128X:$src1, VR128X:$src2), 0>;
4352def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4353                             "$dst {${mask}} {z}, $src1, $src2}",
4354                (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
4355                                 VR128X:$src1, VR128X:$src2), 0>;
4356def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4357                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4358def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4359                             "$dst {${mask}}, $src1, $src2}",
4360                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4361                                VR128X:$src1, VR128X:$src2), 0>;
4362def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4363                             "$dst {${mask}} {z}, $src1, $src2}",
4364                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4365                                 VR128X:$src1, VR128X:$src2), 0>;
4366def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4367                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4368def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4369                             "$dst {${mask}}, $src1, $src2}",
4370                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4371                                VR128X:$src1, VR128X:$src2), 0>;
4372def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4373                             "$dst {${mask}} {z}, $src1, $src2}",
4374                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4375                                 VR128X:$src1, VR128X:$src2), 0>;
4376
4377let Predicates = [HasAVX512, OptForSize] in {
4378  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4379            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4380  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4381            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4382
4383  // Move low f32 and clear high bits.
4384  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4385            (SUBREG_TO_REG (i32 0),
4386             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4387              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4388  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4389            (SUBREG_TO_REG (i32 0),
4390             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4391              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4392
4393  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4394            (SUBREG_TO_REG (i32 0),
4395             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4396              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4397  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4398            (SUBREG_TO_REG (i32 0),
4399             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4400              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4401}
4402
4403// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4404// VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
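// In the patterns below the blend immediate keeps only the low element of the
// source: bit 0 (i8 1) selects dword 0 for VBLENDPS, and bits 0-1 (i8 3)
// select the two words forming the low 32 bits for VPBLENDW; all remaining
// lanes come from the zero vector.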
4405let Predicates = [HasAVX512, OptForSpeed] in {
4406  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4407            (SUBREG_TO_REG (i32 0),
4408             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4409                          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4410                          (i8 1))), sub_xmm)>;
4411  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4412            (SUBREG_TO_REG (i32 0),
4413             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4414                          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4415                          (i8 3))), sub_xmm)>;
4416}
4417
4418let Predicates = [HasAVX512] in {
4419  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4420            (VMOVSSZrm addr:$src)>;
4421  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4422            (VMOVSDZrm addr:$src)>;
4423
4424  // Represent the same patterns as above, but in the form they appear for
4425  // 256-bit types.
4426  def : Pat<(v8f32 (X86vzload32 addr:$src)),
4427            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4428  def : Pat<(v4f64 (X86vzload64 addr:$src)),
4429            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4430
4431  // Represent the same patterns as above, but in the form they appear for
4432  // 512-bit types.
4433  def : Pat<(v16f32 (X86vzload32 addr:$src)),
4434            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4435  def : Pat<(v8f64 (X86vzload64 addr:$src)),
4436            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4437}
4438let Predicates = [HasFP16] in {
4439  def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
4440            (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
4441  def : Pat<(v8i16 (X86vzmovl (v8i16 VR128X:$src))),
4442            (VMOVSHZrr (v8i16 (AVX512_128_SET0)), VR128X:$src)>;
4443
4444  // FIXME: We need better canonicalization in DAG combine.
4445  def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
4446            (SUBREG_TO_REG (i32 0),
4447             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4448              (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4449  def : Pat<(v16i16 (X86vzmovl (v16i16 VR256X:$src))),
4450            (SUBREG_TO_REG (i32 0),
4451             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4452              (v8i16 (EXTRACT_SUBREG (v16i16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4453
4454  // FIXME: We need better canonicalization in DAG combine.
4455  def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
4456            (SUBREG_TO_REG (i32 0),
4457             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4458              (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
4459  def : Pat<(v32i16 (X86vzmovl (v32i16 VR512:$src))),
4460            (SUBREG_TO_REG (i32 0),
4461             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4462              (v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm)))), sub_xmm)>;
4463
4464  def : Pat<(v8f16 (X86vzload16 addr:$src)),
4465            (VMOVSHZrm addr:$src)>;
4466
4467  def : Pat<(v16f16 (X86vzload16 addr:$src)),
4468            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4469
4470  def : Pat<(v32f16 (X86vzload16 addr:$src)),
4471            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4472}
4473
4474let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4475def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4476                                (ins VR128X:$src),
4477                                "vmovq\t{$src, $dst|$dst, $src}",
4478                                [(set VR128X:$dst, (v2i64 (X86vzmovl
4479                                                   (v2i64 VR128X:$src))))]>,
4480                                EVEX, REX_W;
4481}
4482
4483let Predicates = [HasAVX512] in {
4484  def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
4485            (VMOVDI2PDIZrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
4486                                              GR8:$src, sub_8bit)))>;
4487  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4488            (VMOVDI2PDIZrr GR32:$src)>;
4489
4490  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4491            (VMOV64toPQIZrr GR64:$src)>;
4492
4493  // AVX 128-bit movd/movq instructions write zeros in the upper bits of the register.
4494  def : Pat<(v4i32 (X86vzload32 addr:$src)),
4495            (VMOVDI2PDIZrm addr:$src)>;
4496  def : Pat<(v8i32 (X86vzload32 addr:$src)),
4497            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4498  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4499            (VMOVZPQILo2PQIZrr VR128X:$src)>;
4500  def : Pat<(v2i64 (X86vzload64 addr:$src)),
4501            (VMOVQI2PQIZrm addr:$src)>;
4502  def : Pat<(v4i64 (X86vzload64 addr:$src)),
4503            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4504
4505  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4506  def : Pat<(v16i32 (X86vzload32 addr:$src)),
4507            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4508  def : Pat<(v8i64 (X86vzload64 addr:$src)),
4509            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4510
4511  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4512            (SUBREG_TO_REG (i32 0),
4513             (v2f64 (VMOVZPQILo2PQIZrr
4514                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4515             sub_xmm)>;
4516  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4517            (SUBREG_TO_REG (i32 0),
4518             (v2i64 (VMOVZPQILo2PQIZrr
4519                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4520             sub_xmm)>;
4521
4522  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4523            (SUBREG_TO_REG (i32 0),
4524             (v2f64 (VMOVZPQILo2PQIZrr
4525                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4526             sub_xmm)>;
4527  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4528            (SUBREG_TO_REG (i32 0),
4529             (v2i64 (VMOVZPQILo2PQIZrr
4530                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4531             sub_xmm)>;
4532}
4533
4534//===----------------------------------------------------------------------===//
4535// AVX-512 - Non-temporals
4536//===----------------------------------------------------------------------===//
4537
4538def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4539                      (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4540                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4541                      EVEX, T8, PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4542
4543let Predicates = [HasVLX] in {
4544  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4545                       (ins i256mem:$src),
4546                       "vmovntdqa\t{$src, $dst|$dst, $src}",
4547                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4548                       EVEX, T8, PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4549
4550  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4551                      (ins i128mem:$src),
4552                      "vmovntdqa\t{$src, $dst|$dst, $src}",
4553                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4554                      EVEX, T8, PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
4555}
4556
4557multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4558                        X86SchedWriteMoveLS Sched,
4559                        PatFrag st_frag = alignednontemporalstore> {
4560  let SchedRW = [Sched.MR], AddedComplexity = 400 in
4561  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4562                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4563                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
4564                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
4565}
4566
4567multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4568                           AVX512VLVectorVTInfo VTInfo,
4569                           X86SchedWriteMoveLSWidths Sched> {
4570  let Predicates = [HasAVX512] in
4571    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4572
4573  let Predicates = [HasAVX512, HasVLX] in {
4574    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4575    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4576  }
4577}
4578
4579defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4580                                SchedWriteVecMoveLSNT>, TB, PD;
4581defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4582                                SchedWriteFMoveLSNT>, TB, PD, REX_W;
4583defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4584                                SchedWriteFMoveLSNT>, TB;
4585
4586let Predicates = [HasAVX512], AddedComplexity = 400 in {
4587  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4588            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4589  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4590            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4591  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4592            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4593
4594  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4595            (VMOVNTDQAZrm addr:$src)>;
4596  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4597            (VMOVNTDQAZrm addr:$src)>;
4598  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4599            (VMOVNTDQAZrm addr:$src)>;
4600  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4601            (VMOVNTDQAZrm addr:$src)>;
4602  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4603            (VMOVNTDQAZrm addr:$src)>;
4604  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4605            (VMOVNTDQAZrm addr:$src)>;
4606}
4607
4608let Predicates = [HasVLX], AddedComplexity = 400 in {
4609  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4610            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4611  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4612            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4613  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4614            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4615
4616  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4617            (VMOVNTDQAZ256rm addr:$src)>;
4618  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4619            (VMOVNTDQAZ256rm addr:$src)>;
4620  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4621            (VMOVNTDQAZ256rm addr:$src)>;
4622  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4623            (VMOVNTDQAZ256rm addr:$src)>;
4624  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4625            (VMOVNTDQAZ256rm addr:$src)>;
4626  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4627            (VMOVNTDQAZ256rm addr:$src)>;
4628
4629  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4630            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4631  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4632            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4633  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4634            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4635
4636  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4637            (VMOVNTDQAZ128rm addr:$src)>;
4638  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4639            (VMOVNTDQAZ128rm addr:$src)>;
4640  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4641            (VMOVNTDQAZ128rm addr:$src)>;
4642  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4643            (VMOVNTDQAZ128rm addr:$src)>;
4644  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4645            (VMOVNTDQAZ128rm addr:$src)>;
4646  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4647            (VMOVNTDQAZ128rm addr:$src)>;
4648}
4649
4650//===----------------------------------------------------------------------===//
4651// AVX-512 - Integer arithmetic
4652//===----------------------------------------------------------------------===//
4653multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4654                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
4655                           bit IsCommutable = 0> {
4656  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4657                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4658                    "$src2, $src1", "$src1, $src2",
4659                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4660                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX, VVVV,
4661                    Sched<[sched]>;
4662
4663  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4664                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4665                  "$src2, $src1", "$src1, $src2",
4666                  (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4667                  AVX512BIBase, EVEX, VVVV,
4668                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4669}
4670
4671multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4672                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
4673                            bit IsCommutable = 0> :
4674           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4675  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4676                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4677                  "${src2}"#_.BroadcastStr#", $src1",
4678                  "$src1, ${src2}"#_.BroadcastStr,
4679                  (_.VT (OpNode _.RC:$src1,
4680                                (_.BroadcastLdFrag addr:$src2)))>,
4681                  AVX512BIBase, EVEX, VVVV, EVEX_B,
4682                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4683}
4684
4685multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4686                              AVX512VLVectorVTInfo VTInfo,
4687                              X86SchedWriteWidths sched, Predicate prd,
4688                              bit IsCommutable = 0> {
4689  let Predicates = [prd] in
4690    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4691                             IsCommutable>, EVEX_V512;
4692
4693  let Predicates = [prd, HasVLX] in {
4694    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4695                                sched.YMM, IsCommutable>, EVEX_V256;
4696    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4697                                sched.XMM, IsCommutable>, EVEX_V128;
4698  }
4699}
4700
4701multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4702                               AVX512VLVectorVTInfo VTInfo,
4703                               X86SchedWriteWidths sched, Predicate prd,
4704                               bit IsCommutable = 0> {
4705  let Predicates = [prd] in
4706    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4707                             IsCommutable>, EVEX_V512;
4708
4709  let Predicates = [prd, HasVLX] in {
4710    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4711                                 sched.YMM, IsCommutable>, EVEX_V256;
4712    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4713                                 sched.XMM, IsCommutable>, EVEX_V128;
4714  }
4715}
4716
4717multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4718                                X86SchedWriteWidths sched, Predicate prd,
4719                                bit IsCommutable = 0> {
4720  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4721                                  sched, prd, IsCommutable>,
4722                                  REX_W, EVEX_CD8<64, CD8VF>;
4723}
4724
4725multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4726                                X86SchedWriteWidths sched, Predicate prd,
4727                                bit IsCommutable = 0> {
4728  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4729                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4730}
4731
4732multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4733                                X86SchedWriteWidths sched, Predicate prd,
4734                                bit IsCommutable = 0> {
4735  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4736                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4737                                 WIG;
4738}
4739
4740multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4741                                X86SchedWriteWidths sched, Predicate prd,
4742                                bit IsCommutable = 0> {
4743  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4744                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4745                                 WIG;
4746}
4747
4748multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4749                                 SDNode OpNode, X86SchedWriteWidths sched,
4750                                 Predicate prd, bit IsCommutable = 0> {
4751  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4752                                   IsCommutable>;
4753
4754  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4755                                   IsCommutable>;
4756}
4757
4758multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4759                                 SDNode OpNode, X86SchedWriteWidths sched,
4760                                 Predicate prd, bit IsCommutable = 0> {
4761  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4762                                   IsCommutable>;
4763
4764  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
4765                                   IsCommutable>;
4766}
4767
4768multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4769                                  bits<8> opc_d, bits<8> opc_q,
4770                                  string OpcodeStr, SDNode OpNode,
4771                                  X86SchedWriteWidths sched,
4772                                  bit IsCommutable = 0> {
4773  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4774                                    sched, HasAVX512, IsCommutable>,
4775              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4776                                    sched, HasBWI, IsCommutable>;
4777}
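// Note: the nested multiclasses compose instruction names from the defm
// prefix, an element-size letter, a vector-length tag (Z/Z256/Z128) and an
// operand-form suffix; e.g. VPADD below expands to names such as VPADDBZ128rr,
// VPADDWZ256rmk and VPADDQZrmb.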
4778
4779multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4780                            X86FoldableSchedWrite sched,
4781                            SDNode OpNode,X86VectorVTInfo _Src,
4782                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4783                            bit IsCommutable = 0> {
4784  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4785                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4786                            "$src2, $src1","$src1, $src2",
4787                            (_Dst.VT (OpNode
4788                                         (_Src.VT _Src.RC:$src1),
4789                                         (_Src.VT _Src.RC:$src2))),
4790                            IsCommutable>,
4791                            AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
4792  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4793                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4794                        "$src2, $src1", "$src1, $src2",
4795                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4796                                      (_Src.LdFrag addr:$src2)))>,
4797                        AVX512BIBase, EVEX, VVVV,
4798                        Sched<[sched.Folded, sched.ReadAfterFold]>;
4799
4800  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4801                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4802                    OpcodeStr,
4803                    "${src2}"#_Brdct.BroadcastStr#", $src1",
4804                     "$src1, ${src2}"#_Brdct.BroadcastStr,
4805                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4806                                 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
4807                    AVX512BIBase, EVEX, VVVV, EVEX_B,
4808                    Sched<[sched.Folded, sched.ReadAfterFold]>;
4809}
4810
4811defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4812                                    SchedWriteVecALU, 1>;
4813defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4814                                    SchedWriteVecALU, 0>;
4815defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
4816                                    SchedWriteVecALU, HasBWI, 1>;
4817defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
4818                                    SchedWriteVecALU, HasBWI, 0>;
4819defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
4820                                     SchedWriteVecALU, HasBWI, 1>;
4821defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
4822                                     SchedWriteVecALU, HasBWI, 0>;
4823defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4824                                    SchedWritePMULLD, HasAVX512, 1>, T8;
4825defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4826                                    SchedWriteVecIMul, HasBWI, 1>;
4827defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4828                                    SchedWriteVecIMul, HasDQI, 1>, T8;
4829defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
4830                                    HasBWI, 1>;
4831defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
4832                                     HasBWI, 1>;
4833defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
4834                                      SchedWriteVecIMul, HasBWI, 1>, T8;
4835defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
4836                                   SchedWriteVecALU, HasBWI, 1>;
4837defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
4838                                    SchedWriteVecIMul, HasAVX512, 1>, T8;
4839defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
4840                                     SchedWriteVecIMul, HasAVX512, 1>;
4841
4842multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4843                            X86SchedWriteWidths sched,
4844                            AVX512VLVectorVTInfo _SrcVTInfo,
4845                            AVX512VLVectorVTInfo _DstVTInfo,
4846                            SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
4847  let Predicates = [prd] in
4848    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4849                                 _SrcVTInfo.info512, _DstVTInfo.info512,
4850                                 v8i64_info, IsCommutable>,
4851                                  EVEX_V512, EVEX_CD8<64, CD8VF>, REX_W;
4852  let Predicates = [HasVLX, prd] in {
4853    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4854                                      _SrcVTInfo.info256, _DstVTInfo.info256,
4855                                      v4i64x_info, IsCommutable>,
4856                                      EVEX_V256, EVEX_CD8<64, CD8VF>, REX_W;
4857    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4858                                      _SrcVTInfo.info128, _DstVTInfo.info128,
4859                                      v2i64x_info, IsCommutable>,
4860                                     EVEX_V128, EVEX_CD8<64, CD8VF>, REX_W;
4861  }
4862}
4863
4864defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
4865                                avx512vl_i8_info, avx512vl_i8_info,
4866                                X86multishift, HasVBMI, 0>, T8;
4867
4868multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4869                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4870                            X86FoldableSchedWrite sched> {
4871  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4872                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4873                    OpcodeStr,
4874                    "${src2}"#_Src.BroadcastStr#", $src1",
4875                     "$src1, ${src2}"#_Src.BroadcastStr,
4876                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4877                                 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
4878                    EVEX, VVVV, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4879                    Sched<[sched.Folded, sched.ReadAfterFold]>;
4880}
4881
4882multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4883                            SDNode OpNode,X86VectorVTInfo _Src,
4884                            X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
4885                            bit IsCommutable = 0> {
4886  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4887                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4888                            "$src2, $src1","$src1, $src2",
4889                            (_Dst.VT (OpNode
4890                                         (_Src.VT _Src.RC:$src1),
4891                                         (_Src.VT _Src.RC:$src2))),
4892                            IsCommutable, IsCommutable>,
4893                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX, VVVV, Sched<[sched]>;
4894  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4895                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4896                        "$src2, $src1", "$src1, $src2",
4897                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4898                                      (_Src.LdFrag addr:$src2)))>,
4899                         EVEX, VVVV, EVEX_CD8<_Src.EltSize, CD8VF>,
4900                         Sched<[sched.Folded, sched.ReadAfterFold]>;
4901}
4902
4903multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4904                                    SDNode OpNode> {
4905  let Predicates = [HasBWI] in
4906  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4907                                 v32i16_info, SchedWriteShuffle.ZMM>,
4908                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4909                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
4910  let Predicates = [HasBWI, HasVLX] in {
4911    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4912                                     v16i16x_info, SchedWriteShuffle.YMM>,
4913                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4914                                      v16i16x_info, SchedWriteShuffle.YMM>,
4915                                      EVEX_V256;
4916    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4917                                     v8i16x_info, SchedWriteShuffle.XMM>,
4918                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4919                                      v8i16x_info, SchedWriteShuffle.XMM>,
4920                                      EVEX_V128;
4921  }
4922}
4923multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4924                            SDNode OpNode> {
4925  let Predicates = [HasBWI] in
4926  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
4927                                SchedWriteShuffle.ZMM>, EVEX_V512, WIG;
4928  let Predicates = [HasBWI, HasVLX] in {
4929    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4930                                     v32i8x_info, SchedWriteShuffle.YMM>,
4931                                     EVEX_V256, WIG;
4932    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4933                                     v16i8x_info, SchedWriteShuffle.XMM>,
4934                                     EVEX_V128, WIG;
4935  }
4936}
4937
4938multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4939                            SDNode OpNode, AVX512VLVectorVTInfo _Src,
4940                            AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4941  let Predicates = [HasBWI] in
4942  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4943                                _Dst.info512, SchedWriteVecIMul.ZMM,
4944                                IsCommutable>, EVEX_V512;
4945  let Predicates = [HasBWI, HasVLX] in {
4946    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4947                                     _Dst.info256, SchedWriteVecIMul.YMM,
4948                                     IsCommutable>, EVEX_V256;
4949    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4950                                     _Dst.info128, SchedWriteVecIMul.XMM,
4951                                     IsCommutable>, EVEX_V128;
4952  }
4953}
4954
4955defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4956defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4957defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4958defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4959
4960defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4961                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8, WIG;
4962defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4963                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, WIG;
4964
4965defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4966                                    SchedWriteVecALU, HasBWI, 1>, T8;
4967defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4968                                    SchedWriteVecALU, HasBWI, 1>;
4969defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
4970                                    SchedWriteVecALU, HasAVX512, 1>, T8;
4971defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
4972                                    SchedWriteVecALU, HasAVX512, 1>, T8;
4973
4974defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4975                                    SchedWriteVecALU, HasBWI, 1>;
4976defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4977                                    SchedWriteVecALU, HasBWI, 1>, T8;
4978defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
4979                                    SchedWriteVecALU, HasAVX512, 1>, T8;
4980defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
4981                                    SchedWriteVecALU, HasAVX512, 1>, T8;
4982
4983defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4984                                    SchedWriteVecALU, HasBWI, 1>, T8;
4985defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4986                                    SchedWriteVecALU, HasBWI, 1>;
4987defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
4988                                    SchedWriteVecALU, HasAVX512, 1>, T8;
4989defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
4990                                    SchedWriteVecALU, HasAVX512, 1>, T8;
4991
4992defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4993                                    SchedWriteVecALU, HasBWI, 1>;
4994defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4995                                    SchedWriteVecALU, HasBWI, 1>, T8;
4996defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
4997                                    SchedWriteVecALU, HasAVX512, 1>, T8;
4998defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
4999                                    SchedWriteVecALU, HasAVX512, 1>, T8;
5000
5001// PMULLQ: Use the 512-bit version to implement the 128/256-bit forms under NoVLX.
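// Each pattern widens the operands into a ZMM register with INSERT_SUBREG over
// IMPLICIT_DEF, performs the 512-bit multiply, and extracts the original
// vector width back out with EXTRACT_SUBREG.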
5002let Predicates = [HasDQI, NoVLX] in {
5003  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5004            (EXTRACT_SUBREG
5005                (VPMULLQZrr
5006                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5007                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5008             sub_ymm)>;
5009  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5010            (EXTRACT_SUBREG
5011                (VPMULLQZrmb
5012                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5013                    addr:$src2),
5014             sub_ymm)>;
5015
5016  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5017            (EXTRACT_SUBREG
5018                (VPMULLQZrr
5019                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5020                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5021             sub_xmm)>;
5022  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5023            (EXTRACT_SUBREG
5024                (VPMULLQZrmb
5025                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5026                    addr:$src2),
5027             sub_xmm)>;
5028}
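// The idiom above widens each 128/256-bit operand into a zmm register with
// INSERT_SUBREG over IMPLICIT_DEF, runs the 512-bit-only instruction, and
// extracts the low xmm/ymm again with EXTRACT_SUBREG; the widened upper
// lanes are undefined but never observed.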
5029
5030multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
5031  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
5032            (EXTRACT_SUBREG
5033                (!cast<Instruction>(Instr#"rr")
5034                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5035                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5036             sub_ymm)>;
5037  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5038            (EXTRACT_SUBREG
5039                (!cast<Instruction>(Instr#"rmb")
5040                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5041                    addr:$src2),
5042             sub_ymm)>;
5043
5044  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
5045            (EXTRACT_SUBREG
5046                (!cast<Instruction>(Instr#"rr")
5047                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5048                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5049             sub_xmm)>;
5050  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5051            (EXTRACT_SUBREG
5052                (!cast<Instruction>(Instr#"rmb")
5053                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5054                    addr:$src2),
5055             sub_xmm)>;
5056}
5057
5058let Predicates = [HasAVX512, NoVLX] in {
5059  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5060  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5061  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5062  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5063}
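// As an illustrative sketch of the expansion, the VPMAXSQZ instantiation
// above selects roughly
//   (v2i64 (smax VR128X:$a, VR128X:$b))
//     -> (EXTRACT_SUBREG
//          (VPMAXSQZrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$a, sub_xmm),
//                      (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$b, sub_xmm)),
//          sub_xmm)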
5064
5065//===----------------------------------------------------------------------===//
5066// AVX-512  Logical Instructions
5067//===----------------------------------------------------------------------===//
5068
5069defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5070                                   SchedWriteVecLogic, HasAVX512, 1>;
5071defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5072                                  SchedWriteVecLogic, HasAVX512, 1>;
5073defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5074                                   SchedWriteVecLogic, HasAVX512, 1>;
5075defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5076                                    SchedWriteVecLogic, HasAVX512>;
5077
5078let Predicates = [HasVLX] in {
5079  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5080            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5081  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5082            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5083
5084  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5085            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5086  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5087            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5088
5089  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5090            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5091  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5092            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5093
5094  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5095            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5096  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5097            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5098
5099  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5100            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5101  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5102            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5103
5104  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5105            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5106  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5107            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5108
5109  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5110            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5111  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5112            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5113
5114  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5115            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5116  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5117            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5118
5119  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5120            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5121  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5122            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5123
5124  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5125            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5126  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5127            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5128
5129  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5130            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5131  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5132            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5133
5134  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5135            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5136  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5137            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5138
5139  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5140            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5141  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5142            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5143
5144  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5145            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5146  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5147            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5148
5149  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5150            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5151  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5152            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5153
5154  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5155            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5156  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5157            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5158}
5159
5160let Predicates = [HasAVX512] in {
5161  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5162            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5163  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5164            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5165
5166  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5167            (VPORQZrr VR512:$src1, VR512:$src2)>;
5168  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5169            (VPORQZrr VR512:$src1, VR512:$src2)>;
5170
5171  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5172            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5173  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5174            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5175
5176  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5177            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5178  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5179            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5180
5181  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5182            (VPANDQZrm VR512:$src1, addr:$src2)>;
5183  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5184            (VPANDQZrm VR512:$src1, addr:$src2)>;
5185
5186  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5187            (VPORQZrm VR512:$src1, addr:$src2)>;
5188  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5189            (VPORQZrm VR512:$src1, addr:$src2)>;
5190
5191  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5192            (VPXORQZrm VR512:$src1, addr:$src2)>;
5193  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5194            (VPXORQZrm VR512:$src1, addr:$src2)>;
5195
5196  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5197            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5198  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5199            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5200}
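// The patterns above map byte/word bitwise ops onto the Q-element forms:
// the operations are bitwise, so element size is irrelevant, and AVX-512
// only provides D/Q variants of VPAND/VPOR/VPXOR/VPANDN.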
5201
5202// Patterns to catch a vselect whose type differs from that of the logic op.
5203multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5204                                    X86VectorVTInfo _,
5205                                    X86VectorVTInfo IntInfo> {
5206  // Masked register-register logical operations.
5207  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5208                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5209                   _.RC:$src0)),
5210            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5211             _.RC:$src1, _.RC:$src2)>;
5212
5213  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5214                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5215                   _.ImmAllZerosV)),
5216            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5217             _.RC:$src2)>;
5218
5219  // Masked register-memory logical operations.
5220  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5221                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5222                                            (load addr:$src2)))),
5223                   _.RC:$src0)),
5224            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5225             _.RC:$src1, addr:$src2)>;
5226  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5227                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5228                                            (load addr:$src2)))),
5229                   _.ImmAllZerosV)),
5230            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5231             addr:$src2)>;
5232}
5233
5234multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5235                                         X86VectorVTInfo _,
5236                                         X86VectorVTInfo IntInfo> {
5237  // Register-broadcast logical operations.
5238  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5239                   (bitconvert
5240                    (IntInfo.VT (OpNode _.RC:$src1,
5241                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5242                   _.RC:$src0)),
5243            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5244             _.RC:$src1, addr:$src2)>;
5245  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5246                   (bitconvert
5247                    (IntInfo.VT (OpNode _.RC:$src1,
5248                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5249                   _.ImmAllZerosV)),
5250            (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
5251             _.RC:$src1, addr:$src2)>;
5252}
5253
5254multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5255                                         AVX512VLVectorVTInfo SelectInfo,
5256                                         AVX512VLVectorVTInfo IntInfo> {
5257let Predicates = [HasVLX] in {
5258  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5259                                 IntInfo.info128>;
5260  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5261                                 IntInfo.info256>;
5262}
5263let Predicates = [HasAVX512] in {
5264  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5265                                 IntInfo.info512>;
5266}
5267}
5268
5269multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5270                                               AVX512VLVectorVTInfo SelectInfo,
5271                                               AVX512VLVectorVTInfo IntInfo> {
5272let Predicates = [HasVLX] in {
5273  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5274                                       SelectInfo.info128, IntInfo.info128>;
5275  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5276                                       SelectInfo.info256, IntInfo.info256>;
5277}
5278let Predicates = [HasAVX512] in {
5279  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5280                                       SelectInfo.info512, IntInfo.info512>;
5281}
5282}
5283
5284multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5285  // i64 vselect with i32/i16/i8 logic op
5286  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5287                                       avx512vl_i32_info>;
5288  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5289                                       avx512vl_i16_info>;
5290  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5291                                       avx512vl_i8_info>;
5292
5293  // i32 vselect with i64/i16/i8 logic op
5294  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5295                                       avx512vl_i64_info>;
5296  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5297                                       avx512vl_i16_info>;
5298  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5299                                       avx512vl_i8_info>;
5300
5301  // f32 vselect with i64/i32/i16/i8 logic op
5302  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5303                                       avx512vl_i64_info>;
5304  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5305                                       avx512vl_i32_info>;
5306  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5307                                       avx512vl_i16_info>;
5308  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5309                                       avx512vl_i8_info>;
5310
5311  // f64 vselect with i64/i32/i16/i8 logic op
5312  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5313                                       avx512vl_i64_info>;
5314  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5315                                       avx512vl_i32_info>;
5316  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5317                                       avx512vl_i16_info>;
5318  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5319                                       avx512vl_i8_info>;
5320
5321  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5322                                             avx512vl_f32_info,
5323                                             avx512vl_i32_info>;
5324  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5325                                             avx512vl_f64_info,
5326                                             avx512vl_i64_info>;
5327}
5328
5329defm : avx512_logical_lowering_types<"VPAND", and>;
5330defm : avx512_logical_lowering_types<"VPOR",  or>;
5331defm : avx512_logical_lowering_types<"VPXOR", xor>;
5332defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
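// Illustrative sketch (not an extra pattern) of what the lowerings above
// catch: a masked select typed as v8i64 over a logic op performed on v16i32
// still selects to the masked Q form, roughly
//   (v8i64 (vselect_mask VK8WM:$mask,
//                        (bitconvert (v16i32 (and VR512:$a, VR512:$b))),
//                        VR512:$src0))
//     -> (VPANDQZrrk VR512:$src0, VK8WM:$mask, VR512:$a, VR512:$b)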
5333
5334//===----------------------------------------------------------------------===//
5335// AVX-512  FP arithmetic
5336//===----------------------------------------------------------------------===//
5337
5338multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
5339                            SDPatternOperator OpNode, SDNode VecNode,
5340                            X86FoldableSchedWrite sched, bit IsCommutable> {
5341  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5342  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5343                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5344                           "$src2, $src1", "$src1, $src2",
5345                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5346                           Sched<[sched]>;
5347
5348  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5349                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5350                         "$src2, $src1", "$src1, $src2",
5351                         (_.VT (VecNode _.RC:$src1,
5352                                        (_.ScalarIntMemFrags addr:$src2)))>,
5353                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5354  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5355  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5356                         (ins _.FRC:$src1, _.FRC:$src2),
5357                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5358                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5359                          Sched<[sched]> {
5360    let isCommutable = IsCommutable;
5361  }
5362  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5363                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5364                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5365                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5366                         (_.ScalarLdFrag addr:$src2)))]>,
5367                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5368  }
5369  }
5370}
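// Note on the two flavors above: the *_Int forms operate on full vector
// registers (_.RC), matching the intrinsic/vector nodes and supporting
// masking, while the isCodeGenOnly rr/rm forms use scalar FP registers
// (_.FRC) and carry the patterns for the plain scalar OpNode.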
5371
5372multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
5373                                  SDNode VecNode, X86FoldableSchedWrite sched> {
5374  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5375  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5376                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5377                          "$rc, $src2, $src1", "$src1, $src2, $rc",
5378                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5379                          (i32 timm:$rc))>,
5380                          EVEX_B, EVEX_RC, Sched<[sched]>;
5381}
5382multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
5383                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5384                                X86FoldableSchedWrite sched, bit IsCommutable> {
5385  let ExeDomain = _.ExeDomain in {
5386  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5387                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5388                           "$src2, $src1", "$src1, $src2",
5389                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5390                           Sched<[sched]>, SIMD_EXC;
5391
5392  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5393                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5394                         "$src2, $src1", "$src1, $src2",
5395                         (_.VT (VecNode _.RC:$src1,
5396                                        (_.ScalarIntMemFrags addr:$src2)))>,
5397                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
5398
5399  let isCodeGenOnly = 1, Predicates = [HasAVX512],
5400      Uses = [MXCSR], mayRaiseFPException = 1 in {
5401  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5402                         (ins _.FRC:$src1, _.FRC:$src2),
5403                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5404                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5405                          Sched<[sched]> {
5406    let isCommutable = IsCommutable;
5407  }
5408  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5409                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5410                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5411                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5412                         (_.ScalarLdFrag addr:$src2)))]>,
5413                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5414  }
5415
5416  let Uses = [MXCSR] in
5417  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5418                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5419                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5420                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5421                            EVEX_B, Sched<[sched]>;
5422  }
5423}
5424
5425multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5426                                SDNode VecNode, SDNode RndNode,
5427                                X86SchedWriteSizes sched, bit IsCommutable> {
5428  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5429                              sched.PS.Scl, IsCommutable>,
5430             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5431                              sched.PS.Scl>,
5432                              TB, XS, EVEX, VVVV, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5433  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5434                              sched.PD.Scl, IsCommutable>,
5435             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5436                              sched.PD.Scl>,
5437                              TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5438  let Predicates = [HasFP16] in
5439    defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
5440                                VecNode, sched.PH.Scl, IsCommutable>,
5441               avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
5442                                sched.PH.Scl>,
5443                                T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
5444}
5445
5446multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5447                              SDNode VecNode, SDNode SaeNode,
5448                              X86SchedWriteSizes sched, bit IsCommutable> {
5449  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5450                              VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
5451                              TB, XS, EVEX, VVVV, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5452  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5453                              VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
5454                              TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5455  let Predicates = [HasFP16] in {
5456    defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
5457                                VecNode, SaeNode, sched.PH.Scl, IsCommutable>,
5458                                T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
5459  }
5460}
5461defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5462                                 SchedWriteFAddSizes, 1>;
5463defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5464                                 SchedWriteFMulSizes, 1>;
5465defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5466                                 SchedWriteFAddSizes, 0>;
5467defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5468                                 SchedWriteFDivSizes, 0>;
5469defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5470                               SchedWriteFCmpSizes, 0>;
5471defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5472                               SchedWriteFCmpSizes, 0>;
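// Each instantiation above should expand (by defm-name concatenation) to
// register, memory, rounding/SAE and codegen-only variants, e.g. for VADD:
// VADDSSZrr_Int, VADDSSZrm_Int, VADDSSZrrb_Int, VADDSSZrr and VADDSSZrm,
// plus the SD and (under HasFP16) SH counterparts.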
5473
5474// MIN/MAX nodes are commutable when "unsafe-fp-math" is enabled. In that case
5475// we use X86fminc and X86fmaxc instead of X86fmin and X86fmax.
5476multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5477                                    X86VectorVTInfo _, SDNode OpNode,
5478                                    X86FoldableSchedWrite sched> {
5479  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5480  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5481                         (ins _.FRC:$src1, _.FRC:$src2),
5482                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5483                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5484                          Sched<[sched]> {
5485    let isCommutable = 1;
5486  }
5487  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5488                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5489                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5490                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5491                         (_.ScalarLdFrag addr:$src2)))]>,
5492                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5493  }
5494}
5495defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5496                                         SchedWriteFCmp.Scl>, TB, XS,
5497                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5498
5499defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5500                                         SchedWriteFCmp.Scl>, TB, XD,
5501                                         REX_W, EVEX, VVVV, VEX_LIG,
5502                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5503
5504defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5505                                         SchedWriteFCmp.Scl>, TB, XS,
5506                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5507
5508defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5509                                         SchedWriteFCmp.Scl>, TB, XD,
5510                                         REX_W, EVEX, VVVV, VEX_LIG,
5511                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5512
5513defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
5514                                         SchedWriteFCmp.Scl>, T_MAP5, XS,
5515                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;
5516
5517defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
5518                                         SchedWriteFCmp.Scl>, T_MAP5, XS,
5519                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;
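// The VMINC*/VMAXC* scalar definitions above are selected only for
// X86fminc/X86fmaxc, the commutable min/max nodes used when NaN and -0.0
// ordering does not matter, so their operands may be freely commuted.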
5520
5521multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5522                            SDPatternOperator MaskOpNode,
5523                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
5524                            bit IsCommutable,
5525                            bit IsKCommutable = IsCommutable,
5526                            string suffix = _.Suffix,
5527                            string ClobberConstraint = "",
5528                            bit MayRaiseFPException = 1> {
5529  let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5530      Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
5531  defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
5532                                 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
5533                                 "$src2, $src1", "$src1, $src2",
5534                                 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
5535                                 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
5536                                 IsCommutable, IsKCommutable, IsKCommutable>, EVEX, VVVV, Sched<[sched]>;
5537  let mayLoad = 1 in {
5538    defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5539                                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
5540                                   "$src2, $src1", "$src1, $src2",
5541                                   (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5542                                   (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5543                                   ClobberConstraint>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
5544    defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5545                                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
5546                                    "${src2}"#_.BroadcastStr#", $src1",
5547                                    "$src1, ${src2}"#_.BroadcastStr,
5548                                    (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5549                                    (MaskOpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5550                                    ClobberConstraint>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5551    }
5552  }
5553}
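// Each packed FP binop above comes in three addressing forms: rr (reg-reg),
// rm (full-width memory operand) and rmb (a single element broadcast from
// memory, EVEX_B, printed with _.BroadcastStr, e.g. "{1to16}" for v16f32).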
5554
5555multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5556                                  SDPatternOperator OpNodeRnd,
5557                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
5558                                  string suffix = _.Suffix,
5559                                  string ClobberConstraint = ""> {
5560  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5561  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5562                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
5563                  "$rc, $src2, $src1", "$src1, $src2, $rc",
5564                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
5565                  0, 0, 0, vselect_mask, ClobberConstraint>,
5566                  EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
5567}
5568
5569multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5570                                SDPatternOperator OpNodeSAE,
5571                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5572  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5573  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5574                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5575                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5576                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5577                  EVEX, VVVV, EVEX_B, Sched<[sched]>;
5578}
5579
5580multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5581                             SDPatternOperator MaskOpNode,
5582                             Predicate prd, X86SchedWriteSizes sched,
5583                             bit IsCommutable = 0,
5584                             bit IsPD128Commutable = IsCommutable> {
5585  let Predicates = [prd] in {
5586  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
5587                              sched.PS.ZMM, IsCommutable>, EVEX_V512, TB,
5588                              EVEX_CD8<32, CD8VF>;
5589  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
5590                              sched.PD.ZMM, IsCommutable>, EVEX_V512, TB, PD, REX_W,
5591                              EVEX_CD8<64, CD8VF>;
5592  }
5593
5594  // Define only if the AVX512VL feature is present.
5595  let Predicates = [prd, HasVLX] in {
5596    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
5597                                   sched.PS.XMM, IsCommutable>, EVEX_V128, TB,
5598                                   EVEX_CD8<32, CD8VF>;
5599    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
5600                                   sched.PS.YMM, IsCommutable>, EVEX_V256, TB,
5601                                   EVEX_CD8<32, CD8VF>;
5602    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
5603                                   sched.PD.XMM, IsPD128Commutable,
5604                                   IsCommutable>, EVEX_V128, TB, PD, REX_W,
5605                                   EVEX_CD8<64, CD8VF>;
5606    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
5607                                   sched.PD.YMM, IsCommutable>, EVEX_V256, TB, PD, REX_W,
5608                                   EVEX_CD8<64, CD8VF>;
5609  }
5610}
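// An instantiation such as VADD below therefore expands to the full matrix of
// widths and element types, e.g. VADDPSZrr, VADDPSZ256rm and VADDPDZ128rmb,
// with the 128/256-bit variants additionally predicated on AVX512VL.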
5611
5612multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5613                              SDPatternOperator MaskOpNode,
5614                              X86SchedWriteSizes sched, bit IsCommutable = 0> {
5615  let Predicates = [HasFP16] in {
5616    defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
5617                                sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5,
5618                                EVEX_CD8<16, CD8VF>;
5619  }
5620  let Predicates = [HasVLX, HasFP16] in {
5621    defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
5622                                   sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5,
5623                                   EVEX_CD8<16, CD8VF>;
5624    defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
5625                                   sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5,
5626                                   EVEX_CD8<16, CD8VF>;
5627  }
5628}
5629
5630let Uses = [MXCSR] in
5631multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5632                                   X86SchedWriteSizes sched> {
5633  let Predicates = [HasFP16] in {
5634    defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5635                                      v32f16_info>,
5636                                      EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
5637  }
5638  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5639                                    v16f32_info>,
5640                                    EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
5641  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5642                                    v8f64_info>,
5643                                    EVEX_V512, TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
5644}
5645
5646let Uses = [MXCSR] in
5647multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5648                                 X86SchedWriteSizes sched> {
5649  let Predicates = [HasFP16] in {
5650    defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5651                                    v32f16_info>,
5652                                    EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
5653  }
5654  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5655                                  v16f32_info>,
5656                                  EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
5657  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5658                                  v8f64_info>,
5659                                  EVEX_V512, TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
5660}
5661
5662defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
5663                              SchedWriteFAddSizes, 1>,
5664            avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
5665            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5666defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
5667                              SchedWriteFMulSizes, 1>,
5668            avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
5669            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5670defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
5671                              SchedWriteFAddSizes>,
5672            avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
5673            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5674defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
5675                              SchedWriteFDivSizes>,
5676            avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
5677            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5678defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
5679                              SchedWriteFCmpSizes, 0>,
5680            avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
5681            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5682defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
5683                              SchedWriteFCmpSizes, 0>,
5684            avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
5685            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
5686let isCodeGenOnly = 1 in {
5687  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
5688                                 SchedWriteFCmpSizes, 1>,
5689               avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
5690                                 SchedWriteFCmpSizes, 1>;
5691  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
5692                                 SchedWriteFCmpSizes, 1>,
5693               avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
5694                                 SchedWriteFCmpSizes, 1>;
5695}
5696let Uses = []<Register>, mayRaiseFPException = 0 in {
5697defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
5698                               SchedWriteFLogicSizes, 1>;
5699defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
5700                               SchedWriteFLogicSizes, 0>;
5701defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
5702                               SchedWriteFLogicSizes, 1>;
5703defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
5704                               SchedWriteFLogicSizes, 1>;
5705}
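// The FP logic forms above attach no patterns (null_frag); they are purely
// bitwise, so they neither read MXCSR nor raise FP exceptions, hence the
// empty Uses list and mayRaiseFPException = 0.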
5706
5707multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5708                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5709  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5710  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5711                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5712                  "$src2, $src1", "$src1, $src2",
5713                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5714                  EVEX, VVVV, Sched<[sched]>;
5715  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5716                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5717                  "$src2, $src1", "$src1, $src2",
5718                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5719                  EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
5720  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5721                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5722                   "${src2}"#_.BroadcastStr#", $src1",
5723                   "$src1, ${src2}"#_.BroadcastStr,
5724                   (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5725                   EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5726  }
5727}
5728
5729multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5730                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5731  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5732  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5733                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5734                  "$src2, $src1", "$src1, $src2",
5735                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5736                  Sched<[sched]>;
5737  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5738                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
5739                  "$src2, $src1", "$src1, $src2",
5740                  (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
5741                  Sched<[sched.Folded, sched.ReadAfterFold]>;
5742  }
5743}
5744
5745multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5746                                X86SchedWriteWidths sched> {
5747  let Predicates = [HasFP16] in {
5748    defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
5749               avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
5750                                EVEX_V512, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
5751    defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
5752               avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
5753                             EVEX, VVVV, T_MAP6, PD, EVEX_CD8<16, CD8VT1>;
5754  }
5755  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5756             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5757                              EVEX_V512, EVEX_CD8<32, CD8VF>, T8, PD;
5758  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5759             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5760                              EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5761  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5762             avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
5763                                    X86scalefsRnd, sched.Scl>,
5764                                    EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8, PD;
5765  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5766             avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
5767                                    X86scalefsRnd, sched.Scl>,
5768                                    EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>, REX_W, T8, PD;
5769
5770  // Define only if the AVX512VL feature is present.
5771  let Predicates = [HasVLX] in {
5772    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5773                                   EVEX_V128, EVEX_CD8<32, CD8VF>, T8, PD;
5774    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5775                                   EVEX_V256, EVEX_CD8<32, CD8VF>, T8, PD;
5776    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5777                                   EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5778    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
5779                                   EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5780  }
5781
5782  let Predicates = [HasFP16, HasVLX] in {
5783    defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
5784                                   EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
5785    defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
5786                                   EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
5787  }
5788}
5789defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", SchedWriteFAdd>;
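// VSCALEF above instantiates packed ph/ps/pd forms at 128/256/512 bits (the
// narrow widths under VLX, the FP16 forms under HasFP16), scalar sh/ss/sd
// forms, and embedded-rounding (rrb) variants for the 512-bit and scalar
// cases.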
5790
5791//===----------------------------------------------------------------------===//
5792// AVX-512  VPTESTM instructions
5793//===----------------------------------------------------------------------===//
5794
5795multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
5796                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5797  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
5798  // There are just too many permutations due to commutability and bitcasts.
5799  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5800  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5801                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5802                      "$src2, $src1", "$src1, $src2",
5803                   (null_frag), (null_frag), 1>,
5804                   EVEX, VVVV, Sched<[sched]>;
5805  let mayLoad = 1 in
5806  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5807                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5808                       "$src2, $src1", "$src1, $src2",
5809                   (null_frag), (null_frag)>,
5810                   EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
5811                   Sched<[sched.Folded, sched.ReadAfterFold]>;
5812  }
5813}
5814
5815multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
5816                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5817  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
5818  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5819                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5820                    "${src2}"#_.BroadcastStr#", $src1",
5821                    "$src1, ${src2}"#_.BroadcastStr,
5822                    (null_frag), (null_frag)>,
5823                    EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
5824                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5825}
5826
5827multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
5828                                  X86SchedWriteWidths sched,
5829                                  AVX512VLVectorVTInfo _> {
5830  let Predicates  = [HasAVX512] in
5831  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
5832           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
5833
5834  let Predicates = [HasAVX512, HasVLX] in {
5835  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
5836              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
5837  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
5838              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
5839  }
5840}
5841
5842multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
5843                            X86SchedWriteWidths sched> {
5844  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
5845                                 avx512vl_i32_info>;
5846  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
5847                                 avx512vl_i64_info>, REX_W;
5848}
5849
5850multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5851                            X86SchedWriteWidths sched> {
5852  let Predicates = [HasBWI] in {
5853  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
5854                            v32i16_info>, EVEX_V512, REX_W;
5855  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
5856                            v64i8_info>, EVEX_V512;
5857  }
5858
5859  let Predicates = [HasVLX, HasBWI] in {
5860  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
5861                            v16i16x_info>, EVEX_V256, REX_W;
5862  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
5863                            v8i16x_info>, EVEX_V128, REX_W;
5864  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
5865                            v32i8x_info>, EVEX_V256;
5866  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
5867                            v16i8x_info>, EVEX_V128;
5868  }
5869}
5870
5871multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5872                                   X86SchedWriteWidths sched> :
5873  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
5874  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
5875
5876defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
5877                                         SchedWriteVecLogic>, T8, PD;
5878defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
5879                                         SchedWriteVecLogic>, T8, XS;
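// VPTESTM sets a mask bit when the AND of the corresponding source elements
// is non-zero; VPTESTNM sets it when that AND is zero. As noted above, the
// selection patterns live in X86ISelDAGToDAG rather than here.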
5880
5881//===----------------------------------------------------------------------===//
5882// AVX-512  Shift instructions
5883//===----------------------------------------------------------------------===//
5884
5885multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5886                            string OpcodeStr, SDNode OpNode,
5887                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5888  let ExeDomain = _.ExeDomain in {
5889  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5890                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5891                      "$src2, $src1", "$src1, $src2",
5892                   (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
5893                   Sched<[sched]>;
5894  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5895                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5896                       "$src2, $src1", "$src1, $src2",
5897                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
5898                          (i8 timm:$src2)))>,
5899                   Sched<[sched.Folded]>;
5900  }
5901}
5902
5903multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5904                             string OpcodeStr, SDNode OpNode,
5905                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5906  let ExeDomain = _.ExeDomain in
5907  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5908                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5909      "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
5910     (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
5911     EVEX_B, Sched<[sched.Folded]>;
5912}
5913
5914multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5915                            X86FoldableSchedWrite sched, ValueType SrcVT,
5916                            X86VectorVTInfo _> {
5917  // src2 is always 128-bit.
5918  let ExeDomain = _.ExeDomain in {
5919  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5920                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5921                      "$src2, $src1", "$src1, $src2",
5922                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5923                   AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
5924  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5925                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5926                       "$src2, $src1", "$src1, $src2",
5927                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
5928                   AVX512BIBase,
5929                   EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
5930  }
5931}
5932
5933multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5934                              X86SchedWriteWidths sched, ValueType SrcVT,
5935                              AVX512VLVectorVTInfo VTInfo,
5936                              Predicate prd> {
5937  let Predicates = [prd] in
5938  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5939                               VTInfo.info512>, EVEX_V512,
5940                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5941  let Predicates = [prd, HasVLX] in {
5942  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5943                               VTInfo.info256>, EVEX_V256,
5944                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5945  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5946                               VTInfo.info128>, EVEX_V128,
5947                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
5948  }
5949}
5950
5951multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5952                              string OpcodeStr, SDNode OpNode,
5953                              X86SchedWriteWidths sched> {
5954  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5955                              avx512vl_i32_info, HasAVX512>;
5956  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5957                              avx512vl_i64_info, HasAVX512>, REX_W;
5958  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5959                              avx512vl_i16_info, HasBWI>;
5960}
5961
5962multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5963                                  string OpcodeStr, SDNode OpNode,
5964                                  X86SchedWriteWidths sched,
5965                                  AVX512VLVectorVTInfo VTInfo> {
5966  let Predicates = [HasAVX512] in
5967  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5968                              sched.ZMM, VTInfo.info512>,
5969             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5970                               VTInfo.info512>, EVEX_V512;
5971  let Predicates = [HasAVX512, HasVLX] in {
5972  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5973                              sched.YMM, VTInfo.info256>,
5974             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5975                               VTInfo.info256>, EVEX_V256;
5976  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5977                              sched.XMM, VTInfo.info128>,
5978             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5979                               VTInfo.info128>, EVEX_V128;
5980  }
5981}
5982
5983multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5984                              string OpcodeStr, SDNode OpNode,
5985                              X86SchedWriteWidths sched> {
5986  let Predicates = [HasBWI] in
5987  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5988                               sched.ZMM, v32i16_info>, EVEX_V512, WIG;
5989  let Predicates = [HasVLX, HasBWI] in {
5990  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5991                               sched.YMM, v16i16x_info>, EVEX_V256, WIG;
5992  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5993                               sched.XMM, v8i16x_info>, EVEX_V128, WIG;
5994  }
5995}
5996
5997multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5998                               Format ImmFormR, Format ImmFormM,
5999                               string OpcodeStr, SDNode OpNode,
6000                               X86SchedWriteWidths sched> {
6001  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
6002                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
6003  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
6004                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
6005}
6006
6007defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
6008                                 SchedWriteVecShiftImm>,
6009             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
6010                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6011
6012defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
6013                                 SchedWriteVecShiftImm>,
6014             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
6015                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6016
6017defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
6018                                 SchedWriteVecShiftImm>,
6019             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
6020                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6021
6022defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
6023                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6024defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
6025                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6026
6027defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
6028                                SchedWriteVecShift>;
6029defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
6030                                SchedWriteVecShift>;
6031defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
6032                                SchedWriteVecShift>;
6033
6034// Use the 512-bit VPSRA/VPSRAI forms to implement v2i64/v4i64 in the NoVLX case.
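// Illustrative sketch (not itself a pattern): only the data operand is widened
// into a ZMM register; the XMM shift count (or the immediate) is passed through
// unchanged. For the v2i64 case the patterns below roughly expand to:
//   zmm_tmp = INSERT_SUBREG(IMPLICIT_DEF, xmm_src1, sub_xmm)
//   zmm_res = VPSRAQZrr zmm_tmp, xmm_src2       // 512-bit vpsraq
//   result  = EXTRACT_SUBREG(zmm_res, sub_xmm)  // upper lanes are discarded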
6035let Predicates = [HasAVX512, NoVLX] in {
6036  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
6037            (EXTRACT_SUBREG (v8i64
6038              (VPSRAQZrr
6039                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6040                 VR128X:$src2)), sub_ymm)>;
6041
6042  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6043            (EXTRACT_SUBREG (v8i64
6044              (VPSRAQZrr
6045                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6046                 VR128X:$src2)), sub_xmm)>;
6047
6048  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
6049            (EXTRACT_SUBREG (v8i64
6050              (VPSRAQZri
6051                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6052                 timm:$src2)), sub_ymm)>;
6053
6054  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
6055            (EXTRACT_SUBREG (v8i64
6056              (VPSRAQZri
6057                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6058                 timm:$src2)), sub_xmm)>;
6059}
6060
6061//===----------------------------------------------------------------------===//
6062// Variable Bit Shifts
6063//===----------------------------------------------------------------------===//
6064
6065multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
6066                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6067  let ExeDomain = _.ExeDomain in {
6068  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6069                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6070                      "$src2, $src1", "$src1, $src2",
6071                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
6072                   AVX5128IBase, EVEX, VVVV, Sched<[sched]>;
6073  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6074                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6075                       "$src2, $src1", "$src1, $src2",
6076                   (_.VT (OpNode _.RC:$src1,
6077                   (_.VT (_.LdFrag addr:$src2))))>,
6078                   AVX5128IBase, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
6079                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6080  }
6081}
6082
6083multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
6084                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6085  let ExeDomain = _.ExeDomain in
6086  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6087                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6088                    "${src2}"#_.BroadcastStr#", $src1",
6089                    "$src1, ${src2}"#_.BroadcastStr,
6090                    (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
6091                    AVX5128IBase, EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
6092                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6093}
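// For illustration only (assuming the usual AVX-512 assembly forms): each
// instantiation of the two multiclasses above provides a register form, a
// full-width memory form, and an embedded-broadcast form. For the 512-bit
// dword instantiation of VPSLLV further below this corresponds to, in Intel
// syntax:
//   vpsllvd zmm1, zmm2, zmm3
//   vpsllvd zmm1, zmm2, zmmword ptr [rax]
//   vpsllvd zmm1, zmm2, dword ptr [rax]{1to16}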
6094
6095multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6096                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6097  let Predicates  = [HasAVX512] in
6098  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6099           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
6100
6101  let Predicates = [HasAVX512, HasVLX] in {
6102  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6103              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
6104  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6105              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
6106  }
6107}
6108
6109multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
6110                                  SDNode OpNode, X86SchedWriteWidths sched> {
6111  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6112                                 avx512vl_i32_info>;
6113  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6114                                 avx512vl_i64_info>, REX_W;
6115}
6116
6117// Use the 512-bit version to implement the 128/256-bit forms in the NoVLX case.
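// The multiclass below matches the 256-/128-bit node by widening both operands
// into ZMM registers, running the 512-bit instruction named by OpcodeStr#"Zrr"
// (e.g. VPSRAVQZrr for the VPSRAVQ instantiation further below), and extracting
// the low lanes again; the contents of the upper lanes are simply ignored.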
6118multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6119                                     SDNode OpNode, list<Predicate> p> {
6120  let Predicates = p in {
6121  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6122                                  (_.info256.VT _.info256.RC:$src2))),
6123            (EXTRACT_SUBREG
6124                (!cast<Instruction>(OpcodeStr#"Zrr")
6125                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6126                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6127             sub_ymm)>;
6128
6129  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6130                                  (_.info128.VT _.info128.RC:$src2))),
6131            (EXTRACT_SUBREG
6132                (!cast<Instruction>(OpcodeStr#"Zrr")
6133                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6134                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6135             sub_xmm)>;
6136  }
6137}
6138multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6139                              SDNode OpNode, X86SchedWriteWidths sched> {
6140  let Predicates = [HasBWI] in
6141  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6142              EVEX_V512, REX_W;
6143  let Predicates = [HasVLX, HasBWI] in {
6144
6145  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6146              EVEX_V256, REX_W;
6147  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6148              EVEX_V128, REX_W;
6149  }
6150}
6151
6152defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6153              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6154
6155defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6156              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6157
6158defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6159              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
6160
6161defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6162defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6163
6164defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6165defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6166defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6167defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6168
6169
6170// Use the 512-bit VPROL/VPROLI forms to implement v2i64/v4i64 and v4i32/v8i32 in the NoVLX case.
6171let Predicates = [HasAVX512, NoVLX] in {
6172  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6173            (EXTRACT_SUBREG (v8i64
6174              (VPROLVQZrr
6175                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6176                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6177                       sub_xmm)>;
6178  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6179            (EXTRACT_SUBREG (v8i64
6180              (VPROLVQZrr
6181                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6182                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6183                       sub_ymm)>;
6184
6185  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6186            (EXTRACT_SUBREG (v16i32
6187              (VPROLVDZrr
6188                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6189                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6190                        sub_xmm)>;
6191  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6192            (EXTRACT_SUBREG (v16i32
6193              (VPROLVDZrr
6194                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6195                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6196                        sub_ymm)>;
6197
6198  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6199            (EXTRACT_SUBREG (v8i64
6200              (VPROLQZri
6201                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6202                        timm:$src2)), sub_xmm)>;
6203  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6204            (EXTRACT_SUBREG (v8i64
6205              (VPROLQZri
6206                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6207                       timm:$src2)), sub_ymm)>;
6208
6209  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6210            (EXTRACT_SUBREG (v16i32
6211              (VPROLDZri
6212                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6213                        timm:$src2)), sub_xmm)>;
6214  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6215            (EXTRACT_SUBREG (v16i32
6216              (VPROLDZri
6217                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6218                        timm:$src2)), sub_ymm)>;
6219}
6220
6221// Use the 512-bit VPROR/VPRORI forms to implement v2i64/v4i64 and v4i32/v8i32 in the NoVLX case.
6222let Predicates = [HasAVX512, NoVLX] in {
6223  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6224            (EXTRACT_SUBREG (v8i64
6225              (VPRORVQZrr
6226                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6227                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6228                       sub_xmm)>;
6229  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6230            (EXTRACT_SUBREG (v8i64
6231              (VPRORVQZrr
6232                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6233                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6234                       sub_ymm)>;
6235
6236  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6237            (EXTRACT_SUBREG (v16i32
6238              (VPRORVDZrr
6239                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6240                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6241                        sub_xmm)>;
6242  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6243            (EXTRACT_SUBREG (v16i32
6244              (VPRORVDZrr
6245                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6246                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6247                        sub_ymm)>;
6248
6249  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6250            (EXTRACT_SUBREG (v8i64
6251              (VPRORQZri
6252                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6253                        timm:$src2)), sub_xmm)>;
6254  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6255            (EXTRACT_SUBREG (v8i64
6256              (VPRORQZri
6257                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6258                       timm:$src2)), sub_ymm)>;
6259
6260  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6261            (EXTRACT_SUBREG (v16i32
6262              (VPRORDZri
6263                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6264                        timm:$src2)), sub_xmm)>;
6265  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6266            (EXTRACT_SUBREG (v16i32
6267              (VPRORDZri
6268                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6269                        timm:$src2)), sub_ymm)>;
6270}
6271
6272//===----------------------------------------------------------------------===//
6273// 1-src variable permutation VPERMW/D/Q
6274//===----------------------------------------------------------------------===//
6275
6276multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6277                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6278  let Predicates  = [HasAVX512] in
6279  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6280           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6281
6282  let Predicates = [HasAVX512, HasVLX] in
6283  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6284              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6285}
6286
6287multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6288                                 string OpcodeStr, SDNode OpNode,
6289                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6290  let Predicates = [HasAVX512] in
6291  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6292                              sched, VTInfo.info512>,
6293             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6294                               sched, VTInfo.info512>, EVEX_V512;
6295  let Predicates = [HasAVX512, HasVLX] in
6296  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6297                              sched, VTInfo.info256>,
6298             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6299                               sched, VTInfo.info256>, EVEX_V256;
6300}
6301
6302multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6303                              Predicate prd, SDNode OpNode,
6304                              X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6305  let Predicates = [prd] in
6306  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6307              EVEX_V512 ;
6308  let Predicates = [HasVLX, prd] in {
6309  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6310              EVEX_V256 ;
6311  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6312              EVEX_V128 ;
6313  }
6314}
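// Note: these 1-source permutations reuse the variable-shift multiclasses
// purely for their operand shape (reg/reg and reg/mem, plus broadcast for the
// dword/qword/fp forms); the semantics come from the X86VPermv node.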
6315
6316defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6317                               WriteVarShuffle256, avx512vl_i16_info>, REX_W;
6318defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6319                               WriteVarShuffle256, avx512vl_i8_info>;
6320
6321defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6322                                    WriteVarShuffle256, avx512vl_i32_info>;
6323defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6324                                    WriteVarShuffle256, avx512vl_i64_info>, REX_W;
6325defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6326                                     WriteFVarShuffle256, avx512vl_f32_info>;
6327defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6328                                     WriteFVarShuffle256, avx512vl_f64_info>, REX_W;
6329
6330defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6331                             X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6332                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
6333defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6334                             X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6335                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
6336
6337//===----------------------------------------------------------------------===//
6338// AVX-512 - VPERMIL
6339//===----------------------------------------------------------------------===//
6340
6341multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6342                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
6343                             X86VectorVTInfo Ctrl> {
6344  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6345                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6346                  "$src2, $src1", "$src1, $src2",
6347                  (_.VT (OpNode _.RC:$src1,
6348                               (Ctrl.VT Ctrl.RC:$src2)))>,
6349                  T8, PD, EVEX, VVVV, Sched<[sched]>;
6350  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6351                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6352                  "$src2, $src1", "$src1, $src2",
6353                  (_.VT (OpNode
6354                           _.RC:$src1,
6355                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6356                  T8, PD, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
6357                  Sched<[sched.Folded, sched.ReadAfterFold]>;
6358  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6359                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6360                   "${src2}"#_.BroadcastStr#", $src1",
6361                   "$src1, ${src2}"#_.BroadcastStr,
6362                   (_.VT (OpNode
6363                            _.RC:$src1,
6364                            (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6365                   T8, PD, EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6366                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6367}
6368
6369multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6370                                    X86SchedWriteWidths sched,
6371                                    AVX512VLVectorVTInfo _,
6372                                    AVX512VLVectorVTInfo Ctrl> {
6373  let Predicates = [HasAVX512] in {
6374    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6375                                  _.info512, Ctrl.info512>, EVEX_V512;
6376  }
6377  let Predicates = [HasAVX512, HasVLX] in {
6378    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6379                                  _.info128, Ctrl.info128>, EVEX_V128;
6380    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6381                                  _.info256, Ctrl.info256>, EVEX_V256;
6382  }
6383}
6384
6385multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6386                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6387  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6388                                      _, Ctrl>;
6389  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6390                                    X86VPermilpi, SchedWriteFShuffle, _>,
6391                    EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6392}
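// Note: each VPERMILPS/VPERMILPD defined below gets both a variable form
// (vector control, opcode OpcVar) and an immediate form (imm8 control, opcode
// OpcImm); the immediate form reuses avx512_shift_rmi_sizes since it has the
// same register/memory-plus-imm8 operand shape.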
6393
6394let ExeDomain = SSEPackedSingle in
6395defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6396                               avx512vl_i32_info>;
6397let ExeDomain = SSEPackedDouble in
6398defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6399                               avx512vl_i64_info>, REX_W;
6400
6401//===----------------------------------------------------------------------===//
6402// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6403//===----------------------------------------------------------------------===//
6404
6405defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6406                             X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6407                             EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6408defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6409                                  X86PShufhw, SchedWriteShuffle>,
6410                                  EVEX, AVX512XSIi8Base;
6411defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6412                                  X86PShuflw, SchedWriteShuffle>,
6413                                  EVEX, AVX512XDIi8Base;
6414
6415//===----------------------------------------------------------------------===//
6416// AVX-512 - VPSHUFB
6417//===----------------------------------------------------------------------===//
6418
6419multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6420                               X86SchedWriteWidths sched> {
6421  let Predicates = [HasBWI] in
6422  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6423                              EVEX_V512;
6424
6425  let Predicates = [HasVLX, HasBWI] in {
6426  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6427                              EVEX_V256;
6428  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6429                              EVEX_V128;
6430  }
6431}
6432
6433defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6434                                  SchedWriteVarShuffle>, WIG;
6435
6436//===----------------------------------------------------------------------===//
6437// Move Low to High and High to Low packed FP Instructions
6438//===----------------------------------------------------------------------===//
6439
6440def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6441          (ins VR128X:$src1, VR128X:$src2),
6442          "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6443          [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6444          Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV;
6445let isCommutable = 1 in
6446def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6447          (ins VR128X:$src1, VR128X:$src2),
6448          "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6449          [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6450          Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV;
6451
6452//===----------------------------------------------------------------------===//
6453// VMOVHPS/PD VMOVLPS Instructions
6454// All patterns were taken from the SSE implementation.
6455//===----------------------------------------------------------------------===//
6456
6457multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6458                                  SDPatternOperator OpNode,
6459                                  X86VectorVTInfo _> {
6460  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6461  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6462                  (ins _.RC:$src1, f64mem:$src2),
6463                  !strconcat(OpcodeStr,
6464                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6465                  [(set _.RC:$dst,
6466                     (OpNode _.RC:$src1,
6467                       (_.VT (bitconvert
6468                         (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6469                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX, VVVV;
6470}
6471
6472// No patterns for MOVLPS/MOVHPS, as the Movlhps node should only be created
6473// in SSE1, and the MOVLPS pattern is even more complex.
6474defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6475                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB;
6476defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6477                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W;
6478defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6479                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB;
6480defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6481                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W;
6482
6483let Predicates = [HasAVX512] in {
6484  // VMOVHPD patterns
6485  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6486            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6487
6488  // VMOVLPD patterns
6489  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6490            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6491}
6492
6493let SchedRW = [WriteFStore] in {
6494let mayStore = 1, hasSideEffects = 0 in
6495def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6496                       (ins f64mem:$dst, VR128X:$src),
6497                       "vmovhps\t{$src, $dst|$dst, $src}",
6498                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
6499def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6500                       (ins f64mem:$dst, VR128X:$src),
6501                       "vmovhpd\t{$src, $dst|$dst, $src}",
6502                       [(store (f64 (extractelt
6503                                     (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6504                                     (iPTR 0))), addr:$dst)]>,
6505                       EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
6506let mayStore = 1, hasSideEffects = 0 in
6507def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6508                       (ins f64mem:$dst, VR128X:$src),
6509                       "vmovlps\t{$src, $dst|$dst, $src}",
6510                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
6511def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6512                       (ins f64mem:$dst, VR128X:$src),
6513                       "vmovlpd\t{$src, $dst|$dst, $src}",
6514                       [(store (f64 (extractelt (v2f64 VR128X:$src),
6515                                     (iPTR 0))), addr:$dst)]>,
6516                       EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
6517} // SchedRW
6518
6519let Predicates = [HasAVX512] in {
6520  // VMOVHPD patterns
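  // The pattern below matches a store of the high f64 element: vpermilpd with
  // immediate 1 moves element 1 into lane 0, so the extracted scalar is the
  // high half of $src, which the VMOVHPD store writes out directly.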
6521  def : Pat<(store (f64 (extractelt
6522                           (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6523                           (iPTR 0))), addr:$dst),
6524           (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6525}
6526//===----------------------------------------------------------------------===//
6527// FMA - Fused Multiply Operations
6528//===----------------------------------------------------------------------===//
6529
6530multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6531                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6532                               X86VectorVTInfo _> {
6533  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6534      Uses = [MXCSR], mayRaiseFPException = 1 in {
6535  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6536          (ins _.RC:$src2, _.RC:$src3),
6537          OpcodeStr, "$src3, $src2", "$src2, $src3",
6538          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
6539          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6540          EVEX, VVVV, Sched<[sched]>;
6541
6542  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6543          (ins _.RC:$src2, _.MemOp:$src3),
6544          OpcodeStr, "$src3, $src2", "$src2, $src3",
6545          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
6546          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6547          EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
6548                          sched.ReadAfterFold]>;
6549
6550  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6551            (ins _.RC:$src2, _.ScalarMemOp:$src3),
6552            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6553            !strconcat("$src2, ${src3}", _.BroadcastStr ),
6554            (OpNode _.RC:$src2,
6555             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
6556            (MaskOpNode _.RC:$src2,
6557             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6558            EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
6559                                    sched.ReadAfterFold]>;
6560  }
6561}
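// For reference: with $src1 tied to $dst, the 213 forms compute
//   dst = src2 * src1 + src3      (for the plain FMA node)
// which is what the (OpNode src2, src1, src3) patterns above express; the
// other variants below differ only in which terms are negated (per element
// for the fmaddsub/fmsubadd forms).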
6562
6563multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6564                                 X86FoldableSchedWrite sched,
6565                                 X86VectorVTInfo _> {
6566  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6567      Uses = [MXCSR] in
6568  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6569          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6570          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6571          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
6572          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6573          EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
6574}
6575
6576multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6577                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6578                                   X86SchedWriteWidths sched,
6579                                   AVX512VLVectorVTInfo _,
6580                                   Predicate prd = HasAVX512> {
6581  let Predicates = [prd] in {
6582    defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6583                                      sched.ZMM, _.info512>,
6584                  avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6585                                        _.info512>,
6586                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6587  }
6588  let Predicates = [HasVLX, prd] in {
6589    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6590                                    sched.YMM, _.info256>,
6591                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6592    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6593                                    sched.XMM, _.info128>,
6594                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6595  }
6596}
6597
6598multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6599                              SDNode MaskOpNode, SDNode OpNodeRnd> {
6600    defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6601                                      OpNodeRnd, SchedWriteFMA,
6602                                      avx512vl_f16_info, HasFP16>, T_MAP6, PD;
6603    defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6604                                      OpNodeRnd, SchedWriteFMA,
6605                                      avx512vl_f32_info>, T8, PD;
6606    defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6607                                      OpNodeRnd, SchedWriteFMA,
6608                                      avx512vl_f64_info>, T8, PD, REX_W;
6609}
6610
6611defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
6612                                       fma, X86FmaddRnd>;
6613defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
6614                                       X86Fmsub, X86FmsubRnd>;
6615defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
6616                                       X86Fmaddsub, X86FmaddsubRnd>;
6617defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
6618                                       X86Fmsubadd, X86FmsubaddRnd>;
6619defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
6620                                       X86Fnmadd, X86FnmaddRnd>;
6621defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
6622                                       X86Fnmsub, X86FnmsubRnd>;
6623
6624
6625multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6626                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6627                               X86VectorVTInfo _> {
6628  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6629      Uses = [MXCSR], mayRaiseFPException = 1 in {
6630  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6631          (ins _.RC:$src2, _.RC:$src3),
6632          OpcodeStr, "$src3, $src2", "$src2, $src3",
6633          (null_frag),
6634          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6635          EVEX, VVVV, Sched<[sched]>;
6636
6637  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6638          (ins _.RC:$src2, _.MemOp:$src3),
6639          OpcodeStr, "$src3, $src2", "$src2, $src3",
6640          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6641          (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6642          EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
6643                          sched.ReadAfterFold]>;
6644
6645  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6646         (ins _.RC:$src2, _.ScalarMemOp:$src3),
6647         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6648         "$src2, ${src3}"#_.BroadcastStr,
6649         (_.VT (OpNode _.RC:$src2,
6650                      (_.VT (_.BroadcastLdFrag addr:$src3)),
6651                      _.RC:$src1)),
6652         (_.VT (MaskOpNode _.RC:$src2,
6653                           (_.VT (_.BroadcastLdFrag addr:$src3)),
6654                           _.RC:$src1)), 1, 0>, EVEX, VVVV, EVEX_B,
6655         Sched<[sched.Folded, sched.ReadAfterFold,
6656                sched.ReadAfterFold]>;
6657  }
6658}
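// For reference: the 231 forms use $dst as the accumulator and compute
//   dst = src2 * src3 + src1
// matching the (OpNode src2, src3, src1) patterns above. The unmasked pattern
// on the register form is null_frag, presumably because that case is already
// selectable through the 213 register form.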
6659
6660multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6661                                 X86FoldableSchedWrite sched,
6662                                 X86VectorVTInfo _> {
6663  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6664      Uses = [MXCSR] in
6665  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6666          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6667          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6668          (null_frag),
6669          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6670          1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
6671}
6672
6673multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6674                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6675                                   X86SchedWriteWidths sched,
6676                                   AVX512VLVectorVTInfo _,
6677                                   Predicate prd = HasAVX512> {
6678  let Predicates = [prd] in {
6679    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6680                                      sched.ZMM, _.info512>,
6681                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6682                                        _.info512>,
6683                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6684  }
6685  let Predicates = [HasVLX, prd] in {
6686    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6687                                    sched.YMM, _.info256>,
6688                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6689    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6690                                    sched.XMM, _.info128>,
6691                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6692  }
6693}
6694
6695multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6696                              SDNode MaskOpNode, SDNode OpNodeRnd > {
6697    defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6698                                      OpNodeRnd, SchedWriteFMA,
6699                                      avx512vl_f16_info, HasFP16>, T_MAP6, PD;
6700    defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6701                                      OpNodeRnd, SchedWriteFMA,
6702                                      avx512vl_f32_info>, T8, PD;
6703    defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6704                                      OpNodeRnd, SchedWriteFMA,
6705                                      avx512vl_f64_info>, T8, PD, REX_W;
6706}
6707
6708defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
6709                                       fma, X86FmaddRnd>;
6710defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
6711                                       X86Fmsub, X86FmsubRnd>;
6712defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
6713                                       X86Fmaddsub, X86FmaddsubRnd>;
6714defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
6715                                       X86Fmsubadd, X86FmsubaddRnd>;
6716defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
6717                                       X86Fnmadd, X86FnmaddRnd>;
6718defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
6719                                       X86Fnmsub, X86FnmsubRnd>;
6720
6721multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6722                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6723                               X86VectorVTInfo _> {
6724  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6725      Uses = [MXCSR], mayRaiseFPException = 1 in {
6726  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6727          (ins _.RC:$src2, _.RC:$src3),
6728          OpcodeStr, "$src3, $src2", "$src2, $src3",
6729          (null_frag),
6730          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
6731          EVEX, VVVV, Sched<[sched]>;
6732
6733  // The pattern is in 312 order so that the load is in a different place from
6734  // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6735  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6736          (ins _.RC:$src2, _.MemOp:$src3),
6737          OpcodeStr, "$src3, $src2", "$src2, $src3",
6738          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
6739          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6740          EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
6741                          sched.ReadAfterFold]>;
6742
6743  // The pattern is in 312 order so that the load is in a different place from
6744  // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6745  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6746         (ins _.RC:$src2, _.ScalarMemOp:$src3),
6747         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6748         "$src2, ${src3}"#_.BroadcastStr,
6749         (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6750                       _.RC:$src1, _.RC:$src2)),
6751         (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6752                           _.RC:$src1, _.RC:$src2)), 1, 0>,
6753         EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
6754                                 sched.ReadAfterFold]>;
6755  }
6756}
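// For reference: the 132 forms compute
//   dst = src1 * src3 + src2
// The patterns above are written in 312 order (src3 * src1 + src2), which is
// equivalent since the multiply operands commute, while keeping the load in a
// position distinct from the 213/231 patterns.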
6757
6758multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6759                                 X86FoldableSchedWrite sched,
6760                                 X86VectorVTInfo _> {
6761  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6762      Uses = [MXCSR] in
6763  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6764          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6765          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6766          (null_frag),
6767          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
6768          1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
6769}
6770
6771multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6772                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6773                                   X86SchedWriteWidths sched,
6774                                   AVX512VLVectorVTInfo _,
6775                                   Predicate prd = HasAVX512> {
6776  let Predicates = [prd] in {
6777    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6778                                      sched.ZMM, _.info512>,
6779                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6780                                        _.info512>,
6781                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6782  }
6783  let Predicates = [HasVLX, prd] in {
6784    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6785                                    sched.YMM, _.info256>,
6786                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6787    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6788                                    sched.XMM, _.info128>,
6789                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6790  }
6791}
6792
6793multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6794                              SDNode MaskOpNode, SDNode OpNodeRnd > {
6795    defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6796                                      OpNodeRnd, SchedWriteFMA,
6797                                      avx512vl_f16_info, HasFP16>, T_MAP6, PD;
6798    defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6799                                      OpNodeRnd, SchedWriteFMA,
6800                                      avx512vl_f32_info>, T8, PD;
6801    defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6802                                      OpNodeRnd, SchedWriteFMA,
6803                                      avx512vl_f64_info>, T8, PD, REX_W;
6804}
6805
6806defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
6807                                       fma, X86FmaddRnd>;
6808defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
6809                                       X86Fmsub, X86FmsubRnd>;
6810defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
6811                                       X86Fmaddsub, X86FmaddsubRnd>;
6812defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
6813                                       X86Fmsubadd, X86FmsubaddRnd>;
6814defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
6815                                       X86Fnmadd, X86FnmaddRnd>;
6816defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
6817                                       X86Fnmsub, X86FnmsubRnd>;
6818
6819// Scalar FMA
6820multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6821                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
6822let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6823  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6824          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6825          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6826          EVEX, VVVV, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
6827
6828  let mayLoad = 1 in
6829  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6830          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6831          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6832          EVEX, VVVV, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
6833                          SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
6834
6835  let Uses = [MXCSR] in
6836  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6837         (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6838         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
6839         EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
6840
6841  let isCodeGenOnly = 1, isCommutable = 1 in {
6842    def r     : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
6843                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6844                     !strconcat(OpcodeStr,
6845                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6846                     !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV, SIMD_EXC;
6847    def m     : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
6848                    (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6849                    !strconcat(OpcodeStr,
6850                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6851                    [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
6852                                     SchedWriteFMA.Scl.ReadAfterFold]>, EVEX, VVVV, SIMD_EXC;
6853
6854    let Uses = [MXCSR] in
6855    def rb    : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
6856                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
6857                     !strconcat(OpcodeStr,
6858                              "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
6859                     !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
6860                     Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV;
6861  }// isCodeGenOnly = 1
6862}// Constraints = "$src1 = $dst"
6863}
6864
6865multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6866                            string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
6867                            X86VectorVTInfo _, string SUFF> {
6868  let ExeDomain = _.ExeDomain in {
6869  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6870                // Operands for the intrinsic are in 123 order to preserve
6871                // passthru semantics.
6872                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6873                         _.FRC:$src3))),
6874                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6875                         (_.ScalarLdFrag addr:$src3)))),
6876                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
6877                         _.FRC:$src3, (i32 timm:$rc)))), 0>;
6878
6879  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6880                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6881                                          _.FRC:$src1))),
6882                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6883                            (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
6884                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
6885                         _.FRC:$src1, (i32 timm:$rc)))), 1>;
6886
6887  // One pattern is in 312 order so that the load is in a different place from
6888  // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6889  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6890                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6891                         _.FRC:$src2))),
6892                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6893                                 _.FRC:$src1, _.FRC:$src2))),
6894                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
6895                         _.FRC:$src2, (i32 timm:$rc)))), 1>;
6896  }
6897}
6898
6899multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6900                        string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
6901  let Predicates = [HasAVX512] in {
6902    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6903                                 OpNodeRnd, f32x_info, "SS">,
6904                                 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD;
6905    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6906                                 OpNodeRnd, f64x_info, "SD">,
6907                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD;
6908  }
6909  let Predicates = [HasFP16] in {
6910    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6911                                 OpNodeRnd, f16x_info, "SH">,
6912                                 EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6, PD;
6913  }
6914}
6915
6916defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
6917defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
6918defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
6919defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
6920
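// The multiclass below maps scalar FMA DAGs, in which the scalar result is
// merged back into element 0 of $src1 via Move/scalar_to_vector, onto the
// *_Int instruction forms; the 213/231/132 flavor is chosen by where the
// element extracted from $src1 and the (optional) load appear among the FMA
// operands.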
6921multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
6922                                      SDNode RndOp, string Prefix,
6923                                      string Suffix, SDNode Move,
6924                                      X86VectorVTInfo _, PatLeaf ZeroFP,
6925                                      Predicate prd = HasAVX512> {
6926  let Predicates = [prd] in {
6927    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6928                (Op _.FRC:$src2,
6929                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6930                    _.FRC:$src3))))),
6931              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6932               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6933               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6934
6935    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6936                (Op _.FRC:$src2, _.FRC:$src3,
6937                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6938              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6939               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6940               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6941
6942    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6943                (Op _.FRC:$src2,
6944                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6945                    (_.ScalarLdFrag addr:$src3)))))),
6946              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6947               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6948               addr:$src3)>;
6949
6950    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6951                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6952                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6953              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6954               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6955               addr:$src3)>;
6956
6957    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6958                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6959                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6960              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6961               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6962               addr:$src3)>;
6963
6964    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6965               (X86selects_mask VK1WM:$mask,
6966                (MaskedOp _.FRC:$src2,
6967                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6968                    _.FRC:$src3),
6969                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6970              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6971               VR128X:$src1, VK1WM:$mask,
6972               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6973               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6974
6975    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6976               (X86selects_mask VK1WM:$mask,
6977                (MaskedOp _.FRC:$src2,
6978                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6979                    (_.ScalarLdFrag addr:$src3)),
6980                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6981              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6982               VR128X:$src1, VK1WM:$mask,
6983               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6984
6985    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6986               (X86selects_mask VK1WM:$mask,
6987                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6988                          (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6989                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6990              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
6991               VR128X:$src1, VK1WM:$mask,
6992               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6993
6994    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6995               (X86selects_mask VK1WM:$mask,
6996                (MaskedOp _.FRC:$src2, _.FRC:$src3,
6997                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6998                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6999              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
7000               VR128X:$src1, VK1WM:$mask,
7001               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7002               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7003
7004    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7005               (X86selects_mask VK1WM:$mask,
7006                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7007                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7008                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7009              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
7010               VR128X:$src1, VK1WM:$mask,
7011               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7012
7013    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7014               (X86selects_mask VK1WM:$mask,
7015                (MaskedOp _.FRC:$src2,
7016                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7017                          _.FRC:$src3),
7018                (_.EltVT ZeroFP)))))),
7019              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
7020               VR128X:$src1, VK1WM:$mask,
7021               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7022               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7023
7024    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7025               (X86selects_mask VK1WM:$mask,
7026                (MaskedOp _.FRC:$src2, _.FRC:$src3,
7027                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7028                (_.EltVT ZeroFP)))))),
7029              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
7030               VR128X:$src1, VK1WM:$mask,
7031               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7032               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7033
7034    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7035               (X86selects_mask VK1WM:$mask,
7036                (MaskedOp _.FRC:$src2,
7037                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7038                          (_.ScalarLdFrag addr:$src3)),
7039                (_.EltVT ZeroFP)))))),
7040              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
7041               VR128X:$src1, VK1WM:$mask,
7042               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7043
7044    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7045               (X86selects_mask VK1WM:$mask,
7046                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7047                          _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
7048                (_.EltVT ZeroFP)))))),
7049              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
7050               VR128X:$src1, VK1WM:$mask,
7051               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7052
7053    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7054               (X86selects_mask VK1WM:$mask,
7055                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7056                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7057                (_.EltVT ZeroFP)))))),
7058              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
7059               VR128X:$src1, VK1WM:$mask,
7060               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7061
7062    // Patterns with rounding mode.
7063    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7064                (RndOp _.FRC:$src2,
7065                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7066                       _.FRC:$src3, (i32 timm:$rc)))))),
7067              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
7068               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7069               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7070
7071    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7072                (RndOp _.FRC:$src2, _.FRC:$src3,
7073                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7074                       (i32 timm:$rc)))))),
7075              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
7076               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7077               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7078
7079    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7080               (X86selects_mask VK1WM:$mask,
7081                (RndOp _.FRC:$src2,
7082                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7083                       _.FRC:$src3, (i32 timm:$rc)),
7084                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7085              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
7086               VR128X:$src1, VK1WM:$mask,
7087               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7088               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7089
7090    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7091               (X86selects_mask VK1WM:$mask,
7092                (RndOp _.FRC:$src2, _.FRC:$src3,
7093                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7094                       (i32 timm:$rc)),
7095                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7096              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
7097               VR128X:$src1, VK1WM:$mask,
7098               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7099               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7100
7101    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7102               (X86selects_mask VK1WM:$mask,
7103                (RndOp _.FRC:$src2,
7104                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7105                       _.FRC:$src3, (i32 timm:$rc)),
7106                (_.EltVT ZeroFP)))))),
7107              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
7108               VR128X:$src1, VK1WM:$mask,
7109               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7110               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7111
7112    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7113               (X86selects_mask VK1WM:$mask,
7114                (RndOp _.FRC:$src2, _.FRC:$src3,
7115                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7116                       (i32 timm:$rc)),
7117                (_.EltVT ZeroFP)))))),
7118              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
7119               VR128X:$src1, VK1WM:$mask,
7120               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7121               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7122  }
7123}
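// For reference, the "213"/"132"/"231" digits chosen by the patterns above
// encode the operand order of the FMA3 instruction that gets selected,
// roughly (illustrative AT&T forms):
//   vfmadd213ss %xmm3, %xmm2, %xmm1   # xmm1 = xmm2 * xmm1 + xmm3
//   vfmadd231ss %xmm3, %xmm2, %xmm1   # xmm1 = xmm2 * xmm3 + xmm1
//   vfmadd132ss %xmm3, %xmm2, %xmm1   # xmm1 = xmm1 * xmm3 + xmm2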
7124defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
7125                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7126defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
7127                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7128defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
7129                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7130defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
7131                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7132
7133defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7134                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7135defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7136                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7137defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7138                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7139defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7140                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7141
7142defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7143                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7144defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7145                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7146defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7147                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7148defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7149                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7150
7151//===----------------------------------------------------------------------===//
7152// AVX-512  IFMA - Packed Multiply of Unsigned 52-bit Integers and Add the Low/High 52 Bits
7153//===----------------------------------------------------------------------===//
7154let Constraints = "$src1 = $dst" in {
7155multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7156                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7157  // NOTE: The SDNode has the multiply operands first, with the add operand last.
7158  // This enables commuted load patterns to be autogenerated by tablegen.
7159  let ExeDomain = _.ExeDomain in {
7160  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7161          (ins _.RC:$src2, _.RC:$src3),
7162          OpcodeStr, "$src3, $src2", "$src2, $src3",
7163          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7164          T8, PD, EVEX, VVVV, Sched<[sched]>;
7165
7166  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7167          (ins _.RC:$src2, _.MemOp:$src3),
7168          OpcodeStr, "$src3, $src2", "$src2, $src3",
7169          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7170          T8, PD, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
7171                                sched.ReadAfterFold]>;
7172
7173  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7174            (ins _.RC:$src2, _.ScalarMemOp:$src3),
7175            OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
7176            !strconcat("$src2, ${src3}", _.BroadcastStr),
7177            (OpNode _.RC:$src2,
7178                    (_.VT (_.BroadcastLdFrag addr:$src3)),
7179                    _.RC:$src1)>,
7180            T8, PD, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
7181                                          sched.ReadAfterFold]>;
7182  }
7183}
7184} // Constraints = "$src1 = $dst"
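// Because the node is commutable in its two multiply operands (see the NOTE
// above), TableGen can also generate the pattern with the load folded into
// either multiplicand, so no hand-written commuted memory patterns are needed.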
7185
7186multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7187                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7188  let Predicates = [HasIFMA] in {
7189    defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7190                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7191  }
7192  let Predicates = [HasVLX, HasIFMA] in {
7193    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7194                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7195    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7196                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7197  }
7198}
7199
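// Per 64-bit lane, the instructions below form the 104-bit product of the low
// 52 bits of the two multiplicands and accumulate part of it into $dst,
// roughly (sketch of the semantics, not the formal definition):
//   vpmadd52luq: dst[i] += zext64(product[51:0])
//   vpmadd52huq: dst[i] += zext64(product[103:52])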
7200defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7201                                         SchedWriteVecIMul, avx512vl_i64_info>,
7202                                         REX_W;
7203defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7204                                         SchedWriteVecIMul, avx512vl_i64_info>,
7205                                         REX_W;
7206
7207//===----------------------------------------------------------------------===//
7208// AVX-512  Scalar convert from signed/unsigned integer to float/double
7209//===----------------------------------------------------------------------===//
7210
7211multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7212                    RegisterClass SrcRC, X86VectorVTInfo DstVT,
7213                    X86MemOperand x86memop, PatFrag ld_frag, string asm,
7214                    string mem, list<Register> _Uses = [MXCSR],
7215                    bit _mayRaiseFPException = 1> {
7216let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7217    mayRaiseFPException = _mayRaiseFPException in {
7218  let hasSideEffects = 0, isCodeGenOnly = 1 in {
7219    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7220              (ins DstVT.FRC:$src1, SrcRC:$src),
7221              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7222              EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7223    let mayLoad = 1 in
7224      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7225              (ins DstVT.FRC:$src1, x86memop:$src),
7226              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7227              EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
7228  } // hasSideEffects = 0, isCodeGenOnly = 1
7229  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7230                (ins DstVT.RC:$src1, SrcRC:$src2),
7231                !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7232                [(set DstVT.RC:$dst,
7233                      (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7234               EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7235
7236  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7237                (ins DstVT.RC:$src1, x86memop:$src2),
7238                asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7239                [(set DstVT.RC:$dst,
7240                      (OpNode (DstVT.VT DstVT.RC:$src1),
7241                               (ld_frag addr:$src2)))]>,
7242                EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
7243}
7244  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7245                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7246                  DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
7247}
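// The isCodeGenOnly FRC forms above are what plain scalar sint_to_fp /
// uint_to_fp selection uses, while the _Int forms on VR128X back the
// intrinsic patterns further below.  The "l"/"q" strings supply the AT&T
// memory-size suffix, e.g. (illustrative):
//   vcvtsi2ssl (%rax), %xmm1, %xmm0
//   vcvtsi2ssq %rax, %xmm1, %xmm0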
7248
7249multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7250                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
7251                               X86VectorVTInfo DstVT, string asm,
7252                               string mem> {
7253  let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7254  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7255              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7256              !strconcat(asm,
7257                  "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7258              [(set DstVT.RC:$dst,
7259                    (OpNode (DstVT.VT DstVT.RC:$src1),
7260                             SrcRC:$src2,
7261                             (i32 timm:$rc)))]>,
7262              EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7263  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7264                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7265                  DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
7266}
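// The rrb_Int form above embeds a static rounding mode ({rn-sae}, {rd-sae},
// {ru-sae} or {rz-sae}) via EVEX.RC; in AT&T syntax this prints roughly as
// "vcvtsi2ss %rax, {rn-sae}, %xmm1, %xmm0" (illustrative).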
7267
7268multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7269                                X86FoldableSchedWrite sched,
7270                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
7271                                X86MemOperand x86memop, PatFrag ld_frag,
7272                                string asm, string mem> {
7273  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7274              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7275                            ld_frag, asm, mem>, VEX_LIG;
7276}
7277
7278let Predicates = [HasAVX512] in {
7279defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7280                                 WriteCvtI2SS, GR32,
7281                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7282                                 TB, XS, EVEX_CD8<32, CD8VT1>;
7283defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7284                                 WriteCvtI2SS, GR64,
7285                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7286                                 TB, XS, REX_W, EVEX_CD8<64, CD8VT1>;
7287defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7288                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7289                                 TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7290defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7291                                 WriteCvtI2SD, GR64,
7292                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7293                                 TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7294
7295def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7296              (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7297def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7298              (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7299
7300def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7301          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7302def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7303          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7304def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7305          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7306def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7307          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7308
7309def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7310          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7311def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7312          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7313def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7314          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7315def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7316          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7317
7318defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7319                                  WriteCvtI2SS, GR32,
7320                                  v4f32x_info, i32mem, loadi32,
7321                                  "cvtusi2ss", "l">, TB, XS, EVEX_CD8<32, CD8VT1>;
7322defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7323                                  WriteCvtI2SS, GR64,
7324                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7325                                  TB, XS, REX_W, EVEX_CD8<64, CD8VT1>;
7326defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7327                                  i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7328                                  TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7329defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7330                                  WriteCvtI2SD, GR64,
7331                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7332                                  TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7333
7334def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7335              (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7336def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7337              (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7338
7339def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7340          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7341def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7342          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7343def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7344          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7345def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7346          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7347
7348def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7349          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7350def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7351          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7352def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7353          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7354def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7355          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7356}
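// The patterns in the block above select the isCodeGenOnly FRC forms for
// plain scalar conversions; the (IMPLICIT_DEF) operand merely satisfies the
// first source, whose upper elements are don't-care for the scalar result.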
7357
7358//===----------------------------------------------------------------------===//
7359// AVX-512  Scalar convert from float/double to integer
7360//===----------------------------------------------------------------------===//
7361
7362multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7363                                  X86VectorVTInfo DstVT, SDNode OpNode,
7364                                  SDNode OpNodeRnd,
7365                                  X86FoldableSchedWrite sched, string asm,
7366                                  string aliasStr, Predicate prd = HasAVX512> {
7367  let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
7368    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7369                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7370                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7371                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7372    let Uses = [MXCSR] in
7373    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7374                 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7375                 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7376                 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7377                 Sched<[sched]>;
7378    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7379                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7380                [(set DstVT.RC:$dst, (OpNode
7381                      (SrcVT.ScalarIntMemFrags addr:$src)))]>,
7382                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7383  } // Predicates = [prd]
7384
7385  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7386          (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7387  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7388          (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7389  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7390          (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7391                                          SrcVT.IntScalarMemOp:$src), 0, "att">;
7392}
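// Each instantiation below therefore gets a register form, a static
// rounding-control form (EVEX.b + $rc) and a folded-load form, plus AT&T
// aliases that accept the size-suffixed mnemonics (e.g. "vcvtss2sil" /
// "vcvtss2siq", illustrative).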
7393
7394// Convert float/double to signed/unsigned int 32/64
7395defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7396                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7397                                   TB, XS, EVEX_CD8<32, CD8VT1>;
7398defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7399                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7400                                   TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
7401defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7402                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7403                                   TB, XS, EVEX_CD8<32, CD8VT1>;
7404defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7405                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7406                                   TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
7407defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7408                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7409                                   TB, XD, EVEX_CD8<64, CD8VT1>;
7410defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7411                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7412                                   TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7413defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7414                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7415                                   TB, XD, EVEX_CD8<64, CD8VT1>;
7416defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7417                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7418                                   TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7419
7420multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
7421                        X86VectorVTInfo DstVT, SDNode OpNode,
7422                        X86FoldableSchedWrite sched> {
7423  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7424    let isCodeGenOnly = 1 in {
7425    def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
7426                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7427                [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
7428                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7429    def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
7430                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7431                [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
7432                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7433    }
7434  } // Predicates = [HasAVX512]
7435}
7436
7437defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
7438                       lrint, WriteCvtSS2I>, TB, XS, EVEX_CD8<32, CD8VT1>;
7439defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
7440                       llrint, WriteCvtSS2I>, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
7441defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
7442                       lrint, WriteCvtSD2I>, TB, XD, EVEX_CD8<64, CD8VT1>;
7443defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
7444                       llrint, WriteCvtSD2I>, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;
7445
7446let Predicates = [HasAVX512] in {
7447  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
7448  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
7449
7450  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
7451  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
7452}
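// The i32 lrint cases are covered by the OpNode wired into the multiclass
// above; these extra patterns map i64 lrint (as produced on 64-bit targets)
// onto the same 64-bit converts that llrint already selects.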
7453
7454// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7455// which would otherwise produce unnecessary vmovs{s,d} instructions.
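// For example (illustrative, assuming the usual clang lowering), the
// _mm_cvtsi32_ss intrinsic becomes an insertelement of a sitofp result into
// element 0, i.e. a DAG of the form
//   (X86Movss $dst, (scalar_to_vector (sint_to_fp GR32:$src)))
// which the patterns below select directly to the _Int instruction forms.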
7456let Predicates = [HasAVX512] in {
7457def : Pat<(v4f32 (X86Movss
7458                   (v4f32 VR128X:$dst),
7459                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7460          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7461
7462def : Pat<(v4f32 (X86Movss
7463                   (v4f32 VR128X:$dst),
7464                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7465          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7466
7467def : Pat<(v4f32 (X86Movss
7468                   (v4f32 VR128X:$dst),
7469                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7470          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7471
7472def : Pat<(v4f32 (X86Movss
7473                   (v4f32 VR128X:$dst),
7474                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7475          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7476
7477def : Pat<(v2f64 (X86Movsd
7478                   (v2f64 VR128X:$dst),
7479                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7480          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7481
7482def : Pat<(v2f64 (X86Movsd
7483                   (v2f64 VR128X:$dst),
7484                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7485          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7486
7487def : Pat<(v2f64 (X86Movsd
7488                   (v2f64 VR128X:$dst),
7489                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7490          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7491
7492def : Pat<(v2f64 (X86Movsd
7493                   (v2f64 VR128X:$dst),
7494                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7495          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7496
7497def : Pat<(v4f32 (X86Movss
7498                   (v4f32 VR128X:$dst),
7499                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7500          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7501
7502def : Pat<(v4f32 (X86Movss
7503                   (v4f32 VR128X:$dst),
7504                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7505          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7506
7507def : Pat<(v4f32 (X86Movss
7508                   (v4f32 VR128X:$dst),
7509                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7510          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7511
7512def : Pat<(v4f32 (X86Movss
7513                   (v4f32 VR128X:$dst),
7514                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7515          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7516
7517def : Pat<(v2f64 (X86Movsd
7518                   (v2f64 VR128X:$dst),
7519                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7520          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7521
7522def : Pat<(v2f64 (X86Movsd
7523                   (v2f64 VR128X:$dst),
7524                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7525          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7526
7527def : Pat<(v2f64 (X86Movsd
7528                   (v2f64 VR128X:$dst),
7529                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7530          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7531
7532def : Pat<(v2f64 (X86Movsd
7533                   (v2f64 VR128X:$dst),
7534                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7535          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7536} // Predicates = [HasAVX512]
7537
7538// Convert float/double to signed/unsigned int 32/64 with truncation
7539multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7540                            X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
7541                            SDNode OpNodeInt, SDNode OpNodeSAE,
7542                            X86FoldableSchedWrite sched, string aliasStr,
7543                            Predicate prd = HasAVX512> {
7544let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
7545  let isCodeGenOnly = 1 in {
7546  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7547              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7548              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7549              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7550  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7551              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7552              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7553              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7554  }
7555
7556  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7557            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7558           [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7559           EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7560  let Uses = [MXCSR] in
7561  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7562            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7563            [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7564                                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7565  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7566              (ins _SrcRC.IntScalarMemOp:$src),
7567              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7568              [(set _DstRC.RC:$dst,
7569                (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
7570              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7571} // Predicates = [prd]
7572
7573  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7574          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7575  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7576          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7577  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7578          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7579                                          _SrcRC.IntScalarMemOp:$src), 0, "att">;
7580}
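// Truncating conversions always round toward zero, so only an {sae} form is
// provided here rather than a full rounding-control operand, e.g.
// (illustrative AT&T form):
//   vcvttss2si {sae}, %xmm0, %eax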
7581
7582defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7583                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7584                        "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
7585defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7586                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7587                        "{q}">, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
7588defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7589                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7590                        "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
7591defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7592                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7593                        "{q}">, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;
7594
7595defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7596                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7597                        "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
7598defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7599                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7600                        "{q}">, TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
7601defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7602                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7603                        "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
7604defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7605                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7606                        "{q}">, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7607
7608//===----------------------------------------------------------------------===//
7609// AVX-512  Convert from float to double and back
7610//===----------------------------------------------------------------------===//
7611
7612let Uses = [MXCSR], mayRaiseFPException = 1 in
7613multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7614                                X86VectorVTInfo _Src, SDNode OpNode,
7615                                X86FoldableSchedWrite sched> {
7616  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7617                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7618                         "$src2, $src1", "$src1, $src2",
7619                         (_.VT (OpNode (_.VT _.RC:$src1),
7620                                       (_Src.VT _Src.RC:$src2)))>,
7621                         EVEX, VVVV, VEX_LIG, Sched<[sched]>;
7622  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7623                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7624                         "$src2, $src1", "$src1, $src2",
7625                         (_.VT (OpNode (_.VT _.RC:$src1),
7626                                  (_Src.ScalarIntMemFrags addr:$src2)))>,
7627                         EVEX, VVVV, VEX_LIG,
7628                         Sched<[sched.Folded, sched.ReadAfterFold]>;
7629
7630  let isCodeGenOnly = 1, hasSideEffects = 0 in {
7631    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7632               (ins _.FRC:$src1, _Src.FRC:$src2),
7633               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7634               EVEX, VVVV, VEX_LIG, Sched<[sched]>;
7635    let mayLoad = 1 in
7636    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7637               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7638               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7639               EVEX, VVVV, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7640  }
7641}
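// As with the other scalar conversions, the maskable _Int forms operate on
// VR128X and merge into $src1, while the isCodeGenOnly FRC forms are what the
// plain fpround/fpextend patterns further below select.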
7642
7643// Scalar Conversion with SAE - suppress all exceptions
7644multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7645                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
7646                                    X86FoldableSchedWrite sched> {
7647  let Uses = [MXCSR] in
7648  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7649                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7650                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7651                        (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7652                                         (_Src.VT _Src.RC:$src2)))>,
7653                        EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
7654}
7655
7656// Scalar Conversion with rounding control (RC)
7657multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7658                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
7659                                   X86FoldableSchedWrite sched> {
7660  let Uses = [MXCSR] in
7661  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7662                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7663                        "$rc, $src2, $src1", "$src1, $src2, $rc",
7664                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7665                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7666                        EVEX, VVVV, VEX_LIG, Sched<[sched]>,
7667                        EVEX_B, EVEX_RC;
7668}
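// The SAE and RC variants above differ only in whether a static rounding-mode
// operand is present; an RC form prints roughly as (illustrative AT&T syntax):
//   vcvtsd2ss {rz-sae}, %xmm2, %xmm1, %xmm0
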
7669multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
7670                                      SDNode OpNode, SDNode OpNodeRnd,
7671                                      X86FoldableSchedWrite sched,
7672                                      X86VectorVTInfo _src, X86VectorVTInfo _dst,
7673                                      Predicate prd = HasAVX512> {
7674  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7675    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7676             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7677                               OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
7678  }
7679}
7680
7681multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
7682                                       SDNode OpNode, SDNode OpNodeSAE,
7683                                       X86FoldableSchedWrite sched,
7684                                       X86VectorVTInfo _src, X86VectorVTInfo _dst,
7685                                       Predicate prd = HasAVX512> {
7686  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7687    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7688             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7689             EVEX_CD8<_src.EltSize, CD8VT1>;
7690  }
7691}
7692defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
7693                                         X86froundsRnd, WriteCvtSD2SS, f64x_info,
7694                                         f32x_info>, TB, XD, REX_W;
7695defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
7696                                          X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7697                                          f64x_info>, TB, XS;
7698defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
7699                                          X86froundsRnd, WriteCvtSD2SS, f64x_info,
7700                                          f16x_info, HasFP16>, T_MAP5, XD, REX_W;
7701defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
7702                                          X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7703                                          f64x_info, HasFP16>, T_MAP5, XS;
7704defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
7705                                          X86froundsRnd, WriteCvtSD2SS, f32x_info,
7706                                          f16x_info, HasFP16>, T_MAP5;
7707defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
7708                                          X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7709                                          f32x_info, HasFP16>, T_MAP6;
7710
7711def : Pat<(f64 (any_fpextend FR32X:$src)),
7712          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7713          Requires<[HasAVX512]>;
7714def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7715          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7716          Requires<[HasAVX512, OptForSize]>;
7717
7718def : Pat<(f32 (any_fpround FR64X:$src)),
7719          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7720           Requires<[HasAVX512]>;
7721
7722def : Pat<(f32 (any_fpextend FR16X:$src)),
7723          (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
7724          Requires<[HasFP16]>;
7725def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
7726          (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
7727          Requires<[HasFP16, OptForSize]>;
7728
7729def : Pat<(f64 (any_fpextend FR16X:$src)),
7730          (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
7731          Requires<[HasFP16]>;
7732def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
7733          (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7734          Requires<[HasFP16, OptForSize]>;
7735
7736def : Pat<(f16 (any_fpround FR32X:$src)),
7737          (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
7738           Requires<[HasFP16]>;
7739def : Pat<(f16 (any_fpround FR64X:$src)),
7740          (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
7741           Requires<[HasFP16]>;
7742
7743def : Pat<(v4f32 (X86Movss
7744                   (v4f32 VR128X:$dst),
7745                   (v4f32 (scalar_to_vector
7746                     (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7747          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7748          Requires<[HasAVX512]>;
7749
7750def : Pat<(v2f64 (X86Movsd
7751                   (v2f64 VR128X:$dst),
7752                   (v2f64 (scalar_to_vector
7753                     (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7754          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7755          Requires<[HasAVX512]>;
7756
7757//===----------------------------------------------------------------------===//
7758// AVX-512  Vector convert from signed/unsigned integer to float/double
7759//          and from float/double to signed/unsigned integer
7760//===----------------------------------------------------------------------===//
7761
7762multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7763                          X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
7764                          X86FoldableSchedWrite sched,
7765                          string Broadcast = _.BroadcastStr,
7766                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7767                          RegisterClass MaskRC = _.KRCWM,
7768                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
7769                          dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7770let Uses = [MXCSR], mayRaiseFPException = 1 in {
7771  defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
7772                         (ins _Src.RC:$src),
7773                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7774                         (ins MaskRC:$mask, _Src.RC:$src),
7775                          OpcodeStr, "$src", "$src",
7776                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7777                         (vselect_mask MaskRC:$mask,
7778                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7779                                       _.RC:$src0),
7780                         (vselect_mask MaskRC:$mask,
7781                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7782                                       _.ImmAllZerosV)>,
7783                         EVEX, Sched<[sched]>;
7784
7785  defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7786                         (ins MemOp:$src),
7787                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
7788                         (ins MaskRC:$mask, MemOp:$src),
7789                         OpcodeStr#Alias, "$src", "$src",
7790                         LdDAG,
7791                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
7792                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
7793                         EVEX, Sched<[sched.Folded]>;
7794
7795  defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7796                         (ins _Src.ScalarMemOp:$src),
7797                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
7798                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
7799                         OpcodeStr,
7800                         "${src}"#Broadcast, "${src}"#Broadcast,
7801                         (_.VT (OpNode (_Src.VT
7802                                  (_Src.BroadcastLdFrag addr:$src))
7803                            )),
7804                         (vselect_mask MaskRC:$mask,
7805                                       (_.VT
7806                                        (MaskOpNode
7807                                         (_Src.VT
7808                                          (_Src.BroadcastLdFrag addr:$src)))),
7809                                       _.RC:$src0),
7810                         (vselect_mask MaskRC:$mask,
7811                                       (_.VT
7812                                        (MaskOpNode
7813                                         (_Src.VT
7814                                          (_Src.BroadcastLdFrag addr:$src)))),
7815                                       _.ImmAllZerosV)>,
7816                         EVEX, EVEX_B, Sched<[sched.Folded]>;
7817  }
7818}
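// Each instantiation of avx512_vcvt_fp thus provides register, full-memory
// and broadcast-memory forms, each in unmasked, merge-masked and zero-masked
// flavors via vselect_mask.
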
7819// Conversion with SAE - suppress all exceptions
7820multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7821                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
7822                              X86FoldableSchedWrite sched> {
7823  let Uses = [MXCSR] in
7824  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7825                        (ins _Src.RC:$src), OpcodeStr,
7826                        "{sae}, $src", "$src, {sae}",
7827                        (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
7828                        EVEX, EVEX_B, Sched<[sched]>;
7829}
7830
7831// Conversion with rounding control (RC)
7832multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7833                         X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
7834                         X86FoldableSchedWrite sched> {
7835  let Uses = [MXCSR] in
7836  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7837                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7838                        "$rc, $src", "$src, $rc",
7839                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
7840                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
7841}
7842
7843// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
7844multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7845                                X86VectorVTInfo _Src, SDPatternOperator OpNode,
7846                                SDNode MaskOpNode,
7847                                X86FoldableSchedWrite sched,
7848                                string Broadcast = _.BroadcastStr,
7849                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7850                                RegisterClass MaskRC = _.KRCWM>
7851  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
7852                   Alias, MemOp, MaskRC,
7853                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
7854                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7855
7856// Extend [Float to Double, Half to Float]
7857multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
7858                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
7859                             X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
7860  let Predicates = [prd] in {
7861    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr,  _dst.info512, _src.info256,
7862                            any_fpextend, fpextend, sched.ZMM>,
7863             avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
7864                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
7865  }
7866  let Predicates = [prd, HasVLX] in {
7867    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
7868                               X86any_vfpext, X86vfpext, sched.XMM,
7869                               _dst.info128.BroadcastStr,
7870                               "", f64mem>, EVEX_V128;
7871    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
7872                               any_fpextend, fpextend, sched.YMM>, EVEX_V256;
7873  }
7874}
7875
7876// Truncate [Double to Float, Float to Half]
7877multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
7878                            AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
7879                            X86SchedWriteWidths sched, Predicate prd = HasAVX512,
7880                            PatFrag bcast128 = _src.info128.BroadcastLdFrag,
7881                            PatFrag loadVT128 = _src.info128.LdFrag,
7882                            RegisterClass maskRC128 = _src.info128.KRCWM> {
7883  let Predicates = [prd] in {
7884    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
7885                            X86any_vfpround, X86vfpround, sched.ZMM>,
7886             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
7887                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
7888  }
7889  let Predicates = [prd, HasVLX] in {
7890    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
7891                               null_frag, null_frag, sched.XMM,
7892                               _src.info128.BroadcastStr, "{x}",
7893                               f128mem, maskRC128>, EVEX_V128;
7894    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
7895                               X86any_vfpround, X86vfpround,
7896                               sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;
7897
7898    // Special patterns to allow use of X86vmfpround for masking. Instruction
7899    // patterns have been disabled with null_frag.
7900    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
7901              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
7902    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
7903                            maskRC128:$mask),
7904              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
7905    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
7906                            maskRC128:$mask),
7907              (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
7908
7909    def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
7910              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
7911    def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
7912                            maskRC128:$mask),
7913              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
7914    def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
7915                            maskRC128:$mask),
7916              (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
7917
7918    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
7919              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
7920    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
7921                            (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
7922              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
7923    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
7924                            _dst.info128.ImmAllZerosV, maskRC128:$mask),
7925              (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
7926  }
7927
7928  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
7929                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7930  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7931                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7932                  VK2WM:$mask, VR128X:$src), 0, "att">;
7933  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
7934                  "$dst {${mask}} {z}, $src}",
7935                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7936                  VK2WM:$mask, VR128X:$src), 0, "att">;
7937  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7938                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7939  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7940                  "$dst {${mask}}, ${src}{1to2}}",
7941                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7942                  VK2WM:$mask, f64mem:$src), 0, "att">;
7943  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7944                  "$dst {${mask}} {z}, ${src}{1to2}}",
7945                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7946                  VK2WM:$mask, f64mem:$src), 0, "att">;
7947
7948  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
7949                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7950  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7951                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7952                  VK4WM:$mask, VR256X:$src), 0, "att">;
7953  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
7954                  "$dst {${mask}} {z}, $src}",
7955                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7956                  VK4WM:$mask, VR256X:$src), 0, "att">;
7957  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7958                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7959  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
7960                  "$dst {${mask}}, ${src}{1to4}}",
7961                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7962                  VK4WM:$mask, f64mem:$src), 0, "att">;
7963  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7964                  "$dst {${mask}} {z}, ${src}{1to4}}",
7965                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7966                  VK4WM:$mask, f64mem:$src), 0, "att">;
7967}
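// The "x"/"y" mnemonic suffixes handled by the aliases above exist because the
// AT&T memory forms would otherwise be ambiguous: both the 128-bit and the
// 256-bit source variants write an %xmm destination, e.g. (illustrative):
//   vcvtpd2psx (%rax), %xmm0    # two f64 elements loaded from memory
//   vcvtpd2psy (%rax), %xmm0    # four f64 elements loaded from memory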
7968
7969defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
7970                                  avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
7971                                  REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
7972defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
7973                                   avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
7974                                   TB, EVEX_CD8<32, CD8VH>;
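// For example, the "x"/"y" InstAliases defined above let the AT&T asm parser
// accept size-suffixed spellings such as (operands below are arbitrary
// illustrations):
//   vcvtpd2psx %xmm1, %xmm0                   --> VCVTPD2PSZ128rr
//   vcvtpd2psy (%rax){1to4}, %xmm0 {%k1}      --> VCVTPD2PSZ256rmbk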
7975
7976// Extend Half to Double
7977multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
7978                            X86SchedWriteWidths sched> {
7979  let Predicates = [HasFP16] in {
7980    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
7981                                  any_fpextend, fpextend, sched.ZMM>,
7982             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
7983                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
7984    def : Pat<(v8f64 (extloadv8f16 addr:$src)),
7985                (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
7986  }
7987  let Predicates = [HasFP16, HasVLX] in {
7988    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
7989                                     X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
7990                                     f32mem>, EVEX_V128;
7991    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
7992                                     X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
7993                                     f64mem>, EVEX_V256;
7994  }
7995}
7996
7997// Truncate Double to Half
7998multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
7999  let Predicates = [HasFP16] in {
8000    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
8001                            X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
8002             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
8003                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
8004  }
8005  let Predicates = [HasFP16, HasVLX] in {
8006    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
8007                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8008                               VK2WM>, EVEX_V128;
8009    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
8010                               null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
8011                               VK4WM>, EVEX_V256;
8012  }
8013  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8014                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8015                  VR128X:$src), 0, "att">;
8016  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8017                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8018                  VK2WM:$mask, VR128X:$src), 0, "att">;
8019  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8020                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8021                  VK2WM:$mask, VR128X:$src), 0, "att">;
8022  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8023                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8024                  i64mem:$src), 0, "att">;
8025  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8026                  "$dst {${mask}}, ${src}{1to2}}",
8027                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8028                  VK2WM:$mask, i64mem:$src), 0, "att">;
8029  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8030                  "$dst {${mask}} {z}, ${src}{1to2}}",
8031                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8032                  VK2WM:$mask, i64mem:$src), 0, "att">;
8033
8034  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8035                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8036                  VR256X:$src), 0, "att">;
8037  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8038                  "$dst {${mask}}, $src}",
8039                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8040                  VK4WM:$mask, VR256X:$src), 0, "att">;
8041  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8042                  "$dst {${mask}} {z}, $src}",
8043                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8044                  VK4WM:$mask, VR256X:$src), 0, "att">;
8045  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8046                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8047                  i64mem:$src), 0, "att">;
8048  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8049                  "$dst {${mask}}, ${src}{1to4}}",
8050                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8051                  VK4WM:$mask, i64mem:$src), 0, "att">;
8052  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8053                  "$dst {${mask}} {z}, ${src}{1to4}}",
8054                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8055                  VK4WM:$mask, i64mem:$src), 0, "att">;
8056
8057  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
8058                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
8059                  VR512:$src), 0, "att">;
8060  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
8061                  "$dst {${mask}}, $src}",
8062                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
8063                  VK8WM:$mask, VR512:$src), 0, "att">;
8064  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
8065                  "$dst {${mask}} {z}, $src}",
8066                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
8067                  VK8WM:$mask, VR512:$src), 0, "att">;
8068  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
8069                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
8070                  i64mem:$src), 0, "att">;
8071  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
8072                  "$dst {${mask}}, ${src}{1to8}}",
8073                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
8074                  VK8WM:$mask, i64mem:$src), 0, "att">;
8075  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
8076                  "$dst {${mask}} {z}, ${src}{1to8}}",
8077                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
8078                  VK8WM:$mask, i64mem:$src), 0, "att">;
8079}
8080
8081defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
8082                                   avx512vl_f32_info, SchedWriteCvtPD2PS,
8083                                   HasFP16>, T_MAP5, PD, EVEX_CD8<32, CD8VF>;
8084defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
8085                                    avx512vl_f16_info, SchedWriteCvtPS2PD,
8086                                    HasFP16>, T_MAP6, PD, EVEX_CD8<16, CD8VH>;
8087defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
8088                                 REX_W, T_MAP5, PD, EVEX_CD8<64, CD8VF>;
8089defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
8090                                 T_MAP5, EVEX_CD8<16, CD8VQ>;
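// For example, the x/y/z InstAliases in avx512_cvtpd2ph above give the AT&T
// parser spellings such as (operands are arbitrary illustrations):
//   vcvtpd2phx %xmm1, %xmm0                    --> VCVTPD2PHZ128rr
//   vcvtpd2phz (%rcx){1to8}, %xmm0 {%k1} {z}   --> VCVTPD2PHZrmbkz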
8091
8092let Predicates = [HasFP16, HasVLX] in {
8093  // Special patterns to allow use of X86vmfpround for masking. Instruction
8094  // patterns have been disabled with null_frag.
8095  def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
8096            (VCVTPD2PHZ256rr VR256X:$src)>;
8097  def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
8098                          VK4WM:$mask)),
8099            (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
8100  def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
8101                          VK4WM:$mask),
8102            (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
8103
8104  def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
8105            (VCVTPD2PHZ256rm addr:$src)>;
8106  def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
8107                          VK4WM:$mask),
8108            (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8109  def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
8110                          VK4WM:$mask),
8111            (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
8112
8113  def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
8114            (VCVTPD2PHZ256rmb addr:$src)>;
8115  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8116                          (v8f16 VR128X:$src0), VK4WM:$mask),
8117            (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8118  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8119                          v8f16x_info.ImmAllZerosV, VK4WM:$mask),
8120            (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
8121
8122  def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
8123            (VCVTPD2PHZ128rr VR128X:$src)>;
8124  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
8125                          VK2WM:$mask),
8126            (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8127  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
8128                          VK2WM:$mask),
8129            (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
8130
8131  def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
8132            (VCVTPD2PHZ128rm addr:$src)>;
8133  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
8134                          VK2WM:$mask),
8135            (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8136  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
8137                          VK2WM:$mask),
8138            (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
8139
8140  def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
8141            (VCVTPD2PHZ128rmb addr:$src)>;
8142  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8143                          (v8f16 VR128X:$src0), VK2WM:$mask),
8144            (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8145  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8146                          v8f16x_info.ImmAllZerosV, VK2WM:$mask),
8147            (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
8148}
8149
8150// Convert Signed/Unsigned Doubleword to Double
8151let Uses = []<Register>, mayRaiseFPException = 0 in
8152multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8153                           SDNode MaskOpNode, SDPatternOperator OpNode128,
8154                           SDNode MaskOpNode128,
8155                           X86SchedWriteWidths sched> {
8156  // No rounding in this op
8157  let Predicates = [HasAVX512] in
8158    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
8159                            MaskOpNode, sched.ZMM>, EVEX_V512;
8160
8161  let Predicates = [HasVLX] in {
8162    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
8163                               OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
8164                               "", i64mem, VK2WM,
8165                               (v2f64 (OpNode128 (bc_v4i32
8166                                (v2i64
8167                                 (scalar_to_vector (loadi64 addr:$src)))))),
8168                               (v2f64 (MaskOpNode128 (bc_v4i32
8169                                (v2i64
8170                                 (scalar_to_vector (loadi64 addr:$src))))))>,
8171                               EVEX_V128;
8172    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
8173                               MaskOpNode, sched.YMM>, EVEX_V256;
8174  }
8175}
8176
8177// Convert Signed/Unsigned Doubleword to Float
8178multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8179                           SDNode MaskOpNode, SDNode OpNodeRnd,
8180                           X86SchedWriteWidths sched> {
8181  let Predicates = [HasAVX512] in
8182    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
8183                            MaskOpNode, sched.ZMM>,
8184             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
8185                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8186
8187  let Predicates = [HasVLX] in {
8188    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
8189                               MaskOpNode, sched.XMM>, EVEX_V128;
8190    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
8191                               MaskOpNode, sched.YMM>, EVEX_V256;
8192  }
8193}
8194
8195// Convert Float to Signed/Unsigned Doubleword with truncation
8196multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8197                            SDNode MaskOpNode,
8198                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8199  let Predicates = [HasAVX512] in {
8200    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8201                            MaskOpNode, sched.ZMM>,
8202             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
8203                                OpNodeSAE, sched.ZMM>, EVEX_V512;
8204  }
8205  let Predicates = [HasVLX] in {
8206    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8207                               MaskOpNode, sched.XMM>, EVEX_V128;
8208    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8209                               MaskOpNode, sched.YMM>, EVEX_V256;
8210  }
8211}
8212
8213// Convert Float to Signed/Unsigned Doubleword
8214multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8215                           SDNode MaskOpNode, SDNode OpNodeRnd,
8216                           X86SchedWriteWidths sched> {
8217  let Predicates = [HasAVX512] in {
8218    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8219                            MaskOpNode, sched.ZMM>,
8220             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
8221                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8222  }
8223  let Predicates = [HasVLX] in {
8224    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8225                               MaskOpNode, sched.XMM>, EVEX_V128;
8226    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8227                               MaskOpNode, sched.YMM>, EVEX_V256;
8228  }
8229}
8230
8231// Convert Double to Signed/Unsigned Doubleword with truncation
8232multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8233                            SDNode MaskOpNode, SDNode OpNodeSAE,
8234                            X86SchedWriteWidths sched> {
8235  let Predicates = [HasAVX512] in {
8236    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8237                            MaskOpNode, sched.ZMM>,
8238             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
8239                                OpNodeSAE, sched.ZMM>, EVEX_V512;
8240  }
8241  let Predicates = [HasVLX] in {
    // We need "x"/"y" suffixes in order to distinguish between 128- and 256-bit
    // memory forms of these instructions in the asm parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // for the same reason.
8246    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8247                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8248                               VK2WM>, EVEX_V128;
8249    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8250                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8251  }
8252
8253  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8254                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8255                  VR128X:$src), 0, "att">;
8256  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8257                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8258                  VK2WM:$mask, VR128X:$src), 0, "att">;
8259  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8260                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8261                  VK2WM:$mask, VR128X:$src), 0, "att">;
8262  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8263                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8264                  f64mem:$src), 0, "att">;
8265  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8266                  "$dst {${mask}}, ${src}{1to2}}",
8267                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8268                  VK2WM:$mask, f64mem:$src), 0, "att">;
8269  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8270                  "$dst {${mask}} {z}, ${src}{1to2}}",
8271                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8272                  VK2WM:$mask, f64mem:$src), 0, "att">;
8273
8274  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8275                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8276                  VR256X:$src), 0, "att">;
8277  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8278                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8279                  VK4WM:$mask, VR256X:$src), 0, "att">;
8280  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8281                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8282                  VK4WM:$mask, VR256X:$src), 0, "att">;
8283  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8284                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8285                  f64mem:$src), 0, "att">;
8286  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8287                  "$dst {${mask}}, ${src}{1to4}}",
8288                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8289                  VK4WM:$mask, f64mem:$src), 0, "att">;
8290  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8291                  "$dst {${mask}} {z}, ${src}{1to4}}",
8292                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8293                  VK4WM:$mask, f64mem:$src), 0, "att">;
8294}
8295
8296// Convert Double to Signed/Unsigned Doubleword
8297multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8298                           SDNode MaskOpNode, SDNode OpNodeRnd,
8299                           X86SchedWriteWidths sched> {
8300  let Predicates = [HasAVX512] in {
8301    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8302                            MaskOpNode, sched.ZMM>,
8303             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
8304                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8305  }
8306  let Predicates = [HasVLX] in {
    // We need "x"/"y" suffixes in order to distinguish between 128- and 256-bit
    // memory forms of these instructions in the asm parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // for the same reason.
8311    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8312                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8313                               VK2WM>, EVEX_V128;
8314    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8315                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8316  }
8317
8318  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8319                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8320  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8321                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8322                  VK2WM:$mask, VR128X:$src), 0, "att">;
8323  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8324                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8325                  VK2WM:$mask, VR128X:$src), 0, "att">;
8326  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8327                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8328                  f64mem:$src), 0, "att">;
8329  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8330                  "$dst {${mask}}, ${src}{1to2}}",
8331                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8332                  VK2WM:$mask, f64mem:$src), 0, "att">;
8333  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8334                  "$dst {${mask}} {z}, ${src}{1to2}}",
8335                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8336                  VK2WM:$mask, f64mem:$src), 0, "att">;
8337
8338  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8339                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8340  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8341                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8342                  VK4WM:$mask, VR256X:$src), 0, "att">;
8343  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8344                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8345                  VK4WM:$mask, VR256X:$src), 0, "att">;
8346  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8347                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8348                  f64mem:$src), 0, "att">;
8349  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8350                  "$dst {${mask}}, ${src}{1to4}}",
8351                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8352                  VK4WM:$mask, f64mem:$src), 0, "att">;
8353  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8354                  "$dst {${mask}} {z}, ${src}{1to4}}",
8355                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8356                  VK4WM:$mask, f64mem:$src), 0, "att">;
8357}
8358
// Convert Double to Signed/Unsigned Quadword
8360multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8361                           SDNode MaskOpNode, SDNode OpNodeRnd,
8362                           X86SchedWriteWidths sched> {
8363  let Predicates = [HasDQI] in {
8364    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8365                            MaskOpNode, sched.ZMM>,
8366             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
8367                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8368  }
8369  let Predicates = [HasDQI, HasVLX] in {
8370    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8371                               MaskOpNode, sched.XMM>, EVEX_V128;
8372    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8373                               MaskOpNode, sched.YMM>, EVEX_V256;
8374  }
8375}
8376
// Convert Double to Signed/Unsigned Quadword with truncation
8378multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8379                            SDNode MaskOpNode, SDNode OpNodeRnd,
8380                            X86SchedWriteWidths sched> {
8381  let Predicates = [HasDQI] in {
8382    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8383                            MaskOpNode, sched.ZMM>,
8384             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
8385                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8386  }
8387  let Predicates = [HasDQI, HasVLX] in {
8388    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8389                               MaskOpNode, sched.XMM>, EVEX_V128;
8390    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8391                               MaskOpNode, sched.YMM>, EVEX_V256;
8392  }
8393}
8394
// Convert Signed/Unsigned Quadword to Double
8396multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8397                           SDNode MaskOpNode, SDNode OpNodeRnd,
8398                           X86SchedWriteWidths sched> {
8399  let Predicates = [HasDQI] in {
8400    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8401                            MaskOpNode, sched.ZMM>,
8402             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8403                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8404  }
8405  let Predicates = [HasDQI, HasVLX] in {
8406    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8407                               MaskOpNode, sched.XMM>, EVEX_V128;
8408    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8409                               MaskOpNode, sched.YMM>, EVEX_V256;
8410  }
8411}
8412
// Convert Float to Signed/Unsigned Quadword
8414multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8415                           SDNode MaskOpNode, SDNode OpNodeRnd,
8416                           X86SchedWriteWidths sched> {
8417  let Predicates = [HasDQI] in {
8418    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8419                            MaskOpNode, sched.ZMM>,
8420             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8421                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8422  }
8423  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from the v4f32x_info source.
8426    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8427                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8428                               (v2i64 (OpNode (bc_v4f32
8429                                (v2f64
8430                                 (scalar_to_vector (loadf64 addr:$src)))))),
8431                               (v2i64 (MaskOpNode (bc_v4f32
8432                                (v2f64
8433                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8434                               EVEX_V128;
8435    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8436                               MaskOpNode, sched.YMM>, EVEX_V256;
8437  }
8438}
8439
// Convert Float to Signed/Unsigned Quadword with truncation
8441multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8442                            SDNode MaskOpNode, SDNode OpNodeRnd,
8443                            X86SchedWriteWidths sched> {
8444  let Predicates = [HasDQI] in {
8445    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8446                            MaskOpNode, sched.ZMM>,
8447             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8448                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8449  }
8450  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from the v4f32x_info source.
8453    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8454                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8455                               (v2i64 (OpNode (bc_v4f32
8456                                (v2f64
8457                                 (scalar_to_vector (loadf64 addr:$src)))))),
8458                               (v2i64 (MaskOpNode (bc_v4f32
8459                                (v2f64
8460                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8461                               EVEX_V128;
8462    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8463                               MaskOpNode, sched.YMM>, EVEX_V256;
8464  }
8465}
8466
// Convert Signed/Unsigned Quadword to Float
8468// Also Convert Signed/Unsigned Doubleword to Half
8469multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8470                                 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
8471                                 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
8472                                 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8473                                 X86SchedWriteWidths sched, Predicate prd = HasDQI> {
8474  let Predicates = [prd] in {
8475    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
8476                            MaskOpNode, sched.ZMM>,
8477             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8478                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8479  }
8480  let Predicates = [prd, HasVLX] in {
    // We need "x"/"y" suffixes in order to distinguish between 128- and 256-bit
    // memory forms of these instructions in the asm parser. They have the same
    // dest type - '_dst.info128'. We also specify the broadcast string explicitly
    // for the same reason.
8485    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
8486                               null_frag, sched.XMM, _src.info128.BroadcastStr,
8487                               "{x}", i128mem, _src.info128.KRCWM>,
8488                               EVEX_V128;
8489    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
8490                               MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
8491                               "{y}">, EVEX_V256;
8492
8493    // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
8494    // patterns have been disabled with null_frag.
8495    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
8496              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8497    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8498                             _src.info128.KRCWM:$mask),
8499              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
8500    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8501                             _src.info128.KRCWM:$mask),
8502              (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
8503
8504    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
8505              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8506    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
8507                             _src.info128.KRCWM:$mask),
8508              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8509    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
8510                             _src.info128.KRCWM:$mask),
8511              (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
8512
8513    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
8514              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8515    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8516                             (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
8517              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8518    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8519                             _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
8520              (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
8521  }
8522
8523  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8524                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8525                  VR128X:$src), 0, "att">;
8526  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8527                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8528                  VK2WM:$mask, VR128X:$src), 0, "att">;
8529  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8530                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8531                  VK2WM:$mask, VR128X:$src), 0, "att">;
8532  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8533                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8534                  i64mem:$src), 0, "att">;
8535  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8536                  "$dst {${mask}}, ${src}{1to2}}",
8537                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8538                  VK2WM:$mask, i64mem:$src), 0, "att">;
8539  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8540                  "$dst {${mask}} {z}, ${src}{1to2}}",
8541                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8542                  VK2WM:$mask, i64mem:$src), 0, "att">;
8543
8544  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8545                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8546                  VR256X:$src), 0, "att">;
8547  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8548                  "$dst {${mask}}, $src}",
8549                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8550                  VK4WM:$mask, VR256X:$src), 0, "att">;
8551  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8552                  "$dst {${mask}} {z}, $src}",
8553                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8554                  VK4WM:$mask, VR256X:$src), 0, "att">;
8555  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8556                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8557                  i64mem:$src), 0, "att">;
8558  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8559                  "$dst {${mask}}, ${src}{1to4}}",
8560                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8561                  VK4WM:$mask, i64mem:$src), 0, "att">;
8562  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8563                  "$dst {${mask}} {z}, ${src}{1to4}}",
8564                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8565                  VK4WM:$mask, i64mem:$src), 0, "att">;
8566}
8567
8568defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
8569                                 X86any_VSintToFP, X86VSintToFP,
8570                                 SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>;
8571
8572defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
8573                                X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8574                                TB, EVEX_CD8<32, CD8VF>;
8575
8576defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8577                                 X86cvttp2si, X86cvttp2siSAE,
8578                                 SchedWriteCvtPS2DQ>, TB, XS, EVEX_CD8<32, CD8VF>;
8579
8580defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8581                                 X86cvttp2si, X86cvttp2siSAE,
8582                                 SchedWriteCvtPD2DQ>,
8583                                 TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
8584
8585defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8586                                 X86cvttp2ui, X86cvttp2uiSAE,
8587                                 SchedWriteCvtPS2DQ>, TB, EVEX_CD8<32, CD8VF>;
8588
8589defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8590                                 X86cvttp2ui, X86cvttp2uiSAE,
8591                                 SchedWriteCvtPD2DQ>,
8592                                 TB, REX_W, EVEX_CD8<64, CD8VF>;
8593
8594defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8595                                  uint_to_fp, X86any_VUintToFP, X86VUintToFP,
8596                                  SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>;
8597
8598defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8599                                 uint_to_fp, X86VUintToFpRnd,
8600                                 SchedWriteCvtDQ2PS>, TB, XD, EVEX_CD8<32, CD8VF>;
8601
8602defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
8603                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8604                                 EVEX_CD8<32, CD8VF>;
8605
8606defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
8607                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, TB, XD,
8608                                 REX_W, EVEX_CD8<64, CD8VF>;
8609
8610defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
8611                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8612                                 TB, EVEX_CD8<32, CD8VF>;
8613
8614defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
8615                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
8616                                 TB, EVEX_CD8<64, CD8VF>;
8617
8618defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
8619                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, REX_W,
8620                                 TB, PD, EVEX_CD8<64, CD8VF>;
8621
8622defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
8623                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8624                                 EVEX_CD8<32, CD8VH>;
8625
8626defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
8627                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
8628                                 TB, PD, EVEX_CD8<64, CD8VF>;
8629
8630defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
8631                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8632                                 EVEX_CD8<32, CD8VH>;
8633
8634defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8635                                 X86cvttp2si, X86cvttp2siSAE,
8636                                 SchedWriteCvtPD2DQ>, REX_W,
8637                                 TB, PD, EVEX_CD8<64, CD8VF>;
8638
8639defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8640                                 X86cvttp2si, X86cvttp2siSAE,
8641                                 SchedWriteCvtPS2DQ>, TB, PD,
8642                                 EVEX_CD8<32, CD8VH>;
8643
8644defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8645                                 X86cvttp2ui, X86cvttp2uiSAE,
8646                                 SchedWriteCvtPD2DQ>, REX_W,
8647                                 TB, PD, EVEX_CD8<64, CD8VF>;
8648
8649defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8650                                 X86cvttp2ui, X86cvttp2uiSAE,
8651                                 SchedWriteCvtPS2DQ>, TB, PD,
8652                                 EVEX_CD8<32, CD8VH>;
8653
8654defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8655                            sint_to_fp, X86VSintToFpRnd,
8656                            SchedWriteCvtDQ2PD>, REX_W, TB, XS, EVEX_CD8<64, CD8VF>;
8657
8658defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8659                            uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
8660                            REX_W, TB, XS, EVEX_CD8<64, CD8VF>;
8661
8662defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
8663                            X86any_VSintToFP, X86VMSintToFP,
8664                            X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8665                            SchedWriteCvtDQ2PS, HasFP16>,
8666                            T_MAP5, EVEX_CD8<32, CD8VF>;
8667
8668defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
8669                            X86any_VUintToFP, X86VMUintToFP,
8670                            X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8671                            SchedWriteCvtDQ2PS, HasFP16>, T_MAP5, XD,
8672                            EVEX_CD8<32, CD8VF>;
8673
8674defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
8675                            X86any_VSintToFP, X86VMSintToFP,
8676                            X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8677                            SchedWriteCvtDQ2PS>, REX_W, TB,
8678                            EVEX_CD8<64, CD8VF>;
8679
8680defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
8681                            X86any_VUintToFP, X86VMUintToFP,
8682                            X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8683                            SchedWriteCvtDQ2PS>, REX_W, TB, XD,
8684                            EVEX_CD8<64, CD8VF>;
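// For example, the x/y aliases in avx512_cvtqq2ps_dq2ph give the AT&T parser
// spellings such as (operands are arbitrary illustrations):
//   vcvtqq2psy %ymm1, %xmm0                   --> VCVTQQ2PSZ256rr
//   vcvtqq2psx (%rax){1to2}, %xmm0            --> VCVTQQ2PSZ128rmb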
8685
8686let Predicates = [HasVLX] in {
8687  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8688  // patterns have been disabled with null_frag.
8689  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8690            (VCVTPD2DQZ128rr VR128X:$src)>;
8691  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8692                          VK2WM:$mask),
8693            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8694  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8695                          VK2WM:$mask),
8696            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8697
8698  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8699            (VCVTPD2DQZ128rm addr:$src)>;
8700  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8701                          VK2WM:$mask),
8702            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8703  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8704                          VK2WM:$mask),
8705            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8706
8707  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8708            (VCVTPD2DQZ128rmb addr:$src)>;
8709  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8710                          (v4i32 VR128X:$src0), VK2WM:$mask),
8711            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8712  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8713                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8714            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8715
8716  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8717  // patterns have been disabled with null_frag.
8718  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8719            (VCVTTPD2DQZ128rr VR128X:$src)>;
8720  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8721                          VK2WM:$mask),
8722            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8723  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8724                          VK2WM:$mask),
8725            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8726
8727  def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
8728            (VCVTTPD2DQZ128rm addr:$src)>;
8729  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8730                          VK2WM:$mask),
8731            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8732  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8733                          VK2WM:$mask),
8734            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8735
8736  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
8737            (VCVTTPD2DQZ128rmb addr:$src)>;
8738  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8739                          (v4i32 VR128X:$src0), VK2WM:$mask),
8740            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8741  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8742                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8743            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8744
8745  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8746  // patterns have been disabled with null_frag.
8747  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8748            (VCVTPD2UDQZ128rr VR128X:$src)>;
8749  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8750                           VK2WM:$mask),
8751            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8752  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8753                           VK2WM:$mask),
8754            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8755
8756  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8757            (VCVTPD2UDQZ128rm addr:$src)>;
8758  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8759                           VK2WM:$mask),
8760            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8761  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8762                           VK2WM:$mask),
8763            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8764
8765  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8766            (VCVTPD2UDQZ128rmb addr:$src)>;
8767  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8768                           (v4i32 VR128X:$src0), VK2WM:$mask),
8769            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8770  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8771                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8772            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8773
  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
  // patterns have been disabled with null_frag.
8776  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
8777            (VCVTTPD2UDQZ128rr VR128X:$src)>;
8778  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8779                          VK2WM:$mask),
8780            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8781  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8782                          VK2WM:$mask),
8783            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8784
8785  def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
8786            (VCVTTPD2UDQZ128rm addr:$src)>;
8787  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8788                          VK2WM:$mask),
8789            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8790  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8791                          VK2WM:$mask),
8792            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8793
8794  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
8795            (VCVTTPD2UDQZ128rmb addr:$src)>;
8796  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8797                          (v4i32 VR128X:$src0), VK2WM:$mask),
8798            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8799  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8800                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8801            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8802}
8803
8804let Predicates = [HasDQI, HasVLX] in {
8805  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8806            (VCVTPS2QQZ128rm addr:$src)>;
8807  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8808                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8809                                 VR128X:$src0)),
8810            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8811  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8812                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8813                                 v2i64x_info.ImmAllZerosV)),
8814            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8815
8816  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8817            (VCVTPS2UQQZ128rm addr:$src)>;
8818  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8819                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8820                                 VR128X:$src0)),
8821            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8822  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8823                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8824                                 v2i64x_info.ImmAllZerosV)),
8825            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8826
8827  def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8828            (VCVTTPS2QQZ128rm addr:$src)>;
8829  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8830                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8831                                 VR128X:$src0)),
8832            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8833  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8834                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8835                                 v2i64x_info.ImmAllZerosV)),
8836            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8837
8838  def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8839            (VCVTTPS2UQQZ128rm addr:$src)>;
8840  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8841                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8842                                 VR128X:$src0)),
8843            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8844  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8845                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8846                                 v2i64x_info.ImmAllZerosV)),
8847            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8848}
8849
8850let Predicates = [HasVLX] in {
8851  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8852            (VCVTDQ2PDZ128rm addr:$src)>;
8853  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8854                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8855                                 VR128X:$src0)),
8856            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8857  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8858                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8859                                 v2f64x_info.ImmAllZerosV)),
8860            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8861
8862  def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8863            (VCVTUDQ2PDZ128rm addr:$src)>;
8864  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8865                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8866                                 VR128X:$src0)),
8867            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8868  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8869                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8870                                 v2f64x_info.ImmAllZerosV)),
8871            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8872}
8873
8874//===----------------------------------------------------------------------===//
8875// Half precision conversion instructions
8876//===----------------------------------------------------------------------===//
8877
8878let Uses = [MXCSR], mayRaiseFPException = 1 in
8879multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8880                           X86MemOperand x86memop, dag ld_dag,
8881                           X86FoldableSchedWrite sched> {
8882  defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8883                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8884                            (X86any_cvtph2ps (_src.VT _src.RC:$src)),
8885                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
8886                            T8, PD, Sched<[sched]>;
8887  defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8888                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8889                            (X86any_cvtph2ps (_src.VT ld_dag)),
8890                            (X86cvtph2ps (_src.VT ld_dag))>,
8891                            T8, PD, Sched<[sched.Folded]>;
8892}
8893
8894multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8895                               X86FoldableSchedWrite sched> {
8896  let Uses = [MXCSR] in
8897  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8898                             (ins _src.RC:$src), "vcvtph2ps",
8899                             "{sae}, $src", "$src, {sae}",
8900                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
8901                             T8, PD, EVEX_B, Sched<[sched]>;
8902}
8903
8904let Predicates = [HasAVX512] in
8905  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
8906                                    (load addr:$src), WriteCvtPH2PSZ>,
8907                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8908                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8909
8910let Predicates = [HasVLX] in {
8911  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8912                       (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
8913                       EVEX_CD8<32, CD8VH>;
8914  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8915                       (bitconvert (v2i64 (X86vzload64 addr:$src))),
8916                       WriteCvtPH2PS>, EVEX, EVEX_V128,
8917                       EVEX_CD8<32, CD8VH>;
8918
8919  // Pattern match vcvtph2ps of a scalar i64 load.
8920  def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
8921              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8922            (VCVTPH2PSZ128rm addr:$src)>;
8923}
8924
8925multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8926                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8927let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8928  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8929             (ins _src.RC:$src1, i32u8imm:$src2),
8930             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8931             [(set _dest.RC:$dst,
8932                   (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
8933             Sched<[RR]>;
8934  let Constraints = "$src0 = $dst" in
8935  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8936             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8937             "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
8938             [(set _dest.RC:$dst,
8939                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8940                                 _dest.RC:$src0, _src.KRCWM:$mask))]>,
8941             Sched<[RR]>, EVEX_K;
8942  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8943             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8944             "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
8945             [(set _dest.RC:$dst,
8946                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8947                                 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8948             Sched<[RR]>, EVEX_KZ;
8949  let hasSideEffects = 0, mayStore = 1 in {
8950    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
8951               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
8952               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8953               Sched<[MR]>;
8954    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
8955               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8956               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
8957                EVEX_K, Sched<[MR]>;
8958  }
8959}
8960}
8961
8962multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8963                               SchedWrite Sched> {
8964  let hasSideEffects = 0, Uses = [MXCSR] in {
8965    def rrb : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8966              (ins _src.RC:$src1, i32u8imm:$src2),
8967              "vcvtps2ph\t{$src2, {sae}, $src1, $dst|$dst, $src1, {sae}, $src2}",
8968              [(set _dest.RC:$dst,
8969                    (X86cvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
8970              EVEX_B, Sched<[Sched]>;
8971    let Constraints = "$src0 = $dst" in
8972    def rrbk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8973              (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8974              "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}}|$dst {${mask}}, $src1, {sae}, $src2}",
8975              [(set _dest.RC:$dst,
8976                    (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
8977                                  _dest.RC:$src0, _src.KRCWM:$mask))]>,
8978              EVEX_B, Sched<[Sched]>, EVEX_K;
8979    def rrbkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8980              (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8981              "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, {sae}, $src2}",
8982              [(set _dest.RC:$dst,
8983                    (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
8984                                  _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8985              EVEX_B, Sched<[Sched]>, EVEX_KZ;
8986}
8987}
8988
8989let Predicates = [HasAVX512] in {
8990  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
8991                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
8992                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
8993                                        EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8994
8995  def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
8996            (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
8997}
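// Illustrative only (assumed printing, Intel syntax): the 512-bit records
// defined above correspond to forms such as
//   vcvtps2ph ymm1 {k1} {z}, zmm2, imm8            ; VCVTPS2PHZrr/rrk/rrkz
//   vcvtps2ph ymmword ptr [mem] {k1}, zmm2, imm8   ; VCVTPS2PHZmr/mrk
// and the Pat<> above lets a plain store of the converted value select the
// memory-destination encoding directly instead of a convert-then-store pair.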
8998
8999let Predicates = [HasVLX] in {
9000  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
9001                                       WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
9002                                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
9003  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
9004                                       WriteCvtPS2PH, WriteCvtPS2PHSt>,
9005                                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
9006
9007  def : Pat<(store (f64 (extractelt
9008                         (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9009                         (iPTR 0))), addr:$dst),
9010            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9011  def : Pat<(store (i64 (extractelt
9012                         (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9013                         (iPTR 0))), addr:$dst),
9014            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9015  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
9016            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
9017}
9018
9019// Unordered/ordered scalar FP compare with SAE; sets EFLAGS.
9020multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
9021                              string OpcodeStr, Domain d,
9022                              X86FoldableSchedWrite sched = WriteFComX> {
9023  let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
9024  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
9025                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
9026                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
9027}
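// Illustrative only (assumed printing, Intel syntax): instances of this
// multiclass compare two scalars with exceptions suppressed and set EFLAGS,
// e.g.
//   vucomiss xmm1, xmm2, {sae}
//   vcomisd  xmm1, xmm2, {sae}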
9028
9029let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9030  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
9031                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9032  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
9033                                   AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
9034  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
9035                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9036  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
9037                                   AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
9038}
9039
9040let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9041  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
9042                                 "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9043                                 EVEX_CD8<32, CD8VT1>;
9044  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
9045                                  "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
9046                                  VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9047  defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
9048                                 "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9049                                 EVEX_CD8<32, CD8VT1>;
9050  defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
9051                                 "comisd", SSEPackedDouble>, TB, PD, EVEX,
9052                                  VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9053  let isCodeGenOnly = 1 in {
9054    defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
9055                          sse_load_f32, "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9056                          EVEX_CD8<32, CD8VT1>;
9057    defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
9058                          sse_load_f64, "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
9059                          VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9060
9061    defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
9062                          sse_load_f32, "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9063                          EVEX_CD8<32, CD8VT1>;
9064    defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
9065                          sse_load_f64, "comisd", SSEPackedDouble>, TB, PD, EVEX,
9066                          VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9067  }
9068}
9069
9070let Defs = [EFLAGS], Predicates = [HasFP16] in {
9071  defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
9072                                SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
9073                                EVEX_CD8<16, CD8VT1>;
9074  defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
9075                                SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
9076                                EVEX_CD8<16, CD8VT1>;
9077  defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
9078                                "ucomish", SSEPackedSingle>, T_MAP5, EVEX,
9079                                VEX_LIG, EVEX_CD8<16, CD8VT1>;
9080  defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
9081                                "comish", SSEPackedSingle>, T_MAP5, EVEX,
9082                                VEX_LIG, EVEX_CD8<16, CD8VT1>;
9083  let isCodeGenOnly = 1 in {
9084    defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
9085                                sse_load_f16, "ucomish", SSEPackedSingle>,
9086                                T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9087
9088    defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
9089                                sse_load_f16, "comish", SSEPackedSingle>,
9090                                T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9091  }
9092}
9093
9094/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
9095multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9096                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
9097                         Predicate prd = HasAVX512> {
9098  let Predicates = [prd], ExeDomain = _.ExeDomain in {
9099  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9100                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9101                           "$src2, $src1", "$src1, $src2",
9102                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9103                           EVEX, VVVV, VEX_LIG, Sched<[sched]>;
9104  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9105                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9106                         "$src2, $src1", "$src1, $src2",
9107                         (OpNode (_.VT _.RC:$src1),
9108                          (_.ScalarIntMemFrags addr:$src2))>, EVEX, VVVV, VEX_LIG,
9109                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9110}
9111}
9112
9113defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
9114                               f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
9115                               T_MAP6, PD;
9116defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
9117                                 SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
9118                                 EVEX_CD8<16, CD8VT1>, T_MAP6, PD;
9119let Uses = [MXCSR] in {
9120defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
9121                               f32x_info>, EVEX_CD8<32, CD8VT1>,
9122                               T8, PD;
9123defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
9124                               f64x_info>, REX_W, EVEX_CD8<64, CD8VT1>,
9125                               T8, PD;
9126defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
9127                                 SchedWriteFRsqrt.Scl, f32x_info>,
9128                                 EVEX_CD8<32, CD8VT1>, T8, PD;
9129defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
9130                                 SchedWriteFRsqrt.Scl, f64x_info>, REX_W,
9131                                 EVEX_CD8<64, CD8VT1>, T8, PD;
9132}
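// Illustrative only (assumed printing, Intel syntax): the scalar forms above
// approximate 1/x and 1/sqrt(x) with a relative error below 2^-14 and copy
// the upper elements from $src1, e.g.
//   vrcp14ss   xmm1 {k1}, xmm2, xmm3
//   vrsqrt14sd xmm1 {k1} {z}, xmm2, qword ptr [mem]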
9133
9134/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
9135multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
9136                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9137  let ExeDomain = _.ExeDomain in {
9138  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9139                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9140                         (_.VT (OpNode _.RC:$src))>, EVEX, T8, PD,
9141                         Sched<[sched]>;
9142  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9143                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9144                         (OpNode (_.VT
9145                           (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8, PD,
9146                         Sched<[sched.Folded, sched.ReadAfterFold]>;
9147  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9148                          (ins _.ScalarMemOp:$src), OpcodeStr,
9149                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9150                          (OpNode (_.VT
9151                            (_.BroadcastLdFrag addr:$src)))>,
9152                          EVEX, T8, PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9153  }
9154}
9155
9156multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
9157                                X86SchedWriteWidths sched> {
9158  let Uses = [MXCSR] in {
9159  defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
9160                             v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
9161  defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
9162                             v8f64_info>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
9163  }
9164  let Predicates = [HasFP16] in
9165  defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
9166                           v32f16_info>, EVEX_V512, T_MAP6, EVEX_CD8<16, CD8VF>;
9167
9168  // Define only if the AVX512VL feature is present.
9169  let Predicates = [HasVLX], Uses = [MXCSR] in {
9170    defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9171                                  OpNode, sched.XMM, v4f32x_info>,
9172                                  EVEX_V128, EVEX_CD8<32, CD8VF>;
9173    defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9174                                  OpNode, sched.YMM, v8f32x_info>,
9175                                  EVEX_V256, EVEX_CD8<32, CD8VF>;
9176    defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9177                                  OpNode, sched.XMM, v2f64x_info>,
9178                                  EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>;
9179    defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9180                                  OpNode, sched.YMM, v4f64x_info>,
9181                                  EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>;
9182  }
9183  let Predicates = [HasFP16, HasVLX] in {
9184    defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9185                                OpNode, sched.XMM, v8f16x_info>,
9186                                EVEX_V128, T_MAP6, EVEX_CD8<16, CD8VF>;
9187    defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9188                                OpNode, sched.YMM, v16f16x_info>,
9189                                EVEX_V256, T_MAP6, EVEX_CD8<16, CD8VF>;
9190  }
9191}
9192
9193defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
9194defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
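// Illustrative only (assumed printing, Intel syntax): the packed forms accept
// a register, a full-width load, or an element broadcast (EVEX_B), e.g.
//   vrsqrt14ps zmm1 {k1}, zmmword ptr [rax]
//   vrcp14pd   zmm1 {k1}, qword ptr [rax]{1to8}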
9195
9196/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
9197multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9198                         SDNode OpNode, SDNode OpNodeSAE,
9199                         X86FoldableSchedWrite sched> {
9200  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
9201  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9202                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9203                           "$src2, $src1", "$src1, $src2",
9204                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9205                           Sched<[sched]>, SIMD_EXC;
9206
9207  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9208                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9209                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
9210                            (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9211                            EVEX_B, Sched<[sched]>;
9212
9213  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9214                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9215                         "$src2, $src1", "$src1, $src2",
9216                         (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
9217                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9218  }
9219}
9220
9221multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9222                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9223  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
9224                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV;
9225  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
9226                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV;
9227}
9228
9229multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
9230                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9231  let Predicates = [HasFP16] in
9232  defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode, OpNodeSAE, sched>,
9233               EVEX_CD8<16, CD8VT1>, T_MAP6, PD, EVEX, VVVV;
9234}
9235
9236let Predicates = [HasERI] in {
9237  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
9238                               SchedWriteFRcp.Scl>;
9239  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
9240                               SchedWriteFRsqrt.Scl>;
9241}
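// Illustrative only (assumed printing, Intel syntax): the ERI scalar forms
// tighten the approximation to a relative error below 2^-28 and add an {sae}
// register variant, e.g.
//   vrcp28ss   xmm1 {k1}, xmm2, xmm3
//   vrsqrt28sd xmm1 {k1}, xmm2, xmm3, {sae}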
9242
9243defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9244                              SchedWriteFRnd.Scl>,
9245                 avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9246                                  SchedWriteFRnd.Scl>;
9247
9248/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
9249multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9250                         SDNode OpNode, X86FoldableSchedWrite sched> {
9251  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9252  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9253                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9254                         (OpNode (_.VT _.RC:$src))>,
9255                         Sched<[sched]>;
9256
9257  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9258                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9259                         (OpNode (_.VT
9260                             (bitconvert (_.LdFrag addr:$src))))>,
9261                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9262
9263  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9264                         (ins _.ScalarMemOp:$src), OpcodeStr,
9265                         "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9266                         (OpNode (_.VT
9267                                  (_.BroadcastLdFrag addr:$src)))>,
9268                         EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9269  }
9270}
9271multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9272                         SDNode OpNode, X86FoldableSchedWrite sched> {
9273  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
9274  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9275                        (ins _.RC:$src), OpcodeStr,
9276                        "{sae}, $src", "$src, {sae}",
9277                        (OpNode (_.VT _.RC:$src))>,
9278                        EVEX_B, Sched<[sched]>;
9279}
9280
9281multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
9282                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9283   defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
9284              avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
9285              T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
9286   defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
9287              avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
9288              T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
9289}
9290
9291multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
9292                                  SDNode OpNode, X86SchedWriteWidths sched> {
9293  // Define only if the AVX512VL feature is present.
9294  let Predicates = [HasVLX] in {
9295    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
9296                                sched.XMM>,
9297                                EVEX_V128, T8, PD, EVEX_CD8<32, CD8VF>;
9298    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
9299                                sched.YMM>,
9300                                EVEX_V256, T8, PD, EVEX_CD8<32, CD8VF>;
9301    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
9302                                sched.XMM>,
9303                                EVEX_V128, REX_W, T8, PD, EVEX_CD8<64, CD8VF>;
9304    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
9305                                sched.YMM>,
9306                                EVEX_V256, REX_W, T8, PD, EVEX_CD8<64, CD8VF>;
9307  }
9308}
9309
9310multiclass  avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
9311                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9312  let Predicates = [HasFP16] in
9313  defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
9314              avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
9315              T_MAP6, PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
9316  let Predicates = [HasFP16, HasVLX] in {
9317    defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
9318                                     EVEX_V128, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
9319    defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
9320                                     EVEX_V256, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
9321  }
9322}
9323let Predicates = [HasERI] in {
9324 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
9325                            SchedWriteFRsqrt>, EVEX;
9326 defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
9327                            SchedWriteFRcp>, EVEX;
9328 defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
9329                            SchedWriteFAdd>, EVEX;
9330}
9331defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9332                            SchedWriteFRnd>,
9333                 avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9334                                     SchedWriteFRnd>,
9335                 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
9336                                          SchedWriteFRnd>, EVEX;
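// Illustrative only (assumed printing, Intel syntax): vgetexp returns the
// unbiased exponent of each element as a floating-point value (roughly
// floor(log2(|x|)) for normal inputs), e.g.
//   vgetexpps zmm1 {k1}, zmm2
//   vgetexpsd xmm1 {k1}, xmm2, xmm3, {sae}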
9337
9338multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
9339                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9340  let ExeDomain = _.ExeDomain in
9341  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9342                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
9343                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
9344                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
9345}
9346
9347multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
9348                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9349  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9350  defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
9351                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9352                         (_.VT (any_fsqrt _.RC:$src)),
9353                         (_.VT (fsqrt _.RC:$src))>, EVEX,
9354                         Sched<[sched]>;
9355  defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9356                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9357                         (any_fsqrt (_.VT (_.LdFrag addr:$src))),
9358                         (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
9359                         Sched<[sched.Folded, sched.ReadAfterFold]>;
9360  defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9361                          (ins _.ScalarMemOp:$src), OpcodeStr,
9362                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9363                          (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
9364                          (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
9365                          EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9366  }
9367}
9368
9369let Uses = [MXCSR], mayRaiseFPException = 1 in
9370multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
9371                                  X86SchedWriteSizes sched> {
9372  let Predicates = [HasFP16] in
9373  defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9374                                sched.PH.ZMM, v32f16_info>,
9375                                EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
9376  let Predicates = [HasFP16, HasVLX] in {
9377    defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9378                                     sched.PH.XMM, v8f16x_info>,
9379                                     EVEX_V128, T_MAP5, EVEX_CD8<16, CD8VF>;
9380    defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9381                                     sched.PH.YMM, v16f16x_info>,
9382                                     EVEX_V256, T_MAP5, EVEX_CD8<16, CD8VF>;
9383  }
9384  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9385                                sched.PS.ZMM, v16f32_info>,
9386                                EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
9387  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9388                                sched.PD.ZMM, v8f64_info>,
9389                                EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9390  // Define only if the AVX512VL feature is present.
9391  let Predicates = [HasVLX] in {
9392    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9393                                     sched.PS.XMM, v4f32x_info>,
9394                                     EVEX_V128, TB, EVEX_CD8<32, CD8VF>;
9395    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9396                                     sched.PS.YMM, v8f32x_info>,
9397                                     EVEX_V256, TB, EVEX_CD8<32, CD8VF>;
9398    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9399                                     sched.PD.XMM, v2f64x_info>,
9400                                     EVEX_V128, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9401    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9402                                     sched.PD.YMM, v4f64x_info>,
9403                                     EVEX_V256, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9404  }
9405}
9406
9407let Uses = [MXCSR] in
9408multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
9409                                        X86SchedWriteSizes sched> {
9410  let Predicates = [HasFP16] in
9411  defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
9412                                      sched.PH.ZMM, v32f16_info>,
9413                                      EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
9414  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
9415                                      sched.PS.ZMM, v16f32_info>,
9416                                      EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
9417  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
9418                                      sched.PD.ZMM, v8f64_info>,
9419                                      EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9420}
9421
9422multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9423                              X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
9424  let ExeDomain = _.ExeDomain, Predicates = [prd] in {
9425    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9426                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9427                         "$src2, $src1", "$src1, $src2",
9428                         (X86fsqrts (_.VT _.RC:$src1),
9429                                    (_.VT _.RC:$src2))>,
9430                         Sched<[sched]>, SIMD_EXC;
9431    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9432                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9433                         "$src2, $src1", "$src1, $src2",
9434                         (X86fsqrts (_.VT _.RC:$src1),
9435                                    (_.ScalarIntMemFrags addr:$src2))>,
9436                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9437    let Uses = [MXCSR] in
9438    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9439                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
9440                         "$rc, $src2, $src1", "$src1, $src2, $rc",
9441                         (X86fsqrtRnds (_.VT _.RC:$src1),
9442                                     (_.VT _.RC:$src2),
9443                                     (i32 timm:$rc))>,
9444                         EVEX_B, EVEX_RC, Sched<[sched]>;
9445
9446    let isCodeGenOnly = 1, hasSideEffects = 0 in {
9447      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9448                (ins _.FRC:$src1, _.FRC:$src2),
9449                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9450                Sched<[sched]>, SIMD_EXC;
9451      let mayLoad = 1 in
9452        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9453                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
9454                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9455                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9456    }
9457  }
9458
9459  let Predicates = [prd] in {
9460    def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
9461              (!cast<Instruction>(Name#Zr)
9462                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9463  }
9464
9465  let Predicates = [prd, OptForSize] in {
9466    def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
9467              (!cast<Instruction>(Name#Zm)
9468                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
9469  }
9470}
9471
9472multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9473                                  X86SchedWriteSizes sched> {
9474  defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
9475                        EVEX_CD8<16, CD8VT1>, EVEX, VVVV, T_MAP5, XS;
9476  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9477                        EVEX_CD8<32, CD8VT1>, EVEX, VVVV, TB, XS;
9478  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9479                        EVEX_CD8<64, CD8VT1>, EVEX, VVVV, TB, XD, REX_W;
9480}
9481
9482defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9483             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
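// Illustrative only (assumed printing, Intel syntax): the 512-bit register
// forms defined by avx512_sqrt_packed_all_round also take a static rounding
// mode via EVEX.RC, e.g.
//   vsqrtps zmm1 {k1}, zmm2, {rd-sae}
//   vsqrtpd zmm1, zmm2, {rz-sae}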
9484
9485defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
9486
9487multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9488                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9489  let ExeDomain = _.ExeDomain in {
9490  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9491                           (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9492                           "$src3, $src2, $src1", "$src1, $src2, $src3",
9493                           (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9494                           (i32 timm:$src3)))>,
9495                           Sched<[sched]>, SIMD_EXC;
9496
9497  let Uses = [MXCSR] in
9498  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9499                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9500                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9501                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9502                         (i32 timm:$src3)))>, EVEX_B,
9503                         Sched<[sched]>;
9504
9505  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9506                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9507                         OpcodeStr,
9508                         "$src3, $src2, $src1", "$src1, $src2, $src3",
9509                         (_.VT (X86RndScales _.RC:$src1,
9510                                (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
9511                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9512
9513  let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9514    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9515               (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9516               OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9517               []>, Sched<[sched]>, SIMD_EXC;
9518
9519    let mayLoad = 1 in
9520      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9521                 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9522                 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9523                 []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9524  }
9525  }
9526
9527  let Predicates = [HasAVX512] in {
9528    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
9529              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
9530               _.FRC:$src1, timm:$src2))>;
9531  }
9532
9533  let Predicates = [HasAVX512, OptForSize] in {
9534    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
9535              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
9536               addr:$src1, timm:$src2))>;
9537  }
9538}
9539
9540let Predicates = [HasFP16] in
9541defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
9542                                           SchedWriteFRnd.Scl, f16x_info>,
9543                                           AVX512PSIi8Base, TA, EVEX, VVVV,
9544                                           EVEX_CD8<16, CD8VT1>;
9545
9546defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9547                                           SchedWriteFRnd.Scl, f32x_info>,
9548                                           AVX512AIi8Base, EVEX, VVVV, VEX_LIG,
9549                                           EVEX_CD8<32, CD8VT1>;
9550
9551defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9552                                           SchedWriteFRnd.Scl, f64x_info>,
9553                                           REX_W, AVX512AIi8Base, EVEX, VVVV, VEX_LIG,
9554                                           EVEX_CD8<64, CD8VT1>;
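// Illustrative only (assumed semantics): vrndscale rounds each element to
// 2^-M precision, i.e. result = round(x * 2^M) / 2^M with M taken from the
// upper four bits of the immediate, e.g. (Intel syntax, assumed)
//   vrndscaless xmm1 {k1}, xmm2, xmm3, imm8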
9555
9556multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9557                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9558                                dag OutMask, Predicate BasePredicate> {
9559  let Predicates = [BasePredicate] in {
9560    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9561               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9562               (extractelt _.VT:$dst, (iPTR 0))))),
9563              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9564               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9565
9566    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9567               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9568               ZeroFP))),
9569              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9570               OutMask, _.VT:$src2, _.VT:$src1)>;
9571  }
9572}
9573
9574defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
9575                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
9576                            fp16imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasFP16>;
9577defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9578                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9579                            fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
9580defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9581                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9582                            fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
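// Illustrative only: the patterns above fold a masked scalar select wrapped
// around the square root, roughly
//   dst = movss(src1, select(k, sqrt(src2[0]), passthru))
// straight into the masked records VSQRTSSZr_Intk / VSQRTSSZr_Intkz (and the
// SD/SH equivalents) instead of emitting a separate blend.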
9583
9584
9585//===----------------------------------------------------------------------===//
9586// Integer truncate and extend operations
9587//===----------------------------------------------------------------------===//
9588
9589multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9590                              SDPatternOperator MaskNode,
9591                              X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9592                              X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9593  let ExeDomain = DestInfo.ExeDomain in {
9594  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9595             (ins SrcInfo.RC:$src),
9596             OpcodeStr # "\t{$src, $dst|$dst, $src}",
9597             [(set DestInfo.RC:$dst,
9598                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9599             EVEX, Sched<[sched]>;
9600  let Constraints = "$src0 = $dst" in
9601  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9602             (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9603             OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9604             [(set DestInfo.RC:$dst,
9605                   (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9606                             (DestInfo.VT DestInfo.RC:$src0),
9607                             SrcInfo.KRCWM:$mask))]>,
9608             EVEX, EVEX_K, Sched<[sched]>;
9609  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9610             (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9611             OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9612             [(set DestInfo.RC:$dst,
9613                   (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9614                             DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9615             EVEX, EVEX_KZ, Sched<[sched]>;
9616  }
9617
9618  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9619    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9620               (ins x86memop:$dst, SrcInfo.RC:$src),
9621               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9622               EVEX, Sched<[sched.Folded]>;
9623
9624    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9625               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9626               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9627               EVEX, EVEX_K, Sched<[sched.Folded]>;
9628  } // mayStore = 1, hasSideEffects = 0
9629}
9630
9631multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9632                                    PatFrag truncFrag, PatFrag mtruncFrag,
9633                                    string Name> {
9634
9635  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9636            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
9637                                    addr:$dst, SrcInfo.RC:$src)>;
9638
9639  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9640                        SrcInfo.KRCWM:$mask),
9641            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
9642                            addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
9643}
9644
9645multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9646                        SDNode OpNode256, SDNode OpNode512,
9647                        SDPatternOperator MaskNode128,
9648                        SDPatternOperator MaskNode256,
9649                        SDPatternOperator MaskNode512,
9650                        X86SchedWriteWidths sched,
9651                        AVX512VLVectorVTInfo VTSrcInfo,
9652                        X86VectorVTInfo DestInfoZ128,
9653                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9654                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9655                        X86MemOperand x86memopZ, PatFrag truncFrag,
9656                        PatFrag mtruncFrag, Predicate prd = HasAVX512> {
9657
9658  let Predicates = [HasVLX, prd] in {
9659    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched.XMM,
9660                             VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9661                avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
9662                                         mtruncFrag, NAME>, EVEX_V128;
9663
9664    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched.YMM,
9665                             VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9666                avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
9667                                         mtruncFrag, NAME>, EVEX_V256;
9668  }
9669  let Predicates = [prd] in
9670    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched.ZMM,
9671                             VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9672                avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
9673                                         mtruncFrag, NAME>, EVEX_V512;
9674}
9675
9676multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
9677                           X86SchedWriteWidths sched, PatFrag StoreNode,
9678                           PatFrag MaskedStoreNode, SDNode InVecNode,
9679                           SDPatternOperator InVecMaskNode> {
9680  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9681                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9682                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
9683                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9684                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
9685}
9686
9687multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9688                           SDPatternOperator MaskNode,
9689                           X86SchedWriteWidths sched, PatFrag StoreNode,
9690                           PatFrag MaskedStoreNode, SDNode InVecNode,
9691                           SDPatternOperator InVecMaskNode> {
9692  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9693                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
9694                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
9695                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9696                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
9697}
9698
9699multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9700                           SDPatternOperator MaskNode,
9701                           X86SchedWriteWidths sched, PatFrag StoreNode,
9702                           PatFrag MaskedStoreNode, SDNode InVecNode,
9703                           SDPatternOperator InVecMaskNode> {
9704  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9705                          InVecMaskNode, MaskNode, MaskNode, sched,
9706                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
9707                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9708                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
9709}
9710
9711multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9712                           SDPatternOperator MaskNode,
9713                           X86SchedWriteWidths sched, PatFrag StoreNode,
9714                           PatFrag MaskedStoreNode, SDNode InVecNode,
9715                           SDPatternOperator InVecMaskNode> {
9716  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9717                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
9718                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
9719                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9720                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
9721}
9722
9723multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9724                           SDPatternOperator MaskNode,
9725                           X86SchedWriteWidths sched, PatFrag StoreNode,
9726                           PatFrag MaskedStoreNode, SDNode InVecNode,
9727                           SDPatternOperator InVecMaskNode> {
9728  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9729                          InVecMaskNode, MaskNode, MaskNode, sched,
9730                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
9731                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9732                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
9733}
9734
9735multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9736                           SDPatternOperator MaskNode,
9737                           X86SchedWriteWidths sched, PatFrag StoreNode,
9738                           PatFrag MaskedStoreNode, SDNode InVecNode,
9739                           SDPatternOperator InVecMaskNode> {
9740  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9741                          InVecMaskNode, MaskNode, MaskNode, sched,
9742                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
9743                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9744                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
9745}
9746
9747defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",
9748                                  SchedWriteVecTruncate, truncstorevi8,
9749                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9750defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",
9751                                  SchedWriteVecTruncate, truncstore_s_vi8,
9752                                  masked_truncstore_s_vi8, X86vtruncs,
9753                                  X86vmtruncs>;
9754defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb",
9755                                  SchedWriteVecTruncate, truncstore_us_vi8,
9756                                  masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;
9757
9758defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9759                                  SchedWriteVecTruncate, truncstorevi16,
9760                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9761defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
9762                                  SchedWriteVecTruncate, truncstore_s_vi16,
9763                                  masked_truncstore_s_vi16, X86vtruncs,
9764                                  X86vmtruncs>;
9765defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9766                                  select_truncus, SchedWriteVecTruncate,
9767                                  truncstore_us_vi16, masked_truncstore_us_vi16,
9768                                  X86vtruncus, X86vmtruncus>;
9769
9770defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9771                                  SchedWriteVecTruncate, truncstorevi32,
9772                                  masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9773defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
9774                                  SchedWriteVecTruncate, truncstore_s_vi32,
9775                                  masked_truncstore_s_vi32, X86vtruncs,
9776                                  X86vmtruncs>;
9777defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9778                                  select_truncus, SchedWriteVecTruncate,
9779                                  truncstore_us_vi32, masked_truncstore_us_vi32,
9780                                  X86vtruncus, X86vmtruncus>;
9781
9782defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9783                                  SchedWriteVecTruncate, truncstorevi8,
9784                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9785defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9786                                  SchedWriteVecTruncate, truncstore_s_vi8,
9787                                  masked_truncstore_s_vi8, X86vtruncs,
9788                                  X86vmtruncs>;
9789defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
9790                                  select_truncus, SchedWriteVecTruncate,
9791                                  truncstore_us_vi8, masked_truncstore_us_vi8,
9792                                  X86vtruncus, X86vmtruncus>;
9793
9794defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
9795                                  SchedWriteVecTruncate, truncstorevi16,
9796                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9797defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
9798                                  SchedWriteVecTruncate, truncstore_s_vi16,
9799                                  masked_truncstore_s_vi16, X86vtruncs,
9800                                  X86vmtruncs>;
9801defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
9802                                  select_truncus, SchedWriteVecTruncate,
9803                                  truncstore_us_vi16, masked_truncstore_us_vi16,
9804                                  X86vtruncus, X86vmtruncus>;
9805
9806defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
9807                                  SchedWriteVecTruncate, truncstorevi8,
9808                                  masked_truncstorevi8, X86vtrunc,
9809                                  X86vmtrunc>;
9810defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
9811                                  SchedWriteVecTruncate, truncstore_s_vi8,
9812                                  masked_truncstore_s_vi8, X86vtruncs,
9813                                  X86vmtruncs>;
9814defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
9815                                  select_truncus, SchedWriteVecTruncate,
9816                                  truncstore_us_vi8, masked_truncstore_us_vi8,
9817                                  X86vtruncus, X86vmtruncus>;
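// Illustrative only (assumed printing, Intel syntax): these down-converts
// narrow each element with plain truncation (vpmov*), signed saturation
// (vpmovs*), or unsigned saturation (vpmovus*), and can also store directly
// to memory, e.g.
//   vpmovqb   xmm1 {k1} {z}, zmm2        ; 8 x i64 -> 8 x i8
//   vpmovsdb  xmm1 {k1}, zmm2            ; 16 x i32 -> 16 x i8, signed saturate
//   vpmovusdw ymmword ptr [mem], zmm1    ; 16 x i32 -> 16 x i16, store form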
9818
9819let Predicates = [HasAVX512, NoVLX] in {
9820def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
9821         (v8i16 (EXTRACT_SUBREG
9822                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
9823                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
9824def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
9825         (v4i32 (EXTRACT_SUBREG
9826                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
9827                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
9828}
9829
9830let Predicates = [HasBWI, NoVLX] in {
9831def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9832         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
9833                                            VR256X:$src, sub_ymm))), sub_xmm))>;
9834}
9835
9836// Without BWI we can't use vXi16/vXi8 vselect, so we have to use vmtrunc nodes.
9837multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
9838                           X86VectorVTInfo DestInfo,
9839                           X86VectorVTInfo SrcInfo> {
9840  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9841                                 DestInfo.RC:$src0,
9842                                 SrcInfo.KRCWM:$mask)),
9843            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
9844                                                 SrcInfo.KRCWM:$mask,
9845                                                 SrcInfo.RC:$src)>;
9846
9847  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9848                                 DestInfo.ImmAllZerosV,
9849                                 SrcInfo.KRCWM:$mask)),
9850            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
9851                                                  SrcInfo.RC:$src)>;
9852}
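// Illustrative only: these lowerings select the merge- and zero-masked
// register forms defined in avx512_trunc_common, e.g. a
// (X86vmtrunc (v16i32 src), passthru, mask) node maps onto VPMOVDWZrrk.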
9853
9854let Predicates = [HasVLX] in {
9855defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
9856defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
9857defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
9858}
9859
9860let Predicates = [HasAVX512] in {
9861defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
9862defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
9863defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
9864
9865defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
9866defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
9867defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
9868
9869defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
9870defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
9871defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
9872}
9873
9874multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9875              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
9876              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode> {
9877  let ExeDomain = DestInfo.ExeDomain in {
9878  defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9879                    (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
9880                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
9881                  EVEX, Sched<[sched]>;
9882
9883  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9884                  (ins x86memop:$src), OpcodeStr, "$src", "$src",
9885                  (DestInfo.VT (LdFrag addr:$src))>,
9886                EVEX, Sched<[sched.Folded]>;
9887  }
9888}
9889
9890multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
9891          SDNode OpNode, SDNode InVecNode, string ExtTy,
9892          X86SchedWriteWidths sched,
9893          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9894  let Predicates = [HasVLX, HasBWI] in {
9895    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
9896                    v16i8x_info, i64mem, LdFrag, InVecNode>,
9897                     EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V128, WIG;
9898
9899    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
9900                    v16i8x_info, i128mem, LdFrag, OpNode>,
9901                     EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V256, WIG;
9902  }
9903  let Predicates = [HasBWI] in {
9904    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
9905                    v32i8x_info, i256mem, LdFrag, OpNode>,
9906                     EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V512, WIG;
9907  }
9908}
9909
9910multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
9911          SDNode OpNode, SDNode InVecNode, string ExtTy,
9912          X86SchedWriteWidths sched,
9913          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9914  let Predicates = [HasVLX, HasAVX512] in {
9915    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
9916                   v16i8x_info, i32mem, LdFrag, InVecNode>,
9917                         EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V128, WIG;
9918
9919    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
9920                   v16i8x_info, i64mem, LdFrag, InVecNode>,
9921                         EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V256, WIG;
9922  }
9923  let Predicates = [HasAVX512] in {
9924    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
9925                   v16i8x_info, i128mem, LdFrag, OpNode>,
9926                         EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V512, WIG;
9927  }
9928}
9929
9930multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
9931                              SDNode InVecNode, string ExtTy,
9932                              X86SchedWriteWidths sched,
9933                              PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9934  let Predicates = [HasVLX, HasAVX512] in {
9935    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
9936                   v16i8x_info, i16mem, LdFrag, InVecNode>,
9937                     EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V128, WIG;
9938
9939    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
9940                   v16i8x_info, i32mem, LdFrag, InVecNode>,
9941                     EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V256, WIG;
9942  }
9943  let Predicates = [HasAVX512] in {
9944    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
9945                   v16i8x_info, i64mem, LdFrag, InVecNode>,
9946                     EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V512, WIG;
9947  }
9948}
9949
9950multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
9951         SDNode OpNode, SDNode InVecNode, string ExtTy,
9952         X86SchedWriteWidths sched,
9953         PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9954  let Predicates = [HasVLX, HasAVX512] in {
9955    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
9956                   v8i16x_info, i64mem, LdFrag, InVecNode>,
9957                     EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V128, WIG;
9958
9959    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
9960                   v8i16x_info, i128mem, LdFrag, OpNode>,
9961                     EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V256, WIG;
9962  }
9963  let Predicates = [HasAVX512] in {
9964    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
9965                   v16i16x_info, i256mem, LdFrag, OpNode>,
9966                     EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V512, WIG;
9967  }
9968}
9969
9970multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
9971         SDNode OpNode, SDNode InVecNode, string ExtTy,
9972         X86SchedWriteWidths sched,
9973         PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9974  let Predicates = [HasVLX, HasAVX512] in {
9975    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
9976                   v8i16x_info, i32mem, LdFrag, InVecNode>,
9977                     EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V128, WIG;
9978
9979    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
9980                   v8i16x_info, i64mem, LdFrag, InVecNode>,
9981                     EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V256, WIG;
9982  }
9983  let Predicates = [HasAVX512] in {
9984    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
9985                   v8i16x_info, i128mem, LdFrag, OpNode>,
9986                     EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V512, WIG;
9987  }
9988}
9989
9990multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
9991         SDNode OpNode, SDNode InVecNode, string ExtTy,
9992         X86SchedWriteWidths sched,
9993         PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
9994
9995  let Predicates = [HasVLX, HasAVX512] in {
9996    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
9997                   v4i32x_info, i64mem, LdFrag, InVecNode>,
9998                     EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V128;
9999
10000    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
10001                   v4i32x_info, i128mem, LdFrag, OpNode>,
10002                     EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V256;
10003  }
10004  let Predicates = [HasAVX512] in {
10005    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
10006                   v8i32x_info, i256mem, LdFrag, OpNode>,
10007                     EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V512;
10008  }
10009}
10010
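// The defms below create both the sign- and zero-extending families; each
// masked register form, e.g. VPMOVZXBWZ256rrk, corresponds roughly to
//   vpmovzxbw %xmm1, %ymm0 {%k1}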
10011defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteVecExtend>;
10012defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteVecExtend>;
10013defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq",       zext_invec, "z", SchedWriteVecExtend>;
10014defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteVecExtend>;
10015defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteVecExtend>;
10016defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteVecExtend>;
10017
10018defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteVecExtend>;
10019defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteVecExtend>;
10020defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq",       sext_invec, "s", SchedWriteVecExtend>;
10021defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteVecExtend>;
10022defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteVecExtend>;
10023defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteVecExtend>;
10024
10025
10026// Patterns for which we also need any-extend versions. aext_vector_inreg
10027// is currently legalized to zext_vector_inreg.
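// For example, with the VPMOVZX instantiation below,
//   (v16i16 (zext (loadv16i8 addr:$src)))
// is selected to the folded load form VPMOVZXBWZ256rm, roughly
//   vpmovzxbw (%rax), %ymm0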
10028multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
10029  // 256-bit patterns
10030  let Predicates = [HasVLX, HasBWI] in {
10031    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
10032              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
10033  }
10034
10035  let Predicates = [HasVLX] in {
10036    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
10037              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
10038
10039    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
10040              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
10041  }
10042
10043  // 512-bit patterns
10044  let Predicates = [HasBWI] in {
10045    def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
10046              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
10047  }
10048  let Predicates = [HasAVX512] in {
10049    def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
10050              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
10051    def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
10052              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
10053
10054    def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
10055              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
10056
10057    def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
10058              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
10059  }
10060}
10061
10062multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
10063                                 SDNode InVecOp> :
10064    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
10065  // 128-bit patterns
10066  let Predicates = [HasVLX, HasBWI] in {
10067  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10068            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10069  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10070            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10071  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10072            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10073  }
10074  let Predicates = [HasVLX] in {
10075  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10076            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10077  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10078            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10079
10080  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
10081            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
10082
10083  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10084            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10085  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10086            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10087  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10088            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10089
10090  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10091            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10092  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
10093            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10094
10095  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10096            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10097  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10098            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10099  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
10100            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10101  }
10102  let Predicates = [HasVLX] in {
10103  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10104            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10105  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
10106            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10107  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10108            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10109
10110  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10111            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10112  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10113            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10114
10115  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10116            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10117  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
10118            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10119  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10120            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10121  }
10122  // 512-bit patterns
10123  let Predicates = [HasAVX512] in {
10124  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10125            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10126  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10127            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10128  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10129            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10130  }
10131}
10132
10133defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
10134defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
10135
10136// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
10137// ext+trunc aggressively, making it impossible to legalize the DAG to this
10138// pattern directly.
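// As a sketch, the two patterns below expand such a truncate to a
// widen-then-narrow pair, roughly:
//   vpmovzxwd %ymm0, %zmm0    ; v16i16 -> v16i32
//   vpmovdb   %zmm0, %xmm0    ; v16i32 -> v16i8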
10139let Predicates = [HasAVX512, NoBWI] in {
10140def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10141         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
10142def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
10143         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
10144}
10145
10146//===----------------------------------------------------------------------===//
10147// GATHER - SCATTER Operations
10148
10149// FIXME: Improve scheduling of gather/scatter instructions.
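// A gather loads one element per set mask bit, clears the mask bits it has
// completed, and writes the updated mask back through $mask_wb, e.g. roughly
//   vgatherdps (%rax,%zmm2,4), %zmm0 {%k1}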
10150multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10151                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10152  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
10153      ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
10154  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
10155            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
10156            !strconcat(OpcodeStr#_.Suffix,
10157            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
10158            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10159            Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
10160}
10161
10162multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
10163                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10164  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
10165                                      vy512xmem>, EVEX_V512, REX_W;
10166  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
10167                                      vz512mem>, EVEX_V512, REX_W;
10168let Predicates = [HasVLX] in {
10169  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10170                              vx256xmem>, EVEX_V256, REX_W;
10171  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
10172                              vy256xmem>, EVEX_V256, REX_W;
10173  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10174                              vx128xmem>, EVEX_V128, REX_W;
10175  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10176                              vx128xmem>, EVEX_V128, REX_W;
10177}
10178}
10179
10180multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
10181                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10182  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10183                                       EVEX_V512;
10184  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10185                                       EVEX_V512;
10186let Predicates = [HasVLX] in {
10187  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10188                                          vy256xmem>, EVEX_V256;
10189  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10190                                          vy128xmem>, EVEX_V256;
10191  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10192                                          vx128xmem>, EVEX_V128;
10193  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10194                                          vx64xmem, VK2WM>, EVEX_V128;
10195}
10196}
10197
10198
10199defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
10200               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
10201
10202defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
10203                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
10204
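// Scatters mirror the gathers above: the mask is consumed as elements are
// stored and is written back through $mask_wb, e.g. roughly
//   vscatterdps %zmm0, (%rax,%zmm2,4) {%k1}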
10205multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10206                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10207
10208let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
10209    hasSideEffects = 0 in
10210
10211  def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
10212            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
10213            !strconcat(OpcodeStr#_.Suffix,
10214            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
10215            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10216            Sched<[WriteStore]>;
10217}
10218
10219multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
10220                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10221  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
10222                                      vy512xmem>, EVEX_V512, REX_W;
10223  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
10224                                      vz512mem>, EVEX_V512, REX_W;
10225let Predicates = [HasVLX] in {
10226  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10227                              vx256xmem>, EVEX_V256, REX_W;
10228  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
10229                              vy256xmem>, EVEX_V256, REX_W;
10230  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10231                              vx128xmem>, EVEX_V128, REX_W;
10232  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10233                              vx128xmem>, EVEX_V128, REX_W;
10234}
10235}
10236
10237multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
10238                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10239  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10240                                       EVEX_V512;
10241  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10242                                       EVEX_V512;
10243let Predicates = [HasVLX] in {
10244  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10245                                          vy256xmem>, EVEX_V256;
10246  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10247                                          vy128xmem>, EVEX_V256;
10248  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10249                                          vx128xmem>, EVEX_V128;
10250  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10251                                          vx64xmem, VK2WM>, EVEX_V128;
10252}
10253}
10254
10255defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
10256               avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
10257
10258defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
10259                avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
10260
10261// Gather/scatter prefetch instructions.
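// The prefetch forms have no register outputs; they only read the mask and
// the vector memory operand, e.g. roughly
//   vgatherpf0dps (%rax,%zmm1,4) {%k1}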
10262multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
10263                       RegisterClass KRC, X86MemOperand memop> {
10264  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
10265  def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
10266            !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
10267            EVEX, EVEX_K, Sched<[WriteLoad]>;
10268}
10269
10270defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
10271                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10272
10273defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
10274                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10275
10276defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
10277                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10278
10279defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
10280                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10281
10282defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
10283                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10284
10285defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
10286                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10287
10288defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
10289                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10290
10291defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
10292                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10293
10294defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
10295                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10296
10297defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
10298                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10299
10300defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
10301                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10302
10303defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
10304                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10305
10306defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
10307                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10308
10309defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
10310                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10311
10312defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
10313                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10314
10315defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
10316                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10317
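// The vpmovm2* instructions below materialize a mask register as a vector by
// sign-extending each mask bit to a full element, e.g. roughly
//   vpmovm2d %k1, %zmm0    ; each i32 lane becomes 0 or -1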
10318multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
10319def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
10320                  !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
10321                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
10322                  EVEX, Sched<[Sched]>;
10323}
10324
10325multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
10326                                 string OpcodeStr, Predicate prd> {
10327let Predicates = [prd] in
10328  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;
10329
10330  let Predicates = [prd, HasVLX] in {
10331    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
10332    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
10333  }
10334}
10335
10336defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
10337defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , REX_W;
10338defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
10339defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , REX_W;
10340
10341multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
10342    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
10343                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
10344                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
10345                        EVEX, Sched<[WriteMove]>;
10346}
10347
10348// Use the 512-bit version to implement the 128/256-bit versions when VLX is not available (NoVLX).
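// The narrow source is widened with INSERT_SUBREG into an undefined 512-bit
// register, the 512-bit instruction (e.g. VPMOVD2MZrr) does the conversion,
// and the result is copied back to the narrower mask register class.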
10349multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
10350                                           X86VectorVTInfo _,
10351                                           string Name> {
10352
10353  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
10354            (_.KVT (COPY_TO_REGCLASS
10355                     (!cast<Instruction>(Name#"Zrr")
10356                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
10357                                      _.RC:$src, _.SubRegIdx)),
10358                   _.KRC))>;
10359}
10360
10361multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
10362                                   AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10363  let Predicates = [prd] in
10364    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
10365                                            EVEX_V512;
10366
10367  let Predicates = [prd, HasVLX] in {
10368    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
10369                                              EVEX_V256;
10370    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
10371                                               EVEX_V128;
10372  }
10373  let Predicates = [prd, NoVLX] in {
10374    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
10375    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
10376  }
10377}
10378
10379defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
10380                                              avx512vl_i8_info, HasBWI>;
10381defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
10382                                              avx512vl_i16_info, HasBWI>, REX_W;
10383defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
10384                                              avx512vl_i32_info, HasDQI>;
10385defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
10386                                              avx512vl_i64_info, HasDQI>, REX_W;
10387
10388// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
10389// is available, but BWI is not. We can't handle this in lowering because
10390// a target-independent DAG combine likes to combine sext and trunc.
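// The expansions below go through a 32-bit element type, e.g. for v16i8:
//   vpmovm2d %k1, %zmm0     ; v16i1 -> v16i32 (sign-extend the mask)
//   vpmovdb  %zmm0, %xmm0   ; v16i32 -> v16i8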
10391let Predicates = [HasDQI, NoBWI] in {
10392  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
10393            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10394  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
10395            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10396}
10397
10398let Predicates = [HasDQI, NoBWI, HasVLX] in {
10399  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
10400            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
10401}
10402
10403//===----------------------------------------------------------------------===//
10404// AVX-512 - COMPRESS and EXPAND
10405//
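// Compress packs the elements selected by the mask towards the low end of the
// destination (or to contiguous memory); expand is the inverse. Illustrative
// forms, roughly:
//   vpcompressd %zmm1, %zmm0 {%k1}       ; register compress, merge-masked
//   vcompressps %zmm0, (%rax) {%k1}      ; compressing store
//   vpexpandd   (%rax), %zmm0 {%k1} {z}  ; expanding load, zero-masked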
10406
10407multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
10408                                 string OpcodeStr, X86FoldableSchedWrite sched> {
10409  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
10410              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10411              (null_frag)>, AVX5128IBase,
10412              Sched<[sched]>;
10413
10414  let mayStore = 1, hasSideEffects = 0 in
10415  def mr : AVX5128I<opc, MRMDestMem, (outs),
10416              (ins _.MemOp:$dst, _.RC:$src),
10417              OpcodeStr # "\t{$src, $dst|$dst, $src}",
10418              []>, EVEX_CD8<_.EltSize, CD8VT1>,
10419              Sched<[sched.Folded]>;
10420
10421  def mrk : AVX5128I<opc, MRMDestMem, (outs),
10422              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
10423              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
10424              []>,
10425              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10426              Sched<[sched.Folded]>;
10427}
10428
10429multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10430  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
10431            (!cast<Instruction>(Name#_.ZSuffix#mrk)
10432                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
10433
10434  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10435            (!cast<Instruction>(Name#_.ZSuffix#rrk)
10436                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10437  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10438            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10439                            _.KRCWM:$mask, _.RC:$src)>;
10440}
10441
10442multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
10443                                 X86FoldableSchedWrite sched,
10444                                 AVX512VLVectorVTInfo VTInfo,
10445                                 Predicate Pred = HasAVX512> {
10446  let Predicates = [Pred] in
10447  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
10448           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10449
10450  let Predicates = [Pred, HasVLX] in {
10451    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
10452                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10453    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
10454                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10455  }
10456}
10457
10458// FIXME: Is there a better scheduler class for VPCOMPRESS?
10459defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
10460                                          avx512vl_i32_info>, EVEX;
10461defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
10462                                          avx512vl_i64_info>, EVEX, REX_W;
10463defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
10464                                          avx512vl_f32_info>, EVEX;
10465defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
10466                                          avx512vl_f64_info>, EVEX, REX_W;
10467
10468// Expand (the inverse of the compress operation above).
10469multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
10470                                 string OpcodeStr, X86FoldableSchedWrite sched> {
10471  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10472              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10473              (null_frag)>, AVX5128IBase,
10474              Sched<[sched]>;
10475
10476  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10477              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
10478              (null_frag)>,
10479            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10480            Sched<[sched.Folded, sched.ReadAfterFold]>;
10481}
10482
10483multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10484
10485  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10486            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10487                                        _.KRCWM:$mask, addr:$src)>;
10488
10489  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10490            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10491                                        _.KRCWM:$mask, addr:$src)>;
10492
10493  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10494                                               (_.VT _.RC:$src0))),
10495            (!cast<Instruction>(Name#_.ZSuffix#rmk)
10496                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
10497
10498  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10499            (!cast<Instruction>(Name#_.ZSuffix#rrk)
10500                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10501  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10502            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10503                            _.KRCWM:$mask, _.RC:$src)>;
10504}
10505
10506multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10507                               X86FoldableSchedWrite sched,
10508                               AVX512VLVectorVTInfo VTInfo,
10509                               Predicate Pred = HasAVX512> {
10510  let Predicates = [Pred] in
10511  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10512           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10513
10514  let Predicates = [Pred, HasVLX] in {
10515    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10516                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10517    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10518                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10519  }
10520}
10521
10522// FIXME: Is there a better scheduler class for VPEXPAND?
10523defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10524                                      avx512vl_i32_info>, EVEX;
10525defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10526                                      avx512vl_i64_info>, EVEX, REX_W;
10527defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10528                                      avx512vl_f32_info>, EVEX;
10529defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10530                                      avx512vl_f64_info>, EVEX, REX_W;
10531
10532// Handle instruction  reg_vec1 = op(reg_vec, imm)
10533//                                op(mem_vec, imm)
10534//                                op(broadcast(eltVt), imm)
10535// All instructions are created with FROUND_CURRENT.
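// For example, vrndscaleps (defined via this multiclass further below) gets
// all three forms, roughly:
//   vrndscaleps $8, %zmm1, %zmm0 {%k1}
//   vrndscaleps $8, (%rax), %zmm0
//   vrndscaleps $8, (%rax){1to16}, %zmm0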
10536multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
10537                                      SDPatternOperator OpNode,
10538                                      SDPatternOperator MaskOpNode,
10539                                      X86FoldableSchedWrite sched,
10540                                      X86VectorVTInfo _> {
10541  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10542  defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
10543                      (ins _.RC:$src1, i32u8imm:$src2),
10544                      OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10545                      (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
10546                      (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
10547                      Sched<[sched]>;
10548  defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10549                    (ins _.MemOp:$src1, i32u8imm:$src2),
10550                    OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10551                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10552                            (i32 timm:$src2)),
10553                    (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10554                                (i32 timm:$src2))>,
10555                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10556  defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10557                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10558                    OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
10559                    "${src1}"#_.BroadcastStr#", $src2",
10560                    (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10561                            (i32 timm:$src2)),
10562                    (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10563                                (i32 timm:$src2))>, EVEX_B,
10564                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10565  }
10566}
10567
10568// Handle instruction  reg_vec1 = op(reg_vec2, imm), {sae}
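// For example, the register-only {sae} form looks roughly like
//   vrndscaleps $8, {sae}, %zmm1, %zmm0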
10569multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10570                                          SDNode OpNode, X86FoldableSchedWrite sched,
10571                                          X86VectorVTInfo _> {
10572  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10573  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10574                      (ins _.RC:$src1, i32u8imm:$src2),
10575                      OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
10576                      "$src1, {sae}, $src2",
10577                      (OpNode (_.VT _.RC:$src1),
10578                              (i32 timm:$src2))>,
10579                      EVEX_B, Sched<[sched]>;
10580}
10581
10582multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10583            AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
10584            SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
10585            Predicate prd>{
10586  let Predicates = [prd] in {
10587    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10588                                           sched.ZMM, _.info512>,
10589                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10590                                               sched.ZMM, _.info512>, EVEX_V512;
10591  }
10592  let Predicates = [prd, HasVLX] in {
10593    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10594                                           sched.XMM, _.info128>, EVEX_V128;
10595    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10596                                           sched.YMM, _.info256>, EVEX_V256;
10597  }
10598}
10599
10600// Handle instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm)
10601//                                op(reg_vec2, mem_vec, imm)
10602//                                op(reg_vec2, broadcast(eltVt), imm)
10603// All instructions are created with FROUND_CURRENT.
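// For example, vrangeps (defined via this multiclass further below) gets,
// roughly:
//   vrangeps $4, %zmm2, %zmm1, %zmm0 {%k1}
//   vrangeps $4, (%rax), %zmm1, %zmm0
//   vrangeps $4, (%rax){1to16}, %zmm1, %zmm0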
10604multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10605                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10606  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10607  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10608                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10609                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10610                      (OpNode (_.VT _.RC:$src1),
10611                              (_.VT _.RC:$src2),
10612                              (i32 timm:$src3))>,
10613                      Sched<[sched]>;
10614  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10615                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10616                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10617                    (OpNode (_.VT _.RC:$src1),
10618                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
10619                            (i32 timm:$src3))>,
10620                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10621  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10622                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10623                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10624                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10625                    (OpNode (_.VT _.RC:$src1),
10626                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10627                            (i32 timm:$src3))>, EVEX_B,
10628                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10629  }
10630}
10631
10632// Handle instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm)
10633//                                op(reg_vec2, mem_vec, imm)
10634multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10635                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10636                              X86VectorVTInfo SrcInfo>{
10637  let ExeDomain = DestInfo.ExeDomain, ImmT = Imm8 in {
10638  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10639                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10640                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10641                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10642                               (SrcInfo.VT SrcInfo.RC:$src2),
10643                               (i8 timm:$src3)))>,
10644                  Sched<[sched]>;
10645  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10646                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10647                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10648                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10649                             (SrcInfo.VT (bitconvert
10650                                                (SrcInfo.LdFrag addr:$src2))),
10651                             (i8 timm:$src3)))>,
10652                Sched<[sched.Folded, sched.ReadAfterFold]>;
10653  }
10654}
10655
10656// Handle instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm)
10657//                                op(reg_vec2, mem_vec, imm)
10658//                                op(reg_vec2, broadcast(eltVt), imm)
10659multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10660                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10661  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10662
10663  let ExeDomain = _.ExeDomain, ImmT = Imm8 in
10664  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10665                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10666                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10667                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10668                    (OpNode (_.VT _.RC:$src1),
10669                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10670                            (i8 timm:$src3))>, EVEX_B,
10671                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10672}
10673
10674// Handle scalar instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm)
10675//                                       op(reg_vec2, mem_scalar, imm)
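// For example, vgetmantsd (defined via this multiclass further below) gets,
// roughly:
//   vgetmantsd $4, %xmm2, %xmm1, %xmm0 {%k1}
//   vgetmantsd $4, (%rax), %xmm1, %xmm0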
10676multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10677                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10678  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10679  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10680                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10681                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10682                      (OpNode (_.VT _.RC:$src1),
10683                              (_.VT _.RC:$src2),
10684                              (i32 timm:$src3))>,
10685                      Sched<[sched]>;
10686  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10687                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10688                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10689                    (OpNode (_.VT _.RC:$src1),
10690                            (_.ScalarIntMemFrags addr:$src2),
10691                            (i32 timm:$src3))>,
10692                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10693  }
10694}
10695
10696// Handle instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
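// For example, roughly:  vrangeps $4, {sae}, %zmm2, %zmm1, %zmm0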
10697multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10698                                    SDNode OpNode, X86FoldableSchedWrite sched,
10699                                    X86VectorVTInfo _> {
10700  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10701  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10702                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10703                      OpcodeStr, "$src3, {sae}, $src2, $src1",
10704                      "$src1, $src2, {sae}, $src3",
10705                      (OpNode (_.VT _.RC:$src1),
10706                              (_.VT _.RC:$src2),
10707                              (i32 timm:$src3))>,
10708                      EVEX_B, Sched<[sched]>;
10709}
10710
10711// Handle scalar instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
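// For example, roughly:  vrangesd $4, {sae}, %xmm2, %xmm1, %xmm0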
10712multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10713                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10714  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10715  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10716                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10717                      OpcodeStr, "$src3, {sae}, $src2, $src1",
10718                      "$src1, $src2, {sae}, $src3",
10719                      (OpNode (_.VT _.RC:$src1),
10720                              (_.VT _.RC:$src2),
10721                              (i32 timm:$src3))>,
10722                      EVEX_B, Sched<[sched]>;
10723}
10724
10725multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10726            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10727            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10728  let Predicates = [prd] in {
10729    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10730                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10731                                  EVEX_V512;
10732
10733  }
10734  let Predicates = [prd, HasVLX] in {
10735    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10736                                  EVEX_V128;
10737    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10738                                  EVEX_V256;
10739  }
10740}
10741
10742multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10743                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10744                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10745  let Predicates = [Pred] in {
10746    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10747                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX, VVVV;
10748  }
10749  let Predicates = [Pred, HasVLX] in {
10750    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10751                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX, VVVV;
10752    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10753                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX, VVVV;
10754  }
10755}
10756
10757multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10758                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10759                                  Predicate Pred = HasAVX512> {
10760  let Predicates = [Pred] in {
10761    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10762                                EVEX_V512;
10763  }
10764  let Predicates = [Pred, HasVLX] in {
10765    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10766                                EVEX_V128;
10767    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10768                                EVEX_V256;
10769  }
10770}
10771
10772multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10773                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10774                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10775  let Predicates = [prd] in {
10776     defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10777              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
10778  }
10779}
10780
10781multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10782                    bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
10783                    SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
10784                    X86SchedWriteWidths sched, Predicate prd>{
10785  defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
10786                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
10787                            AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
10788  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10789                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10790                            AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
10791  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10792                            opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10793                            AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, REX_W;
10794}
10795
10796defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10797                              X86VReduce, X86VReduce, X86VReduceSAE,
10798                              SchedWriteFRnd, HasDQI>;
10799defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10800                              X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
10801                              SchedWriteFRnd, HasAVX512>;
10802defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10803                              X86VGetMant, X86VGetMant, X86VGetMantSAE,
10804                              SchedWriteFRnd, HasAVX512>;
10805
10806defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10807                                                0x50, X86VRange, X86VRangeSAE,
10808                                                SchedWriteFAdd, HasDQI>,
10809      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
10810defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
10811                                                0x50, X86VRange, X86VRangeSAE,
10812                                                SchedWriteFAdd, HasDQI>,
10813      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
10814
10815defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
10816      f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10817      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
10818defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
10819      0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10820      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
10821
10822defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
10823      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10824      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
10825defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
10826      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10827      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
10828defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
10829      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
10830      AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;
10831
10832defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
10833      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10834      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
10835defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
10836      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10837      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
10838defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
10839      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
10840      AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;
10841
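// vshuff32x4 and friends below shuffle whole 128-bit blocks selected by the
// immediate; X86Shuf128 is matched at the CastInfo type and then bitconverted
// to the result type. For example, roughly:
//   vshuff32x4 $0x4e, %zmm2, %zmm1, %zmm0   ; each 2-bit field picks a block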
10842multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
10843                                          X86FoldableSchedWrite sched,
10844                                          X86VectorVTInfo _,
10845                                          X86VectorVTInfo CastInfo> {
10846  let ExeDomain = _.ExeDomain in {
10847  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10848                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10849                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10850                  (_.VT (bitconvert
10851                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
10852                                                  (i8 timm:$src3)))))>,
10853                  Sched<[sched]>;
10854  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10855                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10856                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10857                (_.VT
10858                 (bitconvert
10859                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
10860                                           (CastInfo.LdFrag addr:$src2),
10861                                           (i8 timm:$src3)))))>,
10862                Sched<[sched.Folded, sched.ReadAfterFold]>;
10863  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10864                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10865                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10866                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10867                    (_.VT
10868                     (bitconvert
10869                      (CastInfo.VT
10870                       (X86Shuf128 _.RC:$src1,
10871                                   (_.BroadcastLdFrag addr:$src2),
10872                                   (i8 timm:$src3)))))>, EVEX_B,
10873                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10874  }
10875}
10876
10877multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
10878                                   AVX512VLVectorVTInfo _,
10879                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc>{
10880  let Predicates = [HasAVX512] in
10881  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10882                                          _.info512, CastInfo.info512>, EVEX_V512;
10883
10884  let Predicates = [HasAVX512, HasVLX] in
10885  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10886                                             _.info256, CastInfo.info256>, EVEX_V256;
10887}
10888
10889defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
10890      avx512vl_f32_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
10891defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
10892      avx512vl_f64_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
10893defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
10894      avx512vl_i32_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
10895defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
10896      avx512vl_i64_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
10897
10898multiclass avx512_valign<bits<8> opc, string OpcodeStr,
10899                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10900  let ExeDomain = _.ExeDomain in {
10901  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10902                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10903                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10904                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
10905                  Sched<[sched]>;
10906  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10907                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10908                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10909                (_.VT (X86VAlign _.RC:$src1,
10910                                 (bitconvert (_.LdFrag addr:$src2)),
10911                                 (i8 timm:$src3)))>,
10912                Sched<[sched.Folded, sched.ReadAfterFold]>;
10913
10914  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10915                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10916                   OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10917                   "$src1, ${src2}"#_.BroadcastStr#", $src3",
10918                   (X86VAlign _.RC:$src1,
10919                              (_.VT (_.BroadcastLdFrag addr:$src2)),
10920                              (i8 timm:$src3))>, EVEX_B,
10921                   Sched<[sched.Folded, sched.ReadAfterFold]>;
10922  }
10923}
10924
10925multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
10926                                AVX512VLVectorVTInfo _> {
10927  let Predicates = [HasAVX512] in {
10928    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
10929                                AVX512AIi8Base, EVEX, VVVV, EVEX_V512;
10930  }
10931  let Predicates = [HasAVX512, HasVLX] in {
10932    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
10933                                AVX512AIi8Base, EVEX, VVVV, EVEX_V128;
10935    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
10936                                AVX512AIi8Base, EVEX, VVVV, EVEX_V256;
10937  }
10938}
10939
10940defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
10941                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
10942defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
10943                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
10944                                   REX_W;
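
// VALIGND/VALIGNQ concatenate the two sources (src2 in the low half, src1 in the high half)
// and extract a window of elements starting at the immediate. A hedged scalar reference
// model, illustrative only (the function name and NumElts parameter are ours); immediates
// >= NumElts are not modeled here:
//
//   #include <cstddef>
//   #include <cstdint>
//   // dst[i] = concat(src2, src1)[i + imm] for 0 <= imm < NumElts.
//   void valign_ref(const uint64_t *src1, const uint64_t *src2, uint64_t *dst,
//                   size_t NumElts, unsigned imm) {
//     for (size_t i = 0; i != NumElts; ++i) {
//       size_t j = i + imm;
//       dst[i] = (j < NumElts) ? src2[j] : src1[j - NumElts];
//     }
//   }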
10945
10946defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
10947                                         SchedWriteShuffle, avx512vl_i8_info,
10948                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
10949
10950// Fragments to help convert valignq into masked valignd, or valignq/valignd
10951// into vpalignr.
10952def ValignqImm32XForm : SDNodeXForm<timm, [{
10953  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
10954}]>;
10955def ValignqImm8XForm : SDNodeXForm<timm, [{
10956  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
10957}]>;
10958def ValigndImm8XForm : SDNodeXForm<timm, [{
10959  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
10960}]>;
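
// These transforms only rescale the alignment count: one qword step is two dword steps or
// eight byte steps, and one dword step is four byte steps. A tiny illustrative check
// (helper names are ours, mirroring the XForms above):
//
//   #include <cstdint>
//   constexpr uint8_t valignqToValignd(uint8_t i) { return i * 2; }
//   constexpr uint8_t valignqToPalignr(uint8_t i) { return i * 8; }
//   constexpr uint8_t valigndToPalignr(uint8_t i) { return i * 4; }
//   static_assert(valignqToValignd(3) == 6, "rotate by 3 qwords == rotate by 6 dwords");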
10961
10962multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
10963                                        X86VectorVTInfo From, X86VectorVTInfo To,
10964                                        SDNodeXForm ImmXForm> {
10965  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10966                                 (bitconvert
10967                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10968                                                   timm:$src3))),
10969                                 To.RC:$src0)),
10970            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
10971                                                  To.RC:$src1, To.RC:$src2,
10972                                                  (ImmXForm timm:$src3))>;
10973
10974  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10975                                 (bitconvert
10976                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10977                                                   timm:$src3))),
10978                                 To.ImmAllZerosV)),
10979            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
10980                                                   To.RC:$src1, To.RC:$src2,
10981                                                   (ImmXForm timm:$src3))>;
10982
10983  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10984                                 (bitconvert
10985                                  (From.VT (OpNode From.RC:$src1,
10986                                                   (From.LdFrag addr:$src2),
10987                                           timm:$src3))),
10988                                 To.RC:$src0)),
10989            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
10990                                                  To.RC:$src1, addr:$src2,
10991                                                  (ImmXForm timm:$src3))>;
10992
10993  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10994                                 (bitconvert
10995                                  (From.VT (OpNode From.RC:$src1,
10996                                                   (From.LdFrag addr:$src2),
10997                                           timm:$src3))),
10998                                 To.ImmAllZerosV)),
10999            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
11000                                                   To.RC:$src1, addr:$src2,
11001                                                   (ImmXForm timm:$src3))>;
11002}
11003
11004multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
11005                                           X86VectorVTInfo From,
11006                                           X86VectorVTInfo To,
11007                                           SDNodeXForm ImmXForm> :
11008      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
11009  def : Pat<(From.VT (OpNode From.RC:$src1,
11010                             (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
11011                             timm:$src3)),
11012            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
11013                                                  (ImmXForm timm:$src3))>;
11014
11015  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11016                                 (bitconvert
11017                                  (From.VT (OpNode From.RC:$src1,
11018                                           (bitconvert
11019                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
11020                                           timm:$src3))),
11021                                 To.RC:$src0)),
11022            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
11023                                                   To.RC:$src1, addr:$src2,
11024                                                   (ImmXForm timm:$src3))>;
11025
11026  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11027                                 (bitconvert
11028                                  (From.VT (OpNode From.RC:$src1,
11029                                           (bitconvert
11030                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
11031                                           timm:$src3))),
11032                                 To.ImmAllZerosV)),
11033            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
11034                                                    To.RC:$src1, addr:$src2,
11035                                                    (ImmXForm timm:$src3))>;
11036}
11037
11038let Predicates = [HasAVX512] in {
11039  // For 512-bit we lower to the widest element type we can. So we only need
11040  // to handle converting valignq to valignd.
11041  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
11042                                         v16i32_info, ValignqImm32XForm>;
11043}
11044
11045let Predicates = [HasVLX] in {
11046  // For 128-bit we lower to the widest element type we can. So we only need
11047  // to handle converting valignq to valignd.
11048  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
11049                                         v4i32x_info, ValignqImm32XForm>;
11050  // For 256-bit we lower to the widest element type we can. So we only need
11051  // to handle converting valignq to valignd.
11052  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
11053                                         v8i32x_info, ValignqImm32XForm>;
11054}
11055
11056let Predicates = [HasVLX, HasBWI] in {
11057  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
11058  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
11059                                      v16i8x_info, ValignqImm8XForm>;
11060  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
11061                                      v16i8x_info, ValigndImm8XForm>;
11062}
11063
11064defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
11065                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
11066                EVEX_CD8<8, CD8VF>;
11067
11068multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
11069                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11070  let ExeDomain = _.ExeDomain in {
11071  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11072                    (ins _.RC:$src1), OpcodeStr,
11073                    "$src1", "$src1",
11074                    (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
11075                    Sched<[sched]>;
11076
11077  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11078                  (ins _.MemOp:$src1), OpcodeStr,
11079                  "$src1", "$src1",
11080                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
11081            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
11082            Sched<[sched.Folded]>;
11083  }
11084}
11085
11086multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
11087                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
11088           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
11089  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11090                  (ins _.ScalarMemOp:$src1), OpcodeStr,
11091                  "${src1}"#_.BroadcastStr,
11092                  "${src1}"#_.BroadcastStr,
11093                  (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
11094             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
11095             Sched<[sched.Folded]>;
11096}
11097
11098multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11099                              X86SchedWriteWidths sched,
11100                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
11101  let Predicates = [prd] in
11102    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11103                             EVEX_V512;
11104
11105  let Predicates = [prd, HasVLX] in {
11106    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11107                              EVEX_V256;
11108    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11109                              EVEX_V128;
11110  }
11111}
11112
11113multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11114                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
11115                               Predicate prd> {
11116  let Predicates = [prd] in
11117    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11118                              EVEX_V512;
11119
11120  let Predicates = [prd, HasVLX] in {
11121    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11122                                 EVEX_V256;
11123    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11124                                 EVEX_V128;
11125  }
11126}
11127
11128multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
11129                                 SDNode OpNode, X86SchedWriteWidths sched,
11130                                 Predicate prd> {
11131  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
11132                               avx512vl_i64_info, prd>, REX_W;
11133  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
11134                               avx512vl_i32_info, prd>;
11135}
11136
11137multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
11138                                 SDNode OpNode, X86SchedWriteWidths sched,
11139                                 Predicate prd> {
11140  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
11141                              avx512vl_i16_info, prd>, WIG;
11142  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
11143                              avx512vl_i8_info, prd>, WIG;
11144}
11145
11146multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
11147                                  bits<8> opc_d, bits<8> opc_q,
11148                                  string OpcodeStr, SDNode OpNode,
11149                                  X86SchedWriteWidths sched> {
11150  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
11151                                    HasAVX512>,
11152              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
11153                                    HasBWI>;
11154}
11155
11156defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
11157                                    SchedWriteVecALU>;
11158
11159// VPABS: Use the 512-bit version to implement the 128/256-bit ops when VLX is not available (NoVLX).
11160let Predicates = [HasAVX512, NoVLX] in {
11161  def : Pat<(v4i64 (abs VR256X:$src)),
11162            (EXTRACT_SUBREG
11163                (VPABSQZrr
11164                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
11165             sub_ymm)>;
11166  def : Pat<(v2i64 (abs VR128X:$src)),
11167            (EXTRACT_SUBREG
11168                (VPABSQZrr
11169                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
11170             sub_xmm)>;
11171}
11172
11173// Use the 512-bit version to implement the 128/256-bit ops.
11174multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
11175                                 AVX512VLVectorVTInfo _, Predicate prd> {
11176  let Predicates = [prd, NoVLX] in {
11177    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
11178              (EXTRACT_SUBREG
11179                (!cast<Instruction>(InstrStr # "Zrr")
11180                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11181                                 _.info256.RC:$src1,
11182                                 _.info256.SubRegIdx)),
11183              _.info256.SubRegIdx)>;
11184
11185    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
11186              (EXTRACT_SUBREG
11187                (!cast<Instruction>(InstrStr # "Zrr")
11188                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11189                                 _.info128.RC:$src1,
11190                                 _.info128.SubRegIdx)),
11191              _.info128.SubRegIdx)>;
11192  }
11193}
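
// When only the 512-bit instruction exists (no VLX), the narrow op is done by inserting the
// vector into an undefined zmm, running the 512-bit form, and extracting the low part again;
// the INSERT_SUBREG/EXTRACT_SUBREG patterns above (e.g. for VPABS) are the isel spelling of
// that. Roughly the intrinsics-level equivalent, illustrative only (assumes AVX512F without
// AVX512VL; the helper name is ours):
//
//   #include <immintrin.h>
//   static inline __m256i abs_epi64_no_vlx(__m256i v) {
//     __m512i wide = _mm512_castsi256_si512(v); // upper 256 bits undefined
//     return _mm512_castsi512_si256(_mm512_abs_epi64(wide));
//   }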
11194
11195defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
11196                                        SchedWriteVecIMul, HasCDI>;
11197
11198// FIXME: Is there a better scheduler class for VPCONFLICT?
11199defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
11200                                        SchedWriteVecALU, HasCDI>;
11201
11202// VPLZCNT: Use the 512-bit version to implement the 128/256-bit ops when VLX is not available (NoVLX).
11203defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
11204defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
11205
11206//===---------------------------------------------------------------------===//
11207// Counts number of ones - VPOPCNTD and VPOPCNTQ
11208//===---------------------------------------------------------------------===//
11209
11210// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
11211defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
11212                                     SchedWriteVecALU, HasVPOPCNTDQ>;
11213
11214defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
11215defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
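
// For reference, these map to the usual conflict-detection / population-count intrinsics.
// Illustrative use only (assumes AVX512CD and AVX512VPOPCNTDQ; names are ours):
//
//   #include <immintrin.h>
//   void cd_demo(__m512i x, __m512i y, __m512i out[3]) {
//     out[0] = _mm512_lzcnt_epi32(x);     // vplzcntd
//     out[1] = _mm512_popcnt_epi64(y);    // vpopcntq
//     out[2] = _mm512_conflict_epi32(x);  // vpconflictd
//   }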
11216
11217//===---------------------------------------------------------------------===//
11218// Replicate Single FP - MOVSHDUP and MOVSLDUP
11219//===---------------------------------------------------------------------===//
11220
11221multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
11222                            X86SchedWriteWidths sched> {
11223  defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
11224                                      avx512vl_f32_info, HasAVX512>, TB, XS;
11225}
11226
11227defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
11228                                  SchedWriteFShuffle>;
11229defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
11230                                  SchedWriteFShuffle>;
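
// vmovshdup duplicates the odd (high) element of each single-precision pair, vmovsldup the
// even (low) one. A scalar reference model, illustrative only (the function name is ours):
//
//   void movshdup_ref(const float *src, float *dst, int n) {
//     for (int i = 0; i < n; i += 2)
//       dst[i] = dst[i + 1] = src[i + 1]; // vmovsldup would take src[i] instead
//   }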
11231
11232//===----------------------------------------------------------------------===//
11233// AVX-512 - MOVDDUP
11234//===----------------------------------------------------------------------===//
11235
11236multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
11237                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11238  let ExeDomain = _.ExeDomain in {
11239  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11240                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
11241                   (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
11242                   Sched<[sched]>;
11243  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11244                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
11245                 (_.VT (_.BroadcastLdFrag addr:$src))>,
11246                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
11247                 Sched<[sched.Folded]>;
11248  }
11249}
11250
11251multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
11252                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
11253  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
11254                           VTInfo.info512>, EVEX_V512;
11255
11256  let Predicates = [HasAVX512, HasVLX] in {
11257    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
11258                                VTInfo.info256>, EVEX_V256;
11259    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
11260                                   VTInfo.info128>, EVEX_V128;
11261  }
11262}
11263
11264multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
11265                          X86SchedWriteWidths sched> {
11266  defm NAME:      avx512_movddup_common<opc, OpcodeStr, sched,
11267                                        avx512vl_f64_info>, TB, XD, REX_W;
11268}
11269
11270defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;
11271
11272let Predicates = [HasVLX] in {
11273def : Pat<(v2f64 (X86VBroadcast f64:$src)),
11274          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11275
11276def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11277                        (v2f64 VR128X:$src0)),
11278          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
11279                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11280def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11281                        immAllZerosV),
11282          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11283}
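
// For the 128-bit form, duplicating the low double is the same as broadcasting it, which is
// why the Z128 variant above is written with X86VBroadcast/BroadcastLdFrag and why a scalar
// f64 broadcast folds into VMOVDDUPZ128rr. Illustrative intrinsics equivalent (assumes SSE3
// or later; the helper name is ours):
//
//   #include <immintrin.h>
//   __m128d dup_low(__m128d v) { return _mm_movedup_pd(v); } // element 0 into both lanes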
11284
11285//===----------------------------------------------------------------------===//
11286// AVX-512 - Unpack Instructions
11287//===----------------------------------------------------------------------===//
11288
11289let Uses = []<Register>, mayRaiseFPException = 0 in {
11290defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
11291                                 SchedWriteFShuffleSizes, 0, 1>;
11292defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
11293                                 SchedWriteFShuffleSizes>;
11294}
11295
11296defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
11297                                       SchedWriteShuffle, HasBWI>;
11298defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
11299                                       SchedWriteShuffle, HasBWI>;
11300defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
11301                                       SchedWriteShuffle, HasBWI>;
11302defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
11303                                       SchedWriteShuffle, HasBWI>;
11304
11305defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
11306                                       SchedWriteShuffle, HasAVX512>;
11307defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
11308                                       SchedWriteShuffle, HasAVX512>;
11309defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
11310                                        SchedWriteShuffle, HasAVX512>;
11311defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
11312                                        SchedWriteShuffle, HasAVX512>;
11313
11314//===----------------------------------------------------------------------===//
11315// AVX-512 - Extract & Insert Integer Instructions
11316//===----------------------------------------------------------------------===//
11317
11318multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11319                                                            X86VectorVTInfo _> {
11320  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
11321              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11322              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11323              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
11324                       addr:$dst)]>,
11325              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
11326}
11327
11328multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
11329  let Predicates = [HasBWI] in {
11330    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
11331                  (ins _.RC:$src1, u8imm:$src2),
11332                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11333                  [(set GR32orGR64:$dst,
11334                        (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
11335                  EVEX, TA, PD, Sched<[WriteVecExtract]>;
11336
11337    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TA, PD;
11338  }
11339}
11340
11341multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
11342  let Predicates = [HasBWI] in {
11343    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
11344                  (ins _.RC:$src1, u8imm:$src2),
11345                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11346                  [(set GR32orGR64:$dst,
11347                        (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
11348                  EVEX, TB, PD, Sched<[WriteVecExtract]>;
11349
11350    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
11351    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
11352                   (ins _.RC:$src1, u8imm:$src2),
11353                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
11354                   EVEX, TA, PD, Sched<[WriteVecExtract]>;
11355
11356    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TA, PD;
11357  }
11358}
11359
11360multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
11361                                                            RegisterClass GRC> {
11362  let Predicates = [HasDQI] in {
11363    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
11364                  (ins _.RC:$src1, u8imm:$src2),
11365                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11366                  [(set GRC:$dst,
11367                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
11368                  EVEX, TA, PD, Sched<[WriteVecExtract]>;
11369
11370    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
11371                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11372                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11373                [(store (extractelt (_.VT _.RC:$src1),
11374                                    imm:$src2),addr:$dst)]>,
11375                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TA, PD,
11376                Sched<[WriteVecExtractSt]>;
11377  }
11378}
11379
11380defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, WIG;
11381defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, WIG;
11382defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
11383defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, REX_W;
11384
11385multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11386                                            X86VectorVTInfo _, PatFrag LdFrag,
11387                                            SDPatternOperator immoperator> {
11388  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
11389      (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
11390      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11391      [(set _.RC:$dst,
11392          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
11393      EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
11394}
11395
11396multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
11397                                            X86VectorVTInfo _, PatFrag LdFrag> {
11398  let Predicates = [HasBWI] in {
11399    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11400        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
11401        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11402        [(set _.RC:$dst,
11403            (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX, VVVV,
11404        Sched<[WriteVecInsert]>;
11405
11406    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
11407  }
11408}
11409
11410multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
11411                                         X86VectorVTInfo _, RegisterClass GRC> {
11412  let Predicates = [HasDQI] in {
11413    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11414        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
11415        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11416        [(set _.RC:$dst,
11417            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
11418        EVEX, VVVV, TA, PD, Sched<[WriteVecInsert]>;
11419
11420    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
11421                                    _.ScalarLdFrag, imm>, TA, PD;
11422  }
11423}
11424
11425defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
11426                                     extloadi8>, TA, PD, WIG;
11427defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
11428                                     extloadi16>, TB, PD, WIG;
11429defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
11430defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, REX_W;
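
// These are the EVEX-encoded element extract/insert forms; from C they are reached through
// the ordinary extract/insert intrinsics (illustrative only; SSE4.1-style intrinsics shown,
// and with AVX512BW/DQ the compiler may pick the EVEX encodings defined above):
//
//   #include <immintrin.h>
//   int     get_lane2(__m128i v)        { return _mm_extract_epi32(v, 2); }
//   __m128i set_lane2(__m128i v, int s) { return _mm_insert_epi32(v, s, 2); }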
11431
11432let Predicates = [HasAVX512, NoBWI] in {
11433  def : Pat<(X86pinsrb VR128:$src1,
11434                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
11435                       timm:$src3),
11436            (VPINSRBrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
11437                       timm:$src3)>;
11438}
11439
11440let Predicates = [HasBWI] in {
11441  def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
11442            (VPINSRBZrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
11443                        GR8:$src2, sub_8bit), timm:$src3)>;
11444  def : Pat<(X86pinsrb VR128:$src1,
11445                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
11446                       timm:$src3),
11447            (VPINSRBZrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
11448                        timm:$src3)>;
11449}
11450
11451// Always prefer the real FP16 moves when available; these VPINSRW/VPEXTRW-based
// fallbacks are deprioritized with negative AddedComplexity.
11452let Predicates = [HasBWI], AddedComplexity = -10 in {
11453  def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>;
11454  def : Pat<(store f16:$src, addr:$dst), (VPEXTRWZmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
11455  def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWZrr (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0), sub_16bit)>;
11456  def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWZrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16X)>;
11457}
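
// Without AVX512FP16 there is no dedicated 16-bit FP move, so an f16 load or store goes
// through the word insert/extract instructions above, always using element 0 of an XMM.
// A rough intrinsics-level picture, illustrative only (uses zero instead of an undefined
// register for simplicity; helper names are ours):
//
//   #include <immintrin.h>
//   #include <cstdint>
//   __m128i load_f16_bits(const void *p) {
//     return _mm_insert_epi16(_mm_setzero_si128(), *(const uint16_t *)p, 0);
//   }
//   void store_f16_bits(void *p, __m128i v) {
//     *(uint16_t *)p = (uint16_t)_mm_extract_epi16(v, 0);
//   }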
11458
11459//===----------------------------------------------------------------------===//
11460// VSHUFPS - VSHUFPD Operations
11461//===----------------------------------------------------------------------===//
11462
11463multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
11464  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11465                                    SchedWriteFShuffle>,
11466                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11467                                    TA, EVEX, VVVV;
11468}
11469
11470defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, TB;
11471defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, TB, PD, REX_W;
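
// vshufps selects the two low result elements of each 128-bit lane from src1 and the two
// high ones from src2, two immediate bits per element; vshufpd uses one bit per element.
// Illustrative only (assumes the classic SSE semantics repeated per 128-bit lane):
//
//   #include <immintrin.h>
//   // Per 128-bit lane the result is { a[1], a[3], b[0], b[2] }.
//   __m512 pick_odd_even(__m512 a, __m512 b) {
//     return _mm512_shuffle_ps(a, b, 0x8D); // 0b10'00'11'01
//   }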
11472
11473//===----------------------------------------------------------------------===//
11474// AVX-512 - Byte shift Left/Right
11475//===----------------------------------------------------------------------===//
11476
11477multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11478                               Format MRMm, string OpcodeStr,
11479                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11480  def ri : AVX512<opc, MRMr,
11481             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11482             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11483             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
11484             Sched<[sched]>;
11485  def mi : AVX512<opc, MRMm,
11486           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11487           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11488           [(set _.RC:$dst,(_.VT (OpNode
11489                                 (_.VT (bitconvert (_.LdFrag addr:$src1))),
11490                                 (i8 timm:$src2))))]>,
11491           Sched<[sched.Folded, sched.ReadAfterFold]>;
11492}
11493
11494multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11495                                   Format MRMm, string OpcodeStr,
11496                                   X86SchedWriteWidths sched, Predicate prd>{
11497  let Predicates = [prd] in
11498    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11499                                 sched.ZMM, v64i8_info>, EVEX_V512;
11500  let Predicates = [prd, HasVLX] in {
11501    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11502                                    sched.YMM, v32i8x_info>, EVEX_V256;
11503    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11504                                    sched.XMM, v16i8x_info>, EVEX_V128;
11505  }
11506}
11507defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11508                                       SchedWriteShuffle, HasBWI>,
11509                                       AVX512PDIi8Base, EVEX, VVVV, WIG;
11510defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11511                                       SchedWriteShuffle, HasBWI>,
11512                                       AVX512PDIi8Base, EVEX, VVVV, WIG;
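
// Like the legacy forms, these byte shifts work within each 128-bit lane, including the
// 256/512-bit encodings. Illustrative only (assumes AVX512BW; helper names are ours):
//
//   #include <immintrin.h>
//   __m512i shl5(__m512i v) { return _mm512_bslli_epi128(v, 5); } // per-128-bit-lane shift
//   __m512i shr5(__m512i v) { return _mm512_bsrli_epi128(v, 5); }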
11513
11514multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11515                                string OpcodeStr, X86FoldableSchedWrite sched,
11516                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11517  let isCommutable = 1 in
11518  def rr : AVX512BI<opc, MRMSrcReg,
11519             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11520             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11521             [(set _dst.RC:$dst,(_dst.VT
11522                                (OpNode (_src.VT _src.RC:$src1),
11523                                        (_src.VT _src.RC:$src2))))]>,
11524             Sched<[sched]>;
11525  def rm : AVX512BI<opc, MRMSrcMem,
11526           (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11527           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11528           [(set _dst.RC:$dst,(_dst.VT
11529                              (OpNode (_src.VT _src.RC:$src1),
11530                              (_src.VT (bitconvert
11531                                        (_src.LdFrag addr:$src2))))))]>,
11532           Sched<[sched.Folded, sched.ReadAfterFold]>;
11533}
11534
11535multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11536                                    string OpcodeStr, X86SchedWriteWidths sched,
11537                                    Predicate prd> {
11538  let Predicates = [prd] in
11539    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11540                                  v8i64_info, v64i8_info>, EVEX_V512;
11541  let Predicates = [prd, HasVLX] in {
11542    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11543                                     v4i64x_info, v32i8x_info>, EVEX_V256;
11544    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11545                                     v2i64x_info, v16i8x_info>, EVEX_V128;
11546  }
11547}
11548
11549defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11550                                        SchedWritePSADBW, HasBWI>, EVEX, VVVV, WIG;
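
// vpsadbw sums the absolute differences of each group of eight unsigned byte pairs and
// zero-extends the 16-bit sum into the corresponding 64-bit result element. A scalar
// reference model, illustrative only (the function name is ours):
//
//   #include <cstdint>
//   #include <cstdlib>
//   void psadbw_ref(const uint8_t *a, const uint8_t *b, uint64_t *dst, int nqwords) {
//     for (int q = 0; q < nqwords; ++q) {
//       uint64_t sum = 0;
//       for (int i = 0; i < 8; ++i)
//         sum += (uint64_t)std::abs((int)a[q * 8 + i] - (int)b[q * 8 + i]);
//       dst[q] = sum;
//     }
//   }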
11551
11552// Transforms to swizzle an immediate to enable better matching when
11553// the memory operand isn't in the right place.
11554def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
11555  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11556  uint8_t Imm = N->getZExtValue();
11557  // Swap bits 1/4 and 3/6.
11558  uint8_t NewImm = Imm & 0xa5;
11559  if (Imm & 0x02) NewImm |= 0x10;
11560  if (Imm & 0x10) NewImm |= 0x02;
11561  if (Imm & 0x08) NewImm |= 0x40;
11562  if (Imm & 0x40) NewImm |= 0x08;
11563  return getI8Imm(NewImm, SDLoc(N));
11564}]>;
11565def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
11566  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
11567  uint8_t Imm = N->getZExtValue();
11568  // Swap bits 2/4 and 3/5.
11569  uint8_t NewImm = Imm & 0xc3;
11570  if (Imm & 0x04) NewImm |= 0x10;
11571  if (Imm & 0x10) NewImm |= 0x04;
11572  if (Imm & 0x08) NewImm |= 0x20;
11573  if (Imm & 0x20) NewImm |= 0x08;
11574  return getI8Imm(NewImm, SDLoc(N));
11575}]>;
11576def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
11577  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11578  uint8_t Imm = N->getZExtValue();
11579  // Swap bits 1/2 and 5/6.
11580  uint8_t NewImm = Imm & 0x99;
11581  if (Imm & 0x02) NewImm |= 0x04;
11582  if (Imm & 0x04) NewImm |= 0x02;
11583  if (Imm & 0x20) NewImm |= 0x40;
11584  if (Imm & 0x40) NewImm |= 0x20;
11585  return getI8Imm(NewImm, SDLoc(N));
11586}]>;
11587def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
11588  // Convert a VPTERNLOG immediate by moving operand 0 to the end.
11589  uint8_t Imm = N->getZExtValue();
11590  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11591  uint8_t NewImm = Imm & 0x81;
11592  if (Imm & 0x02) NewImm |= 0x10;
11593  if (Imm & 0x04) NewImm |= 0x02;
11594  if (Imm & 0x08) NewImm |= 0x20;
11595  if (Imm & 0x10) NewImm |= 0x04;
11596  if (Imm & 0x20) NewImm |= 0x40;
11597  if (Imm & 0x40) NewImm |= 0x08;
11598  return getI8Imm(NewImm, SDLoc(N));
11599}]>;
11600def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
11601  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
11602  uint8_t Imm = N->getZExtValue();
11603  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11604  uint8_t NewImm = Imm & 0x81;
11605  if (Imm & 0x02) NewImm |= 0x04;
11606  if (Imm & 0x04) NewImm |= 0x10;
11607  if (Imm & 0x08) NewImm |= 0x40;
11608  if (Imm & 0x10) NewImm |= 0x02;
11609  if (Imm & 0x20) NewImm |= 0x08;
11610  if (Imm & 0x40) NewImm |= 0x20;
11611  return getI8Imm(NewImm, SDLoc(N));
11612}]>;
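
// All five transforms above are instances of one rule: the truth-table bit indexed by
// (op0<<2)|(op1<<1)|op2 has to follow its operands when they are reordered. A generic
// reference permutation, illustrative only (the helper is ours and is not used by TableGen):
//
//   #include <cstdint>
//   // perm[i] = which original operand the instruction's operand i now reads.
//   static uint8_t permuteTernlogImm(uint8_t Imm, const int perm[3]) {
//     uint8_t New = 0;
//     for (int Idx = 0; Idx < 8; ++Idx) {
//       int v[3] = {(Idx >> 2) & 1, (Idx >> 1) & 1, Idx & 1};
//       int NewIdx = (v[perm[0]] << 2) | (v[perm[1]] << 1) | v[perm[2]];
//       if (Imm & (1 << Idx))
//         New |= (uint8_t)(1 << NewIdx);
//     }
//     return New;
//   }
//   // With perm = {2,1,0} this reproduces VPTERNLOG321_imm8's swap of bits 1/4 and 3/6,
//   // e.g. 0xF0 ("copy op0") becomes 0xAA ("copy op2") once the operands are reversed.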
11613
11614multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11615                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
11616                          string Name>{
11617  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11618  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11619                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11620                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11621                      (OpNode (_.VT _.RC:$src1),
11622                              (_.VT _.RC:$src2),
11623                              (_.VT _.RC:$src3),
11624                              (i8 timm:$src4)), 1, 1>,
11625                      AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
11626  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11627                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11628                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11629                    (OpNode (_.VT _.RC:$src1),
11630                            (_.VT _.RC:$src2),
11631                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
11632                            (i8 timm:$src4)), 1, 0>,
11633                    AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
11634                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11635  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11636                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11637                    OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11638                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
11639                    (OpNode (_.VT _.RC:$src1),
11640                            (_.VT _.RC:$src2),
11641                            (_.VT (_.BroadcastLdFrag addr:$src3)),
11642                            (i8 timm:$src4)), 1, 0>, EVEX_B,
11643                    AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
11644                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11645  }// Constraints = "$src1 = $dst"
11646
11647  // Additional patterns for matching passthru operand in other positions.
11648  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11649                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11650                   _.RC:$src1)),
11651            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11652             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11653  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11654                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
11655                   _.RC:$src1)),
11656            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11657             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11658
11659  // Additional patterns for matching zero masking with loads in other
11660  // positions.
11661  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11662                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11663                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11664                   _.ImmAllZerosV)),
11665            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11666             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11667  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11668                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11669                    _.RC:$src2, (i8 timm:$src4)),
11670                   _.ImmAllZerosV)),
11671            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11672             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11673
11674  // Additional patterns for matching masked loads with different
11675  // operand orders.
11676  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11677                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11678                    _.RC:$src2, (i8 timm:$src4)),
11679                   _.RC:$src1)),
11680            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11681             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11682  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11683                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11684                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11685                   _.RC:$src1)),
11686            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11687             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11688  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11689                   (OpNode _.RC:$src2, _.RC:$src1,
11690                    (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
11691                   _.RC:$src1)),
11692            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11693             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11694  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11695                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11696                    _.RC:$src1, (i8 timm:$src4)),
11697                   _.RC:$src1)),
11698            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11699             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11700  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11701                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11702                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11703                   _.RC:$src1)),
11704            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11705             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11706
11707  // Additional patterns for matching zero masking with broadcasts in other
11708  // positions.
11709  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11710                   (OpNode (_.BroadcastLdFrag addr:$src3),
11711                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11712                   _.ImmAllZerosV)),
11713            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11714             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11715             (VPTERNLOG321_imm8 timm:$src4))>;
11716  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11717                   (OpNode _.RC:$src1,
11718                    (_.BroadcastLdFrag addr:$src3),
11719                    _.RC:$src2, (i8 timm:$src4)),
11720                   _.ImmAllZerosV)),
11721            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11722             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11723             (VPTERNLOG132_imm8 timm:$src4))>;
11724
11725  // Additional patterns for matching masked broadcasts with different
11726  // operand orders.
11727  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11728                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
11729                    _.RC:$src2, (i8 timm:$src4)),
11730                   _.RC:$src1)),
11731            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11732             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11733  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11734                   (OpNode (_.BroadcastLdFrag addr:$src3),
11735                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11736                   _.RC:$src1)),
11737            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11738             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11739  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11740                   (OpNode _.RC:$src2, _.RC:$src1,
11741                    (_.BroadcastLdFrag addr:$src3),
11742                    (i8 timm:$src4)), _.RC:$src1)),
11743            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11744             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11745  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11746                   (OpNode _.RC:$src2,
11747                    (_.BroadcastLdFrag addr:$src3),
11748                    _.RC:$src1, (i8 timm:$src4)),
11749                   _.RC:$src1)),
11750            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11751             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11752  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11753                   (OpNode (_.BroadcastLdFrag addr:$src3),
11754                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11755                   _.RC:$src1)),
11756            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11757             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11758}
11759
11760multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11761                                 AVX512VLVectorVTInfo _> {
11762  let Predicates = [HasAVX512] in
11763    defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11764                               _.info512, NAME>, EVEX_V512;
11765  let Predicates = [HasAVX512, HasVLX] in {
11766    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11767                               _.info128, NAME>, EVEX_V128;
11768    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11769                               _.info256, NAME>, EVEX_V256;
11770  }
11771}
11772
11773defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11774                                        avx512vl_i32_info>;
11775defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11776                                        avx512vl_i64_info>, REX_W;
11777
11778// Patterns to implement vnot using vpternlog instead of creating all ones
11779// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11780// so that the result is only dependent on src0. But we use the same source
11781// for all operands to prevent a false dependency.
11782// TODO: We should maybe have a more generalized algorithm for folding to
11783// vpternlog.
11784let Predicates = [HasAVX512] in {
11785  def : Pat<(v64i8 (vnot VR512:$src)),
11786            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11787  def : Pat<(v32i16 (vnot VR512:$src)),
11788            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11789  def : Pat<(v16i32 (vnot VR512:$src)),
11790            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11791  def : Pat<(v8i64 (vnot VR512:$src)),
11792            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11793}
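
// Table 0x0f is the function NOT(op0): only the four table entries with op0 == 0 are set,
// so a single vpternlogq with all three operands tied to the same register computes ~x
// without first materializing an all-ones constant. Illustrative intrinsics form (assumes
// AVX512F; the helper name is ours):
//
//   #include <immintrin.h>
//   __m512i vnot512(__m512i x) { return _mm512_ternarylogic_epi64(x, x, x, 0x0f); }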
11794
11795let Predicates = [HasAVX512, NoVLX] in {
11796  def : Pat<(v16i8 (vnot VR128X:$src)),
11797            (EXTRACT_SUBREG
11798             (VPTERNLOGQZrri
11799              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11800              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11801              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11802              (i8 15)), sub_xmm)>;
11803  def : Pat<(v8i16 (vnot VR128X:$src)),
11804            (EXTRACT_SUBREG
11805             (VPTERNLOGQZrri
11806              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11807              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11808              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11809              (i8 15)), sub_xmm)>;
11810  def : Pat<(v4i32 (vnot VR128X:$src)),
11811            (EXTRACT_SUBREG
11812             (VPTERNLOGQZrri
11813              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11814              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11815              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11816              (i8 15)), sub_xmm)>;
11817  def : Pat<(v2i64 (vnot VR128X:$src)),
11818            (EXTRACT_SUBREG
11819             (VPTERNLOGQZrri
11820              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11821              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11822              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11823              (i8 15)), sub_xmm)>;
11824
11825  def : Pat<(v32i8 (vnot VR256X:$src)),
11826            (EXTRACT_SUBREG
11827             (VPTERNLOGQZrri
11828              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11829              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11830              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11831              (i8 15)), sub_ymm)>;
11832  def : Pat<(v16i16 (vnot VR256X:$src)),
11833            (EXTRACT_SUBREG
11834             (VPTERNLOGQZrri
11835              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11836              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11837              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11838              (i8 15)), sub_ymm)>;
11839  def : Pat<(v8i32 (vnot VR256X:$src)),
11840            (EXTRACT_SUBREG
11841             (VPTERNLOGQZrri
11842              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11843              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11844              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11845              (i8 15)), sub_ymm)>;
11846  def : Pat<(v4i64 (vnot VR256X:$src)),
11847            (EXTRACT_SUBREG
11848             (VPTERNLOGQZrri
11849              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11850              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11851              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11852              (i8 15)), sub_ymm)>;
11853}
11854
11855let Predicates = [HasVLX] in {
11856  def : Pat<(v16i8 (vnot VR128X:$src)),
11857            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11858  def : Pat<(v8i16 (vnot VR128X:$src)),
11859            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11860  def : Pat<(v4i32 (vnot VR128X:$src)),
11861            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11862  def : Pat<(v2i64 (vnot VR128X:$src)),
11863            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11864
11865  def : Pat<(v32i8 (vnot VR256X:$src)),
11866            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11867  def : Pat<(v16i16 (vnot VR256X:$src)),
11868            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11869  def : Pat<(v8i32 (vnot VR256X:$src)),
11870            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11871  def : Pat<(v4i64 (vnot VR256X:$src)),
11872            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11873}
11874
11875//===----------------------------------------------------------------------===//
11876// AVX-512 - FixupImm
11877//===----------------------------------------------------------------------===//
11878
11879multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
11880                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
11881                                  X86VectorVTInfo TblVT>{
11882  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
11883      Uses = [MXCSR], mayRaiseFPException = 1 in {
11884    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11885                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11886                         OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11887                        (X86VFixupimm (_.VT _.RC:$src1),
11888                                      (_.VT _.RC:$src2),
11889                                      (TblVT.VT _.RC:$src3),
11890                                      (i32 timm:$src4))>, Sched<[sched]>;
11891    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11892                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
11893                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11894                      (X86VFixupimm (_.VT _.RC:$src1),
11895                                    (_.VT _.RC:$src2),
11896                                    (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
11897                                    (i32 timm:$src4))>,
11898                      Sched<[sched.Folded, sched.ReadAfterFold]>;
11899    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11900                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11901                    OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11902                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
11903                      (X86VFixupimm (_.VT _.RC:$src1),
11904                                    (_.VT _.RC:$src2),
11905                                    (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
11906                                    (i32 timm:$src4))>,
11907                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11908  } // Constraints = "$src1 = $dst"
11909}
11910
11911multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
11912                                      X86FoldableSchedWrite sched,
11913                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
11914  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
11915let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
11916  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11917                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11918                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
11919                      "$src2, $src3, {sae}, $src4",
11920                      (X86VFixupimmSAE (_.VT _.RC:$src1),
11921                                       (_.VT _.RC:$src2),
11922                                       (TblVT.VT _.RC:$src3),
11923                                       (i32 timm:$src4))>,
11924                      EVEX_B, Sched<[sched]>;
11925  }
11926}
11927
11928multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
11929                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
11930                                  X86VectorVTInfo _src3VT> {
11931  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
11932      ExeDomain = _.ExeDomain in {
11933    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11934                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11935                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11936                      (X86VFixupimms (_.VT _.RC:$src1),
11937                                     (_.VT _.RC:$src2),
11938                                     (_src3VT.VT _src3VT.RC:$src3),
11939                                     (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
11940    let Uses = [MXCSR] in
11941    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11942                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11943                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
11944                      "$src2, $src3, {sae}, $src4",
11945                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
11946                                        (_.VT _.RC:$src2),
11947                                        (_src3VT.VT _src3VT.RC:$src3),
11948                                        (i32 timm:$src4))>,
11949                      EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11950    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
11951                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11952                     OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11953                     (X86VFixupimms (_.VT _.RC:$src1),
11954                                    (_.VT _.RC:$src2),
11955                                    (_src3VT.VT (scalar_to_vector
11956                                              (_src3VT.ScalarLdFrag addr:$src3))),
11957                                    (i32 timm:$src4))>,
11958                     Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
11959  }
11960}
11961
11962multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
11963                                      AVX512VLVectorVTInfo _Vec,
11964                                      AVX512VLVectorVTInfo _Tbl> {
11965  let Predicates = [HasAVX512] in
11966    defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
11967                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
11968                                EVEX, VVVV, EVEX_V512;
11969  let Predicates = [HasAVX512, HasVLX] in {
11970    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
11971                            _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
11972                            EVEX, VVVV, EVEX_V128;
11973    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
11974                            _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
11975                            EVEX, VVVV, EVEX_V256;
11976  }
11977}
11978
11979defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11980                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
11981                          AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
11982defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11983                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
11984                          AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
11985defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
11986                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11987defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
11988                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
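// Illustrative note (assumption, not taken from this file): at the C level the
// fixupimm definitions above back the AVX-512 fixup intrinsics, roughly:
//   #include <immintrin.h>
//   __m512 fixup(__m512 a, __m512 b, __m512i tbl) {
//     // Expected to select vfixupimmps; the immediate picks the per-element
//     // fixup action from the table operand tbl.
//     return _mm512_fixupimm_ps(a, b, tbl, 0x22);
//   }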
11989
11990// Patterns used to select SSE scalar fp arithmetic instructions from
11991// either:
11992//
11993// (1) a scalar fp operation followed by a blend
11994//
11995// The effect is that the backend no longer emits unnecessary vector
11996// insert instructions immediately after SSE scalar fp instructions
11997// like addss or mulss.
11998//
11999// For example, given the following code:
12000//   __m128 foo(__m128 A, __m128 B) {
12001//     A[0] += B[0];
12002//     return A;
12003//   }
12004//
12005// Previously we generated:
12006//   addss %xmm0, %xmm1
12007//   movss %xmm1, %xmm0
12008//
12009// We now generate:
12010//   addss %xmm1, %xmm0
12011//
12012// (2) a vector packed single/double fp operation followed by a vector insert
12013//
12014// The effect is that the backend converts the packed fp instruction
12015// followed by a vector insert into a single SSE scalar fp instruction.
12016//
12017// For example, given the following code:
12018//   __m128 foo(__m128 A, __m128 B) {
12019//     __m128 C = A + B;
12020//     return (__m128) {C[0], A[1], A[2], A[3]};
12021//   }
12022//
12023// Previously we generated:
12024//   addps %xmm0, %xmm1
12025//   movss %xmm1, %xmm0
12026//
12027// We now generate:
12028//   addss %xmm1, %xmm0
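// A hedged C-level sketch of the masked forms handled by the patterns below
// (illustrative only; the intrinsic usage is an assumption, not taken from
// this file):
//   #include <immintrin.h>
//   __m128 masked_add(__m128 src, __mmask8 k, __m128 a, __m128 b) {
//     return _mm_mask_add_ss(src, k, a, b); // expected: a single vaddss {k}
//   }
// i.e. the masked scalar op plus blend should fold into one masked vaddss
// rather than an add followed by a separate move/blend.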
12029
12030// TODO: Some canonicalization in lowering would simplify the number of
12031// patterns we have to try to match.
12032multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
12033                                          string OpcPrefix, SDNode MoveNode,
12034                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
12035  let Predicates = [HasAVX512] in {
12036    // extracted scalar math op with insert via movss
12037    def : Pat<(MoveNode
12038               (_.VT VR128X:$dst),
12039               (_.VT (scalar_to_vector
12040                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12041                          _.FRC:$src)))),
12042              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
12043               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
12044    def : Pat<(MoveNode
12045               (_.VT VR128X:$dst),
12046               (_.VT (scalar_to_vector
12047                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12048                          (_.ScalarLdFrag addr:$src))))),
12049              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
12050
12051    // extracted masked scalar math op with insert via movss
12052    def : Pat<(MoveNode (_.VT VR128X:$src1),
12053               (scalar_to_vector
12054                (X86selects_mask VK1WM:$mask,
12055                            (MaskedOp (_.EltVT
12056                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12057                                      _.FRC:$src2),
12058                            _.FRC:$src0))),
12059              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
12060               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12061               VK1WM:$mask, _.VT:$src1,
12062               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12063    def : Pat<(MoveNode (_.VT VR128X:$src1),
12064               (scalar_to_vector
12065                (X86selects_mask VK1WM:$mask,
12066                            (MaskedOp (_.EltVT
12067                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12068                                      (_.ScalarLdFrag addr:$src2)),
12069                            _.FRC:$src0))),
12070              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
12071               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12072               VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12073
12074    // extracted masked scalar math op with zero masking and insert via movss
12075    def : Pat<(MoveNode (_.VT VR128X:$src1),
12076               (scalar_to_vector
12077                (X86selects_mask VK1WM:$mask,
12078                            (MaskedOp (_.EltVT
12079                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12080                                      _.FRC:$src2), (_.EltVT ZeroFP)))),
12081      (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
12082          VK1WM:$mask, _.VT:$src1,
12083          (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12084    def : Pat<(MoveNode (_.VT VR128X:$src1),
12085               (scalar_to_vector
12086                (X86selects_mask VK1WM:$mask,
12087                            (MaskedOp (_.EltVT
12088                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12089                                      (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
12090      (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12091  }
12092}
12093
12094defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
12095defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
12096defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
12097defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
12098
12099defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
12100defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
12101defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
12102defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
12103
12104defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
12105defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
12106defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
12107defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;
12108
12109multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
12110                                             SDNode Move, X86VectorVTInfo _> {
12111  let Predicates = [HasAVX512] in {
12112    def : Pat<(_.VT (Move _.VT:$dst,
12113                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
12114              (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
12115  }
12116}
12117
12118defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
12119defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
12120defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;
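// Illustrative example (assumption, not from this file): the unary patterns
// above let plain C such as
//   #include <immintrin.h>
//   __m128 f(__m128 a) { return _mm_sqrt_ss(a); }
// select vsqrtss directly instead of a packed sqrt plus a scalar insert.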
12121
12122//===----------------------------------------------------------------------===//
12123// AES instructions
12124//===----------------------------------------------------------------------===//
12125
12126multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
12127  let Predicates = [HasVLX, HasVAES] in {
12128    defm Z128 : AESI_binop_rm_int<Op, OpStr,
12129                                  !cast<Intrinsic>(IntPrefix),
12130                                  loadv2i64, 0, VR128X, i128mem>,
12131                  EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V128, WIG;
12132    defm Z256 : AESI_binop_rm_int<Op, OpStr,
12133                                  !cast<Intrinsic>(IntPrefix#"_256"),
12134                                  loadv4i64, 0, VR256X, i256mem>,
12135                  EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V256, WIG;
12136  }
12137  let Predicates = [HasAVX512, HasVAES] in
12138    defm Z    : AESI_binop_rm_int<Op, OpStr,
12139                                  !cast<Intrinsic>(IntPrefix#"_512"),
12140                                  loadv8i64, 0, VR512, i512mem>,
12141                  EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V512, WIG;
12142}
12143
12144defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
12145defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
12146defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
12147defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
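// Illustrative use (assumption, not part of this file): with VAES the widened
// forms are reachable from C via the 512-bit AES intrinsics, e.g.:
//   #include <immintrin.h>
//   __m512i enc(__m512i state, __m512i rk) {
//     return _mm512_aesenc_epi128(state, rk); // expected: vaesenc on ZMM
//   }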
12148
12149//===----------------------------------------------------------------------===//
12150// PCLMUL instructions - Carry-less multiplication
12151//===----------------------------------------------------------------------===//
12152
12153let Predicates = [HasAVX512, HasVPCLMULQDQ] in
12154defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
12155                              EVEX, VVVV, EVEX_V512, EVEX_CD8<64, CD8VF>, WIG;
12156
12157let Predicates = [HasVLX, HasVPCLMULQDQ] in {
12158defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
12159                              EVEX, VVVV, EVEX_V128, EVEX_CD8<64, CD8VF>, WIG;
12160
12161defm VPCLMULQDQZ256 : vpclmulqdq<VR256X, i256mem, loadv4i64,
12162                                int_x86_pclmulqdq_256>, EVEX, VVVV, EVEX_V256,
12163                                EVEX_CD8<64, CD8VF>, WIG;
12164}
12165
12166// Aliases
12167defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
12168defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
12169defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
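// Illustrative use (assumption): the EVEX carry-less multiply corresponds to,
// e.g.:
//   #include <immintrin.h>
//   __m512i clmul(__m512i a, __m512i b) {
//     return _mm512_clmulepi64_epi128(a, b, 0x00); // expected: vpclmulqdq ZMM
//   }
// The immediate selects which 64-bit half of each 128-bit lane is multiplied.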
12170
12171//===----------------------------------------------------------------------===//
12172// VBMI2
12173//===----------------------------------------------------------------------===//
12174
12175multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
12176                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12177  let Constraints = "$src1 = $dst",
12178      ExeDomain   = VTI.ExeDomain in {
12179    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12180                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12181                "$src3, $src2", "$src2, $src3",
12182                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
12183                T8, PD, EVEX, VVVV, Sched<[sched]>;
12184    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12185                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12186                "$src3, $src2", "$src2, $src3",
12187                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12188                        (VTI.VT (VTI.LdFrag addr:$src3))))>,
12189                T8, PD, EVEX, VVVV,
12190                Sched<[sched.Folded, sched.ReadAfterFold]>;
12191  }
12192}
12193
12194multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12195                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
12196         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
12197  let Constraints = "$src1 = $dst",
12198      ExeDomain   = VTI.ExeDomain in
12199  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12200              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
12201              "${src3}"#VTI.BroadcastStr#", $src2",
12202              "$src2, ${src3}"#VTI.BroadcastStr,
12203              (OpNode VTI.RC:$src1, VTI.RC:$src2,
12204               (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12205              T8, PD, EVEX, VVVV, EVEX_B,
12206              Sched<[sched.Folded, sched.ReadAfterFold]>;
12207}
12208
12209multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
12210                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12211  let Predicates = [HasVBMI2] in
12212  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12213                                   EVEX_V512;
12214  let Predicates = [HasVBMI2, HasVLX] in {
12215    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12216                                   EVEX_V256;
12217    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12218                                   EVEX_V128;
12219  }
12220}
12221
12222multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
12223                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12224  let Predicates = [HasVBMI2] in
12225  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12226                                    EVEX_V512;
12227  let Predicates = [HasVBMI2, HasVLX] in {
12228    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12229                                    EVEX_V256;
12230    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12231                                    EVEX_V128;
12232  }
12233}
12234multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
12235                           SDNode OpNode, X86SchedWriteWidths sched> {
12236  defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
12237             avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>;
12238  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
12239             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12240  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
12241             avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
12242}
12243
12244multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
12245                           SDNode OpNode, X86SchedWriteWidths sched> {
12246  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
12247             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
12248             REX_W, EVEX_CD8<16, CD8VF>;
12249  defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
12250             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
12251  defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
12252             sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
12253}
12254
12255// Concat & Shift
12256defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
12257defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
12258defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
12259defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
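// Illustrative use of the concat-shift forms (assumption, not from this file):
//   #include <immintrin.h>
//   __m512i shld16(__m512i a, __m512i b) {
//     // Concatenate each a/b element pair (a high, b low), shift left by 4,
//     // and keep the upper 16 bits; expected to select vpshldw.
//     return _mm512_shldi_epi16(a, b, 4);
//   }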
12260
12261// Compress
12262defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
12263                                         avx512vl_i8_info, HasVBMI2>, EVEX;
12264defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
12265                                          avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
12266// Expand
12267defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
12268                                      avx512vl_i8_info, HasVBMI2>, EVEX;
12269defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
12270                                      avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
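// Illustrative use (assumption): the byte/word compress forms back, e.g.:
//   #include <immintrin.h>
//   __m512i keep_bytes(__mmask64 k, __m512i v) {
//     return _mm512_maskz_compress_epi8(k, v); // expected: vpcompressb {k}{z}
//   }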
12271
12272//===----------------------------------------------------------------------===//
12273// VNNI
12274//===----------------------------------------------------------------------===//
12275
12276let Constraints = "$src1 = $dst" in
12277multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12278                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12279                    bit IsCommutable> {
12280  let ExeDomain = VTI.ExeDomain in {
12281  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12282                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12283                                   "$src3, $src2", "$src2, $src3",
12284                                   (VTI.VT (OpNode VTI.RC:$src1,
12285                                            VTI.RC:$src2, VTI.RC:$src3)),
12286                                   IsCommutable, IsCommutable>,
12287                                   EVEX, VVVV, T8, PD, Sched<[sched]>;
12288  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12289                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12290                                   "$src3, $src2", "$src2, $src3",
12291                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12292                                            (VTI.VT (VTI.LdFrag addr:$src3))))>,
12293                                   EVEX, VVVV, EVEX_CD8<32, CD8VF>, T8, PD,
12294                                   Sched<[sched.Folded, sched.ReadAfterFold,
12295                                          sched.ReadAfterFold]>;
12296  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12297                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
12298                                   OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
12299                                   "$src2, ${src3}"#VTI.BroadcastStr,
12300                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
12301                                    (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12302                                   EVEX, VVVV, EVEX_CD8<32, CD8VF>, EVEX_B,
12303                                   T8, PD, Sched<[sched.Folded, sched.ReadAfterFold,
12304                                                sched.ReadAfterFold]>;
12305  }
12306}
12307
12308multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
12309                       X86SchedWriteWidths sched, bit IsCommutable> {
12310  let Predicates = [HasVNNI] in
12311  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
12312                           IsCommutable>, EVEX_V512;
12313  let Predicates = [HasVNNI, HasVLX] in {
12314    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
12315                           IsCommutable>, EVEX_V256;
12316    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
12317                           IsCommutable>, EVEX_V128;
12318  }
12319}
12320
12321// FIXME: Is there a better scheduler class for VPDP?
12322defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
12323defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
12324defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
12325defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
12326
12327// Patterns to match VPDPWSSD from existing vpmaddwd + add instruction/intrinsic sequences.
12328let Predicates = [HasVNNI] in {
12329  def : Pat<(v16i32 (add VR512:$src1,
12330                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12331            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12332  def : Pat<(v16i32 (add VR512:$src1,
12333                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12334            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
12335}
12336let Predicates = [HasVNNI,HasVLX] in {
12337  def : Pat<(v8i32 (add VR256X:$src1,
12338                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12339            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12340  def : Pat<(v8i32 (add VR256X:$src1,
12341                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12342            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12343  def : Pat<(v4i32 (add VR128X:$src1,
12344                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12345            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12346  def : Pat<(v4i32 (add VR128X:$src1,
12347                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12348            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12349}
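// Illustrative C-level idiom covered by the patterns above (assumption, not
// taken from this file): a widening multiply-add followed by an accumulate,
//   #include <immintrin.h>
//   __m512i dot_acc(__m512i acc, __m512i a, __m512i b) {
//     return _mm512_add_epi32(acc, _mm512_madd_epi16(a, b));
//   }
// should fold into a single vpdpwssd, matching what
// _mm512_dpwssd_epi32(acc, a, b) produces directly.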
12350
12351//===----------------------------------------------------------------------===//
12352// Bit Algorithms
12353//===----------------------------------------------------------------------===//
12354
12355// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
12356defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12357                                   avx512vl_i8_info, HasBITALG>;
12358defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12359                                   avx512vl_i16_info, HasBITALG>, REX_W;
12360
12361defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12362defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
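// Illustrative use (assumption): these BITALG definitions back, e.g.:
//   #include <immintrin.h>
//   __m512i popcnt_bytes(__m512i v) {
//     return _mm512_popcnt_epi8(v); // expected to select vpopcntb
//   }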
12363
12364multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12365  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
12366                                (ins VTI.RC:$src1, VTI.RC:$src2),
12367                                "vpshufbitqmb",
12368                                "$src2, $src1", "$src1, $src2",
12369                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12370                                (VTI.VT VTI.RC:$src2)),
12371                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12372                                (VTI.VT VTI.RC:$src2))>, EVEX, VVVV, T8, PD,
12373                                Sched<[sched]>;
12374  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
12375                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
12376                                "vpshufbitqmb",
12377                                "$src2, $src1", "$src1, $src2",
12378                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12379                                (VTI.VT (VTI.LdFrag addr:$src2))),
12380                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12381                                (VTI.VT (VTI.LdFrag addr:$src2)))>,
12382                                EVEX, VVVV, EVEX_CD8<8, CD8VF>, T8, PD,
12383                                Sched<[sched.Folded, sched.ReadAfterFold]>;
12384}
12385
12386multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12387  let Predicates = [HasBITALG] in
12388  defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
12389  let Predicates = [HasBITALG, HasVLX] in {
12390    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
12391    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
12392  }
12393}
12394
12395// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
12396defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
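// Illustrative use (assumption, not from this file): vpshufbitqmb gathers bits
// of one source at positions chosen by the byte indices of the other and
// produces a mask register result, e.g.:
//   #include <immintrin.h>
//   __mmask64 gather_bits(__m512i data, __m512i idx) {
//     return _mm512_bitshuffle_epi64_mask(data, idx);
//   }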
12397
12398//===----------------------------------------------------------------------===//
12399// GFNI
12400//===----------------------------------------------------------------------===//
12401
12402multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12403                                   X86SchedWriteWidths sched> {
12404  let Predicates = [HasGFNI, HasAVX512] in
12405  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12406                                EVEX_V512;
12407  let Predicates = [HasGFNI, HasVLX] in {
12408    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12409                                EVEX_V256;
12410    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12411                                EVEX_V128;
12412  }
12413}
12414
12415defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12416                                          SchedWriteVecALU>,
12417                                          EVEX_CD8<8, CD8VF>, T8;
12418
12419multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12420                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12421                                      X86VectorVTInfo BcstVTI>
12422           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12423  let ExeDomain = VTI.ExeDomain in
12424  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12425                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
12426                OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
12427                "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
12428                (OpNode (VTI.VT VTI.RC:$src1),
12429                 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12430                 (i8 timm:$src3))>, EVEX_B,
12431                 Sched<[sched.Folded, sched.ReadAfterFold]>;
12432}
12433
12434multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12435                                     X86SchedWriteWidths sched> {
12436  let Predicates = [HasGFNI, HasAVX512] in
12437  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12438                                           v64i8_info, v8i64_info>, EVEX_V512;
12439  let Predicates = [HasGFNI, HasVLX] in {
12440    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12441                                           v32i8x_info, v4i64x_info>, EVEX_V256;
12442    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12443                                           v16i8x_info, v2i64x_info>, EVEX_V128;
12444  }
12445}
12446
12447defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12448                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
12449                         EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
12450defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12451                         X86GF2P8affineqb, SchedWriteVecIMul>,
12452                         EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
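// Illustrative use (assumption): the EVEX GF(2^8) forms correspond to, e.g.:
//   #include <immintrin.h>
//   __m512i gfmul(__m512i a, __m512i b) {
//     return _mm512_gf2p8mul_epi8(a, b);              // vgf2p8mulb
//   }
//   __m512i gfaffine(__m512i x, __m512i A) {
//     return _mm512_gf2p8affine_epi64_epi8(x, A, 0);  // vgf2p8affineqb
//   }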
12453
12454
12455//===----------------------------------------------------------------------===//
12456// AVX5124FMAPS
12457//===----------------------------------------------------------------------===//
12458
12459let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12460    Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
12461defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12462                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12463                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
12464                    []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12465                    Sched<[SchedWriteFMA.ZMM.Folded]>;
12466
12467defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12468                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12469                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12470                     []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12471                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12472
12473defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12474                    (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
12475                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
12476                    []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>,
12477                    Sched<[SchedWriteFMA.Scl.Folded]>;
12478
12479defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12480                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12481                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12482                     []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>,
12483                     Sched<[SchedWriteFMA.Scl.Folded]>;
12484}
12485
12486//===----------------------------------------------------------------------===//
12487// AVX5124VNNIW
12488//===----------------------------------------------------------------------===//
12489
12490let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12491    Constraints = "$src1 = $dst" in {
12492defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12493                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12494                     "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12495                    []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12496                    Sched<[SchedWriteFMA.ZMM.Folded]>;
12497
12498defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12499                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12500                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12501                     []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12502                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12503}
12504
12505let hasSideEffects = 0 in {
12506  let mayStore = 1, SchedRW = [WriteFStoreX] in
12507  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12508  let mayLoad = 1, SchedRW = [WriteFLoadX] in
12509  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12510}
12511
12512//===----------------------------------------------------------------------===//
12513// VP2INTERSECT
12514//===----------------------------------------------------------------------===//
12515
12516multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12517  def rr : I<0x68, MRMSrcReg,
12518                  (outs _.KRPC:$dst),
12519                  (ins _.RC:$src1, _.RC:$src2),
12520                  !strconcat("vp2intersect", _.Suffix,
12521                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12522                  [(set _.KRPC:$dst, (X86vp2intersect
12523                            _.RC:$src1, (_.VT _.RC:$src2)))]>,
12524                  EVEX, VVVV, T8, XD, Sched<[sched]>;
12525
12526  def rm : I<0x68, MRMSrcMem,
12527                  (outs _.KRPC:$dst),
12528                  (ins  _.RC:$src1, _.MemOp:$src2),
12529                  !strconcat("vp2intersect", _.Suffix,
12530                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12531                  [(set _.KRPC:$dst, (X86vp2intersect
12532                            _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12533                  EVEX, VVVV, T8, XD, EVEX_CD8<_.EltSize, CD8VF>,
12534                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12535
12536  def rmb : I<0x68, MRMSrcMem,
12537                  (outs _.KRPC:$dst),
12538                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
12539                  !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12540                             ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12541                  [(set _.KRPC:$dst, (X86vp2intersect
12542                             _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12543                  EVEX, VVVV, T8, XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12544                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12545}
12546
12547multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12548  let Predicates  = [HasAVX512, HasVP2INTERSECT] in
12549    defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12550
12551  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12552    defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12553    defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
12554  }
12555}
12556
12557let ExeDomain = SSEPackedInt in {
12558defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12559defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, REX_W;
12560}
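// Illustrative use (assumption, not from this file): vp2intersect writes a
// pair of masks marking matching elements of the two sources, e.g.:
//   #include <immintrin.h>
//   void intersect(__m512i a, __m512i b, __mmask16 *ka, __mmask16 *kb) {
//     _mm512_2intersect_epi32(a, b, ka, kb); // expected: vp2intersectd
//   }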
12561
12562multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12563                             X86SchedWriteWidths sched,
12564                             AVX512VLVectorVTInfo _SrcVTInfo,
12565                             AVX512VLVectorVTInfo _DstVTInfo,
12566                             SDNode OpNode, Predicate prd,
12567                             bit IsCommutable = 0> {
12568  let Predicates = [prd] in
12569    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12570                                   _SrcVTInfo.info512, _DstVTInfo.info512,
12571                                   _SrcVTInfo.info512, IsCommutable>,
12572                                   EVEX_V512, EVEX_CD8<32, CD8VF>;
12573  let Predicates = [HasVLX, prd] in {
12574    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12575                                      _SrcVTInfo.info256, _DstVTInfo.info256,
12576                                      _SrcVTInfo.info256, IsCommutable>,
12577                                     EVEX_V256, EVEX_CD8<32, CD8VF>;
12578    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12579                                      _SrcVTInfo.info128, _DstVTInfo.info128,
12580                                      _SrcVTInfo.info128, IsCommutable>,
12581                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
12582  }
12583}
12584
12585let ExeDomain = SSEPackedSingle in
12586defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12587                                        SchedWriteCvtPD2PS, // FIXME: Should be SchedWriteCvtPS2BF
12588                                        avx512vl_f32_info, avx512vl_bf16_info,
12589                                        X86cvtne2ps2bf16, HasBF16, 0>, T8, XD;
12590
12591// Truncate Float to BFloat16
12592multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12593                             X86SchedWriteWidths sched> {
12594  let ExeDomain = SSEPackedSingle in {
12595  let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12596    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16bf16x_info, v16f32_info,
12597                            X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12598  }
12599  let Predicates = [HasBF16, HasVLX] in {
12600    let Uses = []<Register>, mayRaiseFPException = 0 in {
12601    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v4f32x_info,
12602                               null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12603                               VK4WM>, EVEX_V128;
12604    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v8f32x_info,
12605                               X86cvtneps2bf16, X86cvtneps2bf16,
12606                               sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12607    }
12608  } // Predicates = [HasBF16, HasVLX]
12609  } // ExeDomain = SSEPackedSingle
12610
12611  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12612                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12613                  VR128X:$src), 0>;
12614  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12615                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12616                  f128mem:$src), 0, "intel">;
12617  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12618                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12619                  VR256X:$src), 0>;
12620  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12621                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12622                  f256mem:$src), 0, "intel">;
12623}
12624
12625defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12626                                       SchedWriteCvtPD2PS>, T8, XS,
12627                                       EVEX_CD8<32, CD8VF>;
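// Illustrative use (assumption): the float->bfloat16 truncation above backs,
// e.g.:
//   #include <immintrin.h>
//   __m256bh to_bf16(__m512 v) {
//     return _mm512_cvtneps_pbh(v); // expected: vcvtneps2bf16 on ZMM
//   }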
12628
12629let Predicates = [HasBF16, HasVLX] in {
12630  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12631  // patterns have been disabled with null_frag.
12632  def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12633            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12634  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8bf16 VR128X:$src0),
12635                              VK4WM:$mask),
12636            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12637  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8bf16x_info.ImmAllZerosV,
12638                              VK4WM:$mask),
12639            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
12640
12641  def : Pat<(v8bf16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12642            (VCVTNEPS2BF16Z128rm addr:$src)>;
12643  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8bf16 VR128X:$src0),
12644                              VK4WM:$mask),
12645            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12646  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8bf16x_info.ImmAllZerosV,
12647                              VK4WM:$mask),
12648            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
12649
12650  def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32
12651                                     (X86VBroadcastld32 addr:$src)))),
12652            (VCVTNEPS2BF16Z128rmb addr:$src)>;
12653  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12654                              (v8bf16 VR128X:$src0), VK4WM:$mask),
12655            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12656  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12657                              v8bf16x_info.ImmAllZerosV, VK4WM:$mask),
12658            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
12659
12660  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (v4f32 VR128X:$src))),
12661            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12662  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src))),
12663            (VCVTNEPS2BF16Z128rm addr:$src)>;
12664
12665  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (v8f32 VR256X:$src))),
12666            (VCVTNEPS2BF16Z256rr VR256X:$src)>;
12667  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))),
12668            (VCVTNEPS2BF16Z256rm addr:$src)>;
12669
12670  def : Pat<(v8bf16 (X86VBroadcastld16 addr:$src)),
12671            (VPBROADCASTWZ128rm addr:$src)>;
12672  def : Pat<(v16bf16 (X86VBroadcastld16 addr:$src)),
12673            (VPBROADCASTWZ256rm addr:$src)>;
12674
12675  def : Pat<(v8bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12676            (VPBROADCASTWZ128rr VR128X:$src)>;
12677  def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12678            (VPBROADCASTWZ256rr VR128X:$src)>;
12679
12680  def : Pat<(v8bf16 (X86vfpround (v8f32 VR256X:$src))),
12681            (VCVTNEPS2BF16Z256rr VR256X:$src)>;
12682  def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))),
12683            (VCVTNEPS2BF16Z256rm addr:$src)>;
12684
12685  // TODO: No scalar broadcast patterns, because we don't support a legal scalar bf16 type yet.
12686}
12687
12688let Predicates = [HasBF16] in {
12689  def : Pat<(v32bf16 (X86VBroadcastld16 addr:$src)),
12690            (VPBROADCASTWZrm addr:$src)>;
12691
12692  def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12693            (VPBROADCASTWZrr VR128X:$src)>;
12694
12695  def : Pat<(v16bf16 (X86vfpround (v16f32 VR512:$src))),
12696            (VCVTNEPS2BF16Zrr VR512:$src)>;
12697  def : Pat<(v16bf16 (X86vfpround (loadv16f32 addr:$src))),
12698            (VCVTNEPS2BF16Zrm addr:$src)>;
12699  // TODO: No scalar broadcast patterns, because we don't support a legal scalar bf16 type yet.
12700}
12701
12702let Constraints = "$src1 = $dst" in {
12703multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12704                              X86FoldableSchedWrite sched,
12705                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
12706  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12707                           (ins src_v.RC:$src2, src_v.RC:$src3),
12708                           OpcodeStr, "$src3, $src2", "$src2, $src3",
12709                           (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
12710                           EVEX, VVVV, Sched<[sched]>;
12711
12712  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12713                               (ins src_v.RC:$src2, src_v.MemOp:$src3),
12714                               OpcodeStr, "$src3, $src2", "$src2, $src3",
12715                               (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12716                               (src_v.LdFrag addr:$src3)))>, EVEX, VVVV,
12717                               Sched<[sched.Folded, sched.ReadAfterFold]>;
12718
12719  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12720                  (ins src_v.RC:$src2, f32mem:$src3),
12721                  OpcodeStr,
12722                  !strconcat("${src3}", _.BroadcastStr,", $src2"),
12723                  !strconcat("$src2, ${src3}", _.BroadcastStr),
12724                  (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12725                  (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
12726                  EVEX_B, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
12727
12728}
12729} // Constraints = "$src1 = $dst"
12730
12731multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12732                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
12733                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
12734  let Predicates = [prd] in {
12735    defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
12736                                   src_v.info512>, EVEX_V512;
12737  }
12738  let Predicates = [HasVLX, prd] in {
12739    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
12740                                   src_v.info256>, EVEX_V256;
12741    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
12742                                   src_v.info128>, EVEX_V128;
12743  }
12744}
12745
12746let ExeDomain = SSEPackedSingle in
12747defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
12748                                       avx512vl_f32_info, avx512vl_bf16_info,
12749                                       HasBF16>, T8, XS, EVEX_CD8<32, CD8VF>;
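// Illustrative use (assumption): the bf16 dot-product accumulate corresponds
// to, e.g.:
//   #include <immintrin.h>
//   __m512 bf16_dp(__m512 acc, __m512bh a, __m512bh b) {
//     return _mm512_dpbf16_ps(acc, a, b); // expected to select vdpbf16ps
//   }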
12750
12751//===----------------------------------------------------------------------===//
12752// AVX512FP16
12753//===----------------------------------------------------------------------===//
12754
12755let Predicates = [HasFP16] in {
12756// Move word (r/m16) to packed word
12757def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
12758                      "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
12759def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
12760                      "vmovw\t{$src, $dst|$dst, $src}",
12761                      [(set VR128X:$dst,
12762                        (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
12763                      T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;
12764
12765def : Pat<(f16 (bitconvert GR16:$src)),
12766          (f16 (COPY_TO_REGCLASS
12767                (VMOVW2SHrr
12768                 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
12769                FR16X))>;
12770def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
12771          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
12772def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
12773          (VMOVW2SHrr GR32:$src)>;
12774// FIXME: We should really find a way to improve these patterns.
12775def : Pat<(v8i32 (X86vzmovl
12776                  (insert_subvector undef,
12777                                    (v4i32 (scalar_to_vector
12778                                            (and GR32:$src, 0xffff))),
12779                                    (iPTR 0)))),
12780          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12781def : Pat<(v16i32 (X86vzmovl
12782                   (insert_subvector undef,
12783                                     (v4i32 (scalar_to_vector
12784                                             (and GR32:$src, 0xffff))),
12785                                     (iPTR 0)))),
12786          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12787
12788def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
12789          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
12790
12791// The 128-bit vmovw instruction writes zeros to the upper bits of the destination.
12792def : Pat<(v8i16 (X86vzload16 addr:$src)),
12793          (VMOVWrm addr:$src)>;
12794def : Pat<(v16i16 (X86vzload16 addr:$src)),
12795          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
12796
12797// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
12798def : Pat<(v32i16 (X86vzload16 addr:$src)),
12799          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
12800
12801def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
12802          (VMOVWrm addr:$src)>;
12803def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
12804          (VMOVWrm addr:$src)>;
12805def : Pat<(v8i32 (X86vzmovl
12806                  (insert_subvector undef,
12807                                    (v4i32 (scalar_to_vector
12808                                            (i32 (zextloadi16 addr:$src)))),
12809                                    (iPTR 0)))),
12810          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12811def : Pat<(v16i32 (X86vzmovl
12812                   (insert_subvector undef,
12813                                     (v4i32 (scalar_to_vector
12814                                             (i32 (zextloadi16 addr:$src)))),
12815                                     (iPTR 0)))),
12816          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12817
12818// Move word from xmm register to r/m16
12819def VMOVSH2Wrr  : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
12820                       "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveToGpr]>;
12821def VMOVWmr  : AVX512<0x7E, MRMDestMem, (outs),
12822                       (ins i16mem:$dst, VR128X:$src),
12823                       "vmovw\t{$src, $dst|$dst, $src}",
12824                       [(store (i16 (extractelt (v8i16 VR128X:$src),
12825                                     (iPTR 0))), addr:$dst)]>,
12826                       T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;
12827
12828def : Pat<(i16 (bitconvert FR16X:$src)),
12829          (i16 (EXTRACT_SUBREG
12830                (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
12831                sub_16bit))>;
12832def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
12833          (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;
12834
12835// Allow "vmovw" to use GR64
12836let hasSideEffects = 0 in {
12837  def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
12838                     "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
12839  def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
12840                     "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>;
12841}
12842}
12843
12844// Convert 16-bit float to i16/u16
12845multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12846                          SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12847                          AVX512VLVectorVTInfo _Dst,
12848                          AVX512VLVectorVTInfo _Src,
12849                          X86SchedWriteWidths sched> {
12850  let Predicates = [HasFP16] in {
12851    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
12852                            OpNode, MaskOpNode, sched.ZMM>,
12853             avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
12854                               OpNodeRnd, sched.ZMM>, EVEX_V512;
12855  }
12856  let Predicates = [HasFP16, HasVLX] in {
12857    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
12858                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
12859    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
12860                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
12861  }
12862}
12863
12864// Convert 16-bit float to i16/u16 with truncation
12865multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12866                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12867                           AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
12868                           X86SchedWriteWidths sched> {
12869  let Predicates = [HasFP16] in {
12870    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
12871                            OpNode, MaskOpNode, sched.ZMM>,
12872             avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
12873                               OpNodeRnd, sched.ZMM>, EVEX_V512;
12874  }
12875  let Predicates = [HasFP16, HasVLX] in {
12876    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
12877                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
12878    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
12879                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
12880  }
12881}
12882
12883defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
12884                                X86cvtp2UIntRnd, avx512vl_i16_info,
12885                                avx512vl_f16_info, SchedWriteCvtPD2DQ>,
12886                                T_MAP5, EVEX_CD8<16, CD8VF>;
12887defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
12888                                X86VUintToFpRnd, avx512vl_f16_info,
12889                                avx512vl_i16_info, SchedWriteCvtPD2DQ>,
12890                                T_MAP5, XD, EVEX_CD8<16, CD8VF>;
12891defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
12892                                X86cvttp2si, X86cvttp2siSAE,
12893                                avx512vl_i16_info, avx512vl_f16_info,
12894                                SchedWriteCvtPD2DQ>, T_MAP5, PD, EVEX_CD8<16, CD8VF>;
12895defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
12896                                X86cvttp2ui, X86cvttp2uiSAE,
12897                                avx512vl_i16_info, avx512vl_f16_info,
12898                                SchedWriteCvtPD2DQ>, T_MAP5, EVEX_CD8<16, CD8VF>;
12899defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
12900                                X86cvtp2IntRnd, avx512vl_i16_info,
12901                                avx512vl_f16_info, SchedWriteCvtPD2DQ>,
12902                                T_MAP5, PD, EVEX_CD8<16, CD8VF>;
12903defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
12904                                X86VSintToFpRnd, avx512vl_f16_info,
12905                                avx512vl_i16_info, SchedWriteCvtPD2DQ>,
12906                                T_MAP5, XS, EVEX_CD8<16, CD8VF>;
12907
12908// Convert Half to Signed/Unsigned Doubleword
12909multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12910                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12911                           X86SchedWriteWidths sched> {
12912  let Predicates = [HasFP16] in {
12913    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
12914                            MaskOpNode, sched.ZMM>,
12915             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
12916                                OpNodeRnd, sched.ZMM>, EVEX_V512;
12917  }
12918  let Predicates = [HasFP16, HasVLX] in {
12919    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
12920                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
12921    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
12922                               MaskOpNode, sched.YMM>, EVEX_V256;
12923  }
12924}
12925
12926// Convert Half to Signed/Unsigned Doubleword with truncation
12927multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12928                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12929                            X86SchedWriteWidths sched> {
12930  let Predicates = [HasFP16] in {
12931    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
12932                            MaskOpNode, sched.ZMM>,
12933             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
12934                                OpNodeRnd, sched.ZMM>, EVEX_V512;
12935  }
12936  let Predicates = [HasFP16, HasVLX] in {
12937    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
12938                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
12939    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
12940                               MaskOpNode, sched.YMM>, EVEX_V256;
12941  }
12942}
12943
12944
12945defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
12946                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
12947                                 EVEX_CD8<16, CD8VH>;
12948defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
12949                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5,
12950                                 EVEX_CD8<16, CD8VH>;
12951
12952defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
12953                                X86cvttp2si, X86cvttp2siSAE,
12954                                SchedWriteCvtPS2DQ>, T_MAP5, XS,
12955                                EVEX_CD8<16, CD8VH>;
12956
12957defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
12958                                 X86cvttp2ui, X86cvttp2uiSAE,
12959                                 SchedWriteCvtPS2DQ>, T_MAP5,
12960                                 EVEX_CD8<16, CD8VH>;
12961
12962// Convert Half to Signed/Unsigned Quadword
12963multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12964                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12965                           X86SchedWriteWidths sched> {
12966  let Predicates = [HasFP16] in {
12967    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
12968                            MaskOpNode, sched.ZMM>,
12969             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
12970                               OpNodeRnd, sched.ZMM>, EVEX_V512;
12971  }
12972  let Predicates = [HasFP16, HasVLX] in {
12973    // Explicitly specified broadcast string, since we take only 2 elements
12974    // from v8f16x_info source
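    // (e.g. for the vcvtph2qq instantiation the 128-bit broadcast form is
    // written "vcvtph2qq (%rax){1to2}, %xmm0", not the "{1to8}" that
    // v8f16x_info would otherwise imply)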
12975    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
12976                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
12977                               EVEX_V128;
12978    // Explicitly specified broadcast string, since we take only 4 elements
12979    // from v8f16x_info source
12980    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
12981                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
12982                               EVEX_V256;
12983  }
12984}
12985
12986// Convert Half to Signed/Unsigned Quadword with truncation
12987multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12988                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12989                            X86SchedWriteWidths sched> {
12990  let Predicates = [HasFP16] in {
12991    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
12992                            MaskOpNode, sched.ZMM>,
12993             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
12994                                OpNodeRnd, sched.ZMM>, EVEX_V512;
12995  }
12996  let Predicates = [HasFP16, HasVLX] in {
12997    // Explicitly specified broadcast string, since we take only 2 elements
12998    // from v8f16x_info source
12999    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13000                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
13001    // Explicitly specified broadcast string, since we take only 4 elements
13002    // from v8f16x_info source
13003    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13004                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
13005  }
13006}
13007
13008defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
13009                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
13010                                 EVEX_CD8<16, CD8VQ>;
13011
13012defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
13013                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
13014                                 EVEX_CD8<16, CD8VQ>;
13015
13016defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
13017                                 X86cvttp2si, X86cvttp2siSAE,
13018                                 SchedWriteCvtPS2DQ>, T_MAP5, PD,
13019                                 EVEX_CD8<16, CD8VQ>;
13020
13021defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
13022                                 X86cvttp2ui, X86cvttp2uiSAE,
13023                                 SchedWriteCvtPS2DQ>, T_MAP5, PD,
13024                                 EVEX_CD8<16, CD8VQ>;
13025
13026// Convert Signed/Unsigned Quadword to Half
13027multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13028                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13029                           X86SchedWriteWidths sched> {
13030  // We need "x"/"y"/"z" suffixes in order to distinguish between the 128-, 256-
13031  // and 512-bit memory forms of these instructions in the asm parser. They have
13032  // the same dest type - 'v8f16x_info'. We also specify the broadcast string
13033  // explicitly for the same reason.
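  // (e.g. in AT&T syntax "vcvtqq2ph (%rax), %xmm0" alone would not tell the
  // parser which memory width is meant; the aliases below accept
  // "vcvtqq2phx", "vcvtqq2phy" and "vcvtqq2phz" instead)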
13034  let Predicates = [HasFP16] in {
13035    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
13036                            MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
13037             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
13038                               OpNodeRnd, sched.ZMM>, EVEX_V512;
13039  }
13040  let Predicates = [HasFP16, HasVLX] in {
13041    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
13042                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
13043                               i128mem, VK2WM>, EVEX_V128;
13044    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
13045                               null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
13046                               i256mem, VK4WM>, EVEX_V256;
13047  }
13048
13049  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
13050                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
13051                  VR128X:$src), 0, "att">;
13052  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
13053                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
13054                  VK2WM:$mask, VR128X:$src), 0, "att">;
13055  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
13056                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
13057                  VK2WM:$mask, VR128X:$src), 0, "att">;
13058  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
13059                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
13060                  i64mem:$src), 0, "att">;
13061  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
13062                  "$dst {${mask}}, ${src}{1to2}}",
13063                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
13064                  VK2WM:$mask, i64mem:$src), 0, "att">;
13065  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
13066                  "$dst {${mask}} {z}, ${src}{1to2}}",
13067                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
13068                  VK2WM:$mask, i64mem:$src), 0, "att">;
13069
13070  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
13071                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
13072                  VR256X:$src), 0, "att">;
13073  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
13074                  "$dst {${mask}}, $src}",
13075                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
13076                  VK4WM:$mask, VR256X:$src), 0, "att">;
13077  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
13078                  "$dst {${mask}} {z}, $src}",
13079                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
13080                  VK4WM:$mask, VR256X:$src), 0, "att">;
13081  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
13082                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
13083                  i64mem:$src), 0, "att">;
13084  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
13085                  "$dst {${mask}}, ${src}{1to4}}",
13086                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
13087                  VK4WM:$mask, i64mem:$src), 0, "att">;
13088  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
13089                  "$dst {${mask}} {z}, ${src}{1to4}}",
13090                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
13091                  VK4WM:$mask, i64mem:$src), 0, "att">;
13092
13093  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
13094                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
13095                  VR512:$src), 0, "att">;
13096  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
13097                  "$dst {${mask}}, $src}",
13098                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
13099                  VK8WM:$mask, VR512:$src), 0, "att">;
13100  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
13101                  "$dst {${mask}} {z}, $src}",
13102                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
13103                  VK8WM:$mask, VR512:$src), 0, "att">;
13104  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
13105                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
13106                  i64mem:$src), 0, "att">;
13107  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
13108                  "$dst {${mask}}, ${src}{1to8}}",
13109                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
13110                  VK8WM:$mask, i64mem:$src), 0, "att">;
13111  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
13112                  "$dst {${mask}} {z}, ${src}{1to8}}",
13113                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
13114                  VK8WM:$mask, i64mem:$src), 0, "att">;
13115}
13116
13117defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
13118                            X86VSintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5,
13119                            EVEX_CD8<64, CD8VF>;
13120
13121defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
13122                            X86VUintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5, XD,
13123                            EVEX_CD8<64, CD8VF>;
13124
13125// Convert half to signed/unsigned int 32/64
13126defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
13127                                   X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
13128                                   T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13129defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
13130                                   X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
13131                                   T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
13132defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
13133                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
13134                                   T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13135defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
13136                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
13137                                   T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
13138
13139defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
13140                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13141                        "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13142defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
13143                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13144                        "{q}", HasFP16>, REX_W, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13145defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
13146                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13147                        "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13148defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
13149                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13150                        "{q}", HasFP16>, T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
13151
13152let Predicates = [HasFP16] in {
13153  defm VCVTSI2SHZ  : avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
13154                                   v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
13155                                   T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
13156  defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
13157                                   v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
13158                                   T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
13159  defm VCVTUSI2SHZ   : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
13160                                    v8f16x_info, i32mem, loadi32,
13161                                    "cvtusi2sh","l">, T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
13162  defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
13163                                    v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
13164                                    T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
13165  def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13166              (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13167
13168  def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13169              (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13170
13171
13172  def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
13173            (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13174  def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
13175            (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13176
13177  def : Pat<(f16 (any_sint_to_fp GR32:$src)),
13178            (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13179  def : Pat<(f16 (any_sint_to_fp GR64:$src)),
13180            (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13181
13182  def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
13183            (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13184  def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
13185            (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13186
13187  def : Pat<(f16 (any_uint_to_fp GR32:$src)),
13188            (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13189  def : Pat<(f16 (any_uint_to_fp GR64:$src)),
13190            (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13191
13192  // Patterns used for matching vcvtsi2sh intrinsic sequences from clang
13193  // which produce unnecessary vmovsh instructions
13194  def : Pat<(v8f16 (X86Movsh
13195                     (v8f16 VR128X:$dst),
13196                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
13197            (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13198
13199  def : Pat<(v8f16 (X86Movsh
13200                     (v8f16 VR128X:$dst),
13201                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
13202            (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13203
13204  def : Pat<(v8f16 (X86Movsh
13205                     (v8f16 VR128X:$dst),
13206                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
13207            (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13208
13209  def : Pat<(v8f16 (X86Movsh
13210                     (v8f16 VR128X:$dst),
13211                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
13212            (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13213
13214  def : Pat<(v8f16 (X86Movsh
13215                     (v8f16 VR128X:$dst),
13216                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
13217            (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13218
13219  def : Pat<(v8f16 (X86Movsh
13220                     (v8f16 VR128X:$dst),
13221                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
13222            (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13223
13224  def : Pat<(v8f16 (X86Movsh
13225                     (v8f16 VR128X:$dst),
13226                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
13227            (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13228
13229  def : Pat<(v8f16 (X86Movsh
13230                     (v8f16 VR128X:$dst),
13231                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
13232            (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13233} // Predicates = [HasFP16]
13234
13235let Predicates = [HasFP16, HasVLX] in {
13236  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
13237  // patterns have been disabled with null_frag.
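  // (The Z128/Z256 forms of avx512_cvtqq2ph above pass null_frag, so their
  // unmasked, masked and broadcast variants are matched by the hand-written
  // patterns in this block.)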
13238  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
13239            (VCVTQQ2PHZ256rr VR256X:$src)>;
13240  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13241                           VK4WM:$mask),
13242            (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13243  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13244                           VK4WM:$mask),
13245            (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13246
13247  def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
13248            (VCVTQQ2PHZ256rm addr:$src)>;
13249  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13250                           VK4WM:$mask),
13251            (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13252  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13253                           VK4WM:$mask),
13254            (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13255
13256  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13257            (VCVTQQ2PHZ256rmb addr:$src)>;
13258  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13259                           (v8f16 VR128X:$src0), VK4WM:$mask),
13260            (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13261  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13262                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13263            (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13264
13265  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
13266            (VCVTQQ2PHZ128rr VR128X:$src)>;
13267  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13268                           VK2WM:$mask),
13269            (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13270  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13271                           VK2WM:$mask),
13272            (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13273
13274  def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
13275            (VCVTQQ2PHZ128rm addr:$src)>;
13276  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13277                           VK2WM:$mask),
13278            (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13279  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13280                           VK2WM:$mask),
13281            (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13282
13283  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13284            (VCVTQQ2PHZ128rmb addr:$src)>;
13285  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13286                           (v8f16 VR128X:$src0), VK2WM:$mask),
13287            (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13288  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13289                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13290            (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13291
13292  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
13293  // patterns have been disabled with null_frag.
13294  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
13295            (VCVTUQQ2PHZ256rr VR256X:$src)>;
13296  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13297                           VK4WM:$mask),
13298            (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13299  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13300                           VK4WM:$mask),
13301            (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13302
13303  def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
13304            (VCVTUQQ2PHZ256rm addr:$src)>;
13305  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13306                           VK4WM:$mask),
13307            (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13308  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13309                           VK4WM:$mask),
13310            (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13311
13312  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13313            (VCVTUQQ2PHZ256rmb addr:$src)>;
13314  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13315                           (v8f16 VR128X:$src0), VK4WM:$mask),
13316            (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13317  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13318                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13319            (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13320
13321  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
13322            (VCVTUQQ2PHZ128rr VR128X:$src)>;
13323  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13324                           VK2WM:$mask),
13325            (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13326  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13327                           VK2WM:$mask),
13328            (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13329
13330  def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
13331            (VCVTUQQ2PHZ128rm addr:$src)>;
13332  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13333                           VK2WM:$mask),
13334            (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13335  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13336                           VK2WM:$mask),
13337            (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13338
13339  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13340            (VCVTUQQ2PHZ128rmb addr:$src)>;
13341  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13342                           (v8f16 VR128X:$src0), VK2WM:$mask),
13343            (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13344  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13345                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13346            (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13347}
13348
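// The "@earlyclobber $dst" constraints below keep the register allocator from
// assigning $dst to an untied source, since the complex FP16 FMA/multiply
// instructions do not allow the destination to overlap those source registers.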
13349let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13350  multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
13351    defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13352            (ins _.RC:$src2, _.RC:$src3),
13353            OpcodeStr, "$src3, $src2", "$src2, $src3",
13354            (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX, VVVV;
13355
13356    defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13357            (ins _.RC:$src2, _.MemOp:$src3),
13358            OpcodeStr, "$src3, $src2", "$src2, $src3",
13359            (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX, VVVV;
13360
13361    defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13362            (ins _.RC:$src2, _.ScalarMemOp:$src3),
13363            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
13364            (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX, VVVV;
13365  }
13366} // Constraints = "@earlyclobber $dst, $src1 = $dst"
13367
13368multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
13369                                 X86VectorVTInfo _> {
13370  let Constraints = "@earlyclobber $dst, $src1 = $dst" in
13371  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13372          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
13373          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
13374          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
13375          EVEX, VVVV, EVEX_B, EVEX_RC;
13376}
13377
13378
13379multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
13380  let Predicates = [HasFP16] in {
13381    defm Z    : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
13382                avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
13383                      EVEX_V512, Sched<[WriteFMAZ]>;
13384  }
13385  let Predicates = [HasVLX, HasFP16] in {
13386    defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
13387    defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
13388  }
13389}
13390
13391multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13392                                 SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
13393  let Predicates = [HasFP16] in {
13394    defm Z    : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
13395                                 WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
13396                avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
13397                                       "", "@earlyclobber $dst">, EVEX_V512;
13398  }
13399  let Predicates = [HasVLX, HasFP16] in {
13400    defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
13401                                 WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
13402    defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
13403                                 WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
13404  }
13405}
13406
13407
13408let Uses = [MXCSR] in {
13409  defm VFMADDCPH  : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
13410                                    T_MAP6, XS, EVEX_CD8<32, CD8VF>;
13411  defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
13412                                    T_MAP6, XD, EVEX_CD8<32, CD8VF>;
13413
13414  defm VFMULCPH  : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
13415                                         x86vfmulcRnd, 1>, T_MAP6, XS, EVEX_CD8<32, CD8VF>;
13416  defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
13417                                         x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6, XD, EVEX_CD8<32, CD8VF>;
13418}
13419
13420
13421multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
13422                                   bit IsCommutable> {
13423  let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13424    defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13425                        (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
13426                        "$src3, $src2", "$src2, $src3",
13427                        (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
13428                        Sched<[WriteFMAX]>;
13429    defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
13430                        (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
13431                        "$src3, $src2", "$src2, $src3",
13432                        (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
13433                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13434    defm rb : AVX512_maskable_3src<opc,  MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13435                        (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
13436                        "$rc, $src3, $src2", "$src2, $src3, $rc",
13437                        (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
13438                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13439  }
13440}
13441
13442multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13443                                     SDNode OpNodeRnd, bit IsCommutable> {
13444  let Predicates = [HasFP16] in {
13445    defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13446                        (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
13447                        "$src2, $src1", "$src1, $src2",
13448                        (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
13449                        IsCommutable, IsCommutable, IsCommutable,
13450                        X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
13451    defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
13452                        (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
13453                        "$src2, $src1", "$src1, $src2",
13454                        (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
13455                        0, 0, 0, X86selects, "@earlyclobber $dst">,
13456                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13457    defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13458                        (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
13459                        "$rc, $src2, $src1", "$src1, $src2, $rc",
13460                        (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
13461                        0, 0, 0, X86selects, "@earlyclobber $dst">,
13462                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13463  }
13464}
13465
13466let Uses = [MXCSR] in {
13467  defm VFMADDCSHZ  : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
13468                                    T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV;
13469  defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
13470                                    T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV;
13471
13472  defm VFMULCSHZ  : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
13473                                    T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV;
13474  defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
13475                                    T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV;
13476}
13477