xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td (revision 3ceba58a7509418b47b8fca2d2b6bbf088714e26)
1//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file describes the X86 AVX512 instruction set, defining the
10// instructions, and properties of the instructions which are needed for code
11// generation, machine code emission, and analysis.
12//
13//===----------------------------------------------------------------------===//
14
// Generates the unmasked (NAME), merge-masked (NAME#k) and zero-masked
// (NAME#kz) records of an instruction.  Only the operand lists and assembly
// strings are derived here; the ISel patterns for each of the three forms are
// supplied by the caller through the template arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = ""> {
  // Unmasked form.
  let Constraints = ClobberConstraint, isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                     !strconcat(OpcodeStr, "\t{", AttSrcAsm, ", $dst|",
                                "$dst, ", IntelSrcAsm, "}"),
                     Pattern>;

  // Merge-masked form.  Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       !strconcat(OpcodeStr, "\t{", AttSrcAsm,
                                  ", $dst {${mask}}|",
                                  "$dst {${mask}}, ", IntelSrcAsm, "}"),
                       MaskingPattern>,
              EVEX_K {
      // Combine the clobber and masking constraints, dropping whichever one
      // is empty.  In case of the 3src subclass this is overridden with a let.
      string Constraints =
          !if(!eq(ClobberConstraint, ""), MaskingConstraint,
              !if(!eq(MaskingConstraint, ""), ClobberConstraint,
                  !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
    }

  // Zero-masked form.  Prefer over VMOV*rrkz Pat<>
  // Zero masking does not add any restriction to the commute-operands
  // transformation, which is why IsKZCommutable defaults to IsCommutable
  // rather than to IsKCommutable.
  let Constraints = ClobberConstraint, isCommutable = IsKZCommutable in
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                        !strconcat(OpcodeStr, "\t{", AttSrcAsm,
                                   ", $dst {${mask}} {z}|",
                                   "$dst {${mask}} {z}, ", IntelSrcAsm, "}"),
                        ZeroMaskingPattern>,
               EVEX_KZ;
}
61
62
// Shared plumbing for AVX512_maskable and AVX512_maskable_3src.  Wraps the
// caller's RHS and MaskingRHS dags into `set` patterns on _.RC:$dst and
// builds the zero-masking pattern itself by applying Select to
// _.KRCWM:$mask, RHS and _.ImmAllZerosV.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDPatternOperator Select = vselect_mask,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = "">
  : AVX512_maskable_custom<
        O, F, Outs, Ins, MaskingIns, ZeroMaskingIns,
        OpcodeStr, AttSrcAsm, IntelSrcAsm,
        // Unmasked pattern.
        [(set _.RC:$dst, RHS)],
        // Merge-masking pattern (fully supplied by the caller).
        [(set _.RC:$dst, MaskingRHS)],
        // Zero-masking pattern.
        [(set _.RC:$dst, (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
        MaskingConstraint,
        IsCommutable, IsKCommutable, IsKZCommutable,
        ClobberConstraint>;
84
// Produces the unmasked, merge-masked and zero-masked variants of a vector
// instruction.  For merge masking, the preserved elements come from an extra
// dummy input operand ($src0) that is tied to $dst.
// Unlike AVX512_maskable, the unmasked pattern (RHS) and the masked patterns
// (MaskRHS) are given as two separate dags.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskRHS,
                                 string ClobberConstraint = "",
                                 bit IsCommutable = 0, bit IsKCommutable = 0,
                                 bit IsKZCommutable = IsCommutable>
  : AVX512_maskable_custom<
        O, F, Outs,
        // Operand lists: unmasked, merge-masked, zero-masked.
        Ins,
        !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
        !con((ins _.KRCWM:$mask), Ins),
        OpcodeStr, AttSrcAsm, IntelSrcAsm,
        // Patterns: unmasked, merge-masked, zero-masked.
        [(set _.RC:$dst, RHS)],
        [(set _.RC:$dst,
              (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
        [(set _.RC:$dst,
              (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
        "$src0 = $dst",
        IsCommutable, IsKCommutable, IsKZCommutable,
        ClobberConstraint>;
107
// Produces the unmasked, merge-masked and zero-masked variants of a vector
// instruction from a single RHS dag.  For merge masking, the preserved
// elements come from an extra dummy input operand ($src0) tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDPatternOperator Select = vselect_mask,
                           string ClobberConstraint = "">
  : AVX512_maskable_common<
        O, F, _, Outs,
        // Operand lists: unmasked, merge-masked, zero-masked.
        Ins,
        !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
        !con((ins _.KRCWM:$mask), Ins),
        OpcodeStr, AttSrcAsm, IntelSrcAsm,
        RHS,
        // Merge masking: take RHS where the mask is set, $src0 elsewhere.
        (Select _.KRCWM:$mask, RHS, _.RC:$src0),
        Select, "$src0 = $dst",
        IsCommutable, IsKCommutable, IsKZCommutable,
        ClobberConstraint>;
126
// Scalar counterpart of AVX512_maskable: produces the unmasked, merge-masked
// and zero-masked variants of a scalar instruction.  All three commutable
// bits are passed as 0 and X86selects_mask is used as the select operator.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins, string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS>
  : AVX512_maskable<O, F, _, Outs, Ins,
                    OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                    /*IsCommutable=*/0, /*IsKCommutable=*/0,
                    /*IsKZCommutable=*/0,
                    X86selects_mask>;
135
// Like AVX512_maskable, except that one of the source operands ($src1) is
// already tied to $dst, so it doubles as the source of the preserved
// elements.  NOTE: NonTiedIns (the ins dag) must not contain $src1; it is
// prepended here.  When MaskOnly is set, the unmasked pattern is replaced by
// null_frag.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDPatternOperator Select = vselect_mask,
                                bit MaskOnly = 0>
  : AVX512_maskable_common<
        O, F, _, Outs,
        // Operand lists: unmasked, merge-masked, zero-masked.  Both masked
        // forms keep $src1 in front of the mask.
        !con((ins _.RC:$src1), NonTiedIns),
        !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
        !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
        OpcodeStr, AttSrcAsm, IntelSrcAsm,
        !if(MaskOnly, (null_frag), RHS),
        (Select _.KRCWM:$mask, RHS, _.RC:$src1),
        Select,
        // No masking constraint is passed from here.
        "",
        IsCommutable, IsKCommutable>;
156
// Variant of AVX512_maskable_3src for the case where the tied operand's
// input VT (InVT) differs from the output VT (OutVT).  The preserved vector
// therefore goes through a bitconvert before entering the vselect.
// NOTE: The unmasked pattern is disabled (null_frag).
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0>
  : AVX512_maskable_common<
        O, F, OutVT, Outs,
        // Operand lists: unmasked, merge-masked, zero-masked.
        !con((ins InVT.RC:$src1), NonTiedIns),
        !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
        !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
        OpcodeStr, AttSrcAsm, IntelSrcAsm,
        (null_frag),
        (vselect_mask InVT.KRCWM:$mask, RHS, (bitconvert InVT.RC:$src1)),
        vselect_mask, "", IsCommutable>;
174
// Scalar counterpart of AVX512_maskable_3src: forwards every argument
// unchanged and uses X86selects_mask as the select operator.
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       dag RHS,
                                       bit IsCommutable = 0,
                                       bit IsKCommutable = 0,
                                       bit MaskOnly = 0>
  : AVX512_maskable_3src<O, F, _, Outs, NonTiedIns,
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                         IsCommutable, IsKCommutable,
                         X86selects_mask, MaskOnly>;
185
// Produces the unmasked, merge-masked and zero-masked records, but attaches
// the caller's Pattern to the unmasked record only; both masked records get
// empty pattern lists.  Merge masking ties a dummy $src0 input to $dst.
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern>
  : AVX512_maskable_custom<
        O, F, Outs,
        // Operand lists: unmasked, merge-masked, zero-masked.
        Ins,
        !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
        !con((ins _.KRCWM:$mask), Ins),
        OpcodeStr, AttSrcAsm, IntelSrcAsm,
        // Patterns: unmasked, merge-masked (none), zero-masked (none).
        Pattern, [], [],
        "$src0 = $dst">;
196
// Three-source flavour of AVX512_maskable_in_asm: $src1 is prepended to
// NonTiedIns for every form, only the unmasked record receives Pattern, and
// no masking constraint is passed from here.
multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern>
  : AVX512_maskable_custom<
        O, F, Outs,
        // Operand lists: unmasked, merge-masked, zero-masked.
        !con((ins _.RC:$src1), NonTiedIns),
        !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
        !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
        OpcodeStr, AttSrcAsm, IntelSrcAsm,
        // Patterns: unmasked, merge-masked (none), zero-masked (none).
        Pattern, [], [],
        "">;
208
// Masked instructions whose result goes to a mask register, such as
// "compare" and "vptest".  Only an unmasked (NAME) and a masked (NAME#k)
// record are generated; there is no zero-masking form.  Both records share
// the same isCommutable setting.
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      list<dag> Pattern,
                                      list<dag> MaskingPattern,
                                      bit IsCommutable = 0> {
  let isCommutable = IsCommutable in {
    // Unmasked form.
    def NAME: AVX512<O, F, Outs, Ins,
                     !strconcat(OpcodeStr, "\t{", AttSrcAsm, ", $dst|",
                                "$dst, ", IntelSrcAsm, "}"),
                     Pattern>;

    // Masked form.
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       !strconcat(OpcodeStr, "\t{", AttSrcAsm,
                                  ", $dst {${mask}}|",
                                  "$dst {${mask}}, ", IntelSrcAsm, "}"),
                       MaskingPattern>,
                EVEX_K;
  }
}
231
// Wraps RHS and MaskingRHS into `set` patterns that write the mask register
// _.KRC:$dst and forwards everything else to AVX512_maskable_custom_cmp.
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      dag RHS, dag MaskingRHS,
                                      bit IsCommutable = 0>
  : AVX512_maskable_custom_cmp<
        O, F, Outs, Ins, MaskingIns,
        OpcodeStr, AttSrcAsm, IntelSrcAsm,
        [(set _.KRC:$dst, RHS)],
        [(set _.KRC:$dst, MaskingRHS)],
        IsCommutable>;
243
// Mask-producing instruction with an unmasked and a masked variant.  The
// masked operand list is $mask followed by Ins, and the masked pattern is
// the `and` of $mask with RHS_su.
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag Ins, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag RHS_su, bit IsCommutable = 0>
  : AVX512_maskable_common_cmp<
        O, F, _, Outs,
        Ins,
        !con((ins _.KRCWM:$mask), Ins),
        OpcodeStr, AttSrcAsm, IntelSrcAsm,
        RHS,
        (and _.KRCWM:$mask, RHS_su),
        IsCommutable>;
252
// Used by conversion instructions.  The caller supplies all three operand
// lists and all three right-hand sides explicitly; each RHS is wrapped in a
// `set` on _.RC:$dst.  The masking constraint ties $src0 to $dst.
multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs,
                               dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                               string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, dag ZeroMaskingRHS>
  : AVX512_maskable_custom<
        O, F, Outs, Ins, MaskingIns, ZeroMaskingIns,
        OpcodeStr, AttSrcAsm, IntelSrcAsm,
        // Patterns: unmasked, merge-masked, zero-masked.
        [(set _.RC:$dst, RHS)],
        [(set _.RC:$dst, MaskingRHS)],
        [(set _.RC:$dst, ZeroMaskingRHS)],
        "$src0 = $dst">;
266
// Three-source helper that takes separate dags for the unmasked pattern
// (RHS) and for the masked patterns (MaskingRHS).  $src1 is prepended to
// NonTiedIns and is the pass-through value for merge masking; zero masking
// selects against _.ImmAllZerosV.  No masking constraint is passed from here.
multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag NonTiedIns, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, bit IsCommutable,
                               bit IsKCommutable>
  : AVX512_maskable_custom<
        O, F, Outs,
        // Operand lists: unmasked, merge-masked, zero-masked.
        !con((ins _.RC:$src1), NonTiedIns),
        !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
        !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
        OpcodeStr, AttSrcAsm, IntelSrcAsm,
        // Patterns: unmasked, merge-masked, zero-masked.
        [(set _.RC:$dst, RHS)],
        [(set _.RC:$dst,
              (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
        [(set _.RC:$dst,
              (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
        "", IsCommutable, IsKCommutable>;
283
// Pseudo instructions that materialise a 512-bit all-zeros / all-ones vector.
// The zero vector is mapped to pxor / xorp* for AVX-512: it is expanded by
// ExpandPostRAPseudos to an xorps / vxorps, and then swizzled by
// ExecutionDomainFix to pxor.
// canFoldAsLoad is set because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isPseudo = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
    canFoldAsLoad = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
  def AVX512_512_SET0
      : I<0, Pseudo, (outs VR512:$dst), (ins), "",
          [(set VR512:$dst, (v16i32 immAllZerosV))]>;
  def AVX512_512_SETALLONES
      : I<0, Pseudo, (outs VR512:$dst), (ins), "",
          [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}
296
// Select AVX512_512_SET0 for an all-zeros vector of each of the listed
// 512-bit value types.
let Predicates = [HasAVX512] in
foreach ZeroVT = [v64i8, v32i16, v8i64, v32f16, v16f32, v8f64] in
  def : Pat<(ZeroVT immAllZerosV), (AVX512_512_SET0)>;
305
// Pseudo instructions that let VPTERNLOG be used with a mask to build a
// vector mixing all-ones and all-zeros elements.  A pseudo is used so that
// the same register is forced to be the input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
  // 16 x i32 elements selected by a 16-bit mask.
  def AVX512_512_SEXT_MASK_32
      : I<0, Pseudo, (outs VR512:$dst), (ins VK16WM:$mask), "",
          [(set VR512:$dst,
                (vselect (v16i1 VK16WM:$mask),
                         (v16i32 immAllOnesV),
                         (v16i32 immAllZerosV)))]>;
  // 8 x i64 elements selected by an 8-bit mask.
  def AVX512_512_SEXT_MASK_64
      : I<0, Pseudo, (outs VR512:$dst), (ins VK8WM:$mask), "",
          [(set VR512:$dst,
                (vselect (v8i1 VK8WM:$mask),
                         (v8i64 immAllOnesV),
                         (v8i64 immAllZerosV)))]>;
}
321
// 128-bit and 256-bit all-zeros pseudos, defined on the VR128X and VR256X
// register classes.
let isPseudo = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
    canFoldAsLoad = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
  def AVX512_128_SET0
      : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
          [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
  def AVX512_256_SET0
      : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
          [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}
329
// Select AVX512_128_SET0 / AVX512_256_SET0 for an all-zeros vector of each
// of the listed 128-bit and 256-bit value types.
let Predicates = [HasAVX512] in {
  foreach ZeroVT = [v8i16, v16i8, v2i64, v8f16, v4f32, v2f64] in
    def : Pat<(ZeroVT immAllZerosV), (AVX512_128_SET0)>;
  foreach ZeroVT = [v32i8, v16i16, v4i64, v16f16, v8f32, v4f64] in
    def : Pat<(ZeroVT immAllZerosV), (AVX512_256_SET0)>;
}
344
// Pseudo instructions that produce a floating-point zero (fld0), mapped to
// xorps for SSE or vxorps for AVX.  One pseudo per scalar width: f16, f32,
// f64 and f128.
// This is expanded by ExpandPostRAPseudos.
let isPseudo = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
    canFoldAsLoad = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
  def AVX512_FsFLD0SH
      : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
          [(set FR16X:$dst, fp16imm0)]>;
  def AVX512_FsFLD0SS
      : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD
      : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
          [(set FR64X:$dst, fp64imm0)]>;
  def AVX512_FsFLD0F128
      : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
          [(set VR128X:$dst, fp128imm0)]>;
}
358
359//===----------------------------------------------------------------------===//
360// AVX-512 - VECTOR INSERT
361//
362
// Defines the register (rr) and memory (rm) forms of a subvector insert,
// each with unmasked, merge-masked and zero-masked variants.
// Two pattern operators are taken: vinsert_insert for the unmasked pattern
// and vinsert_for_mask for the masked patterns.  Either one may be null_frag.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register source.
    defm rr : AVX512_maskable_split<
                  Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                  (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                  "vinsert" # From.EltTypeName # "x" # From.NumElts,
                  "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                        (From.VT From.RC:$src2),
                                        (iPTR imm)),
                  (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                          (From.VT From.RC:$src2),
                                          (iPTR imm))>,
              AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;

    // Memory source: the inserted subvector is loaded through From.LdFrag.
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<
                  Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                  (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                  "vinsert" # From.EltTypeName # "x" # From.NumElts,
                  "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                        (From.VT (From.LdFrag addr:$src2)),
                                        (iPTR imm)),
                  (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                          (From.VT (From.LdFrag addr:$src2)),
                                          (iPTR imm))>,
              AVX512AIi8Base, EVEX, VVVV,
              EVEX_CD8<From.EltSize, From.CD8TupleForm>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
397
// Convenience wrapper around vinsert_for_size_split that uses one and the
// same pattern operator for the unmasked and the masked patterns.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched>
  : vinsert_for_size_split<Opcode, From, To,
                           /*vinsert_insert=*/vinsert_insert,
                           /*vinsert_for_mask=*/vinsert_insert,
                           sched>;
404
// Adds unmasked selection patterns that map vinsert_insert on the From/To
// types onto the existing instruction records InstrStr#"rr" and
// InstrStr#"rm".  The instruction immediate is computed from the matched
// insert node ($ins) by INSERT_get_vinsert_imm.  The patterns are guarded by
// the predicate list p.
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                     X86VectorVTInfo To,
                                     PatFrag vinsert_insert,
                                     SDNodeXForm INSERT_get_vinsert_imm,
                                     list<Predicate> p> {
  let Predicates = p in {
    // Register source.
    def : Pat<(vinsert_insert:$ins (To.VT To.RC:$src1),
                                   (From.VT From.RC:$src2),
                                   (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                          To.RC:$src1, From.RC:$src2,
                          (INSERT_get_vinsert_imm To.RC:$ins)))>;

    // Memory source.
    def : Pat<(vinsert_insert:$ins (To.VT To.RC:$src1),
                                   (From.VT (From.LdFrag addr:$src2)),
                                   (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                          To.RC:$src1, addr:$src2,
                          (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}
424
// Instantiates the full set of subvector-insert instructions for one pair of
// element types (EltVT32 / EltVT64).  Opcode128 is used for the variants that
// insert a 128-bit subvector and Opcode256 for those that insert a 256-bit
// subvector.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  // 32x4: 128-bit into 256-bit.
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256"
        : vinsert_for_size<Opcode128,
                           X86VectorVTInfo< 4, EltVT32, VR128X>,
                           X86VectorVTInfo< 8, EltVT32, VR256X>,
                           vinsert128_insert, sched>,
          EVEX_V256;

  // 32x4: 128-bit into 512-bit.
  defm NAME # "32x4Z"
      : vinsert_for_size<Opcode128,
                         X86VectorVTInfo< 4, EltVT32, VR128X>,
                         X86VectorVTInfo<16, EltVT32, VR512>,
                         vinsert128_insert, sched>,
        EVEX_V512;

  // 64x4: 256-bit into 512-bit.
  defm NAME # "64x4Z"
      : vinsert_for_size<Opcode256,
                         X86VectorVTInfo< 4, EltVT64, VR256X>,
                         X86VectorVTInfo< 8, EltVT64, VR512>,
                         vinsert256_insert, sched>,
        REX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking, so
  // the unmasked pattern operator is null_frag.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256"
        : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 null_frag, vinsert128_insert, sched>,
          EVEX_V256, REX_W;

  // Even with DQI we'd like to only use these instructions for masking, so
  // the unmasked pattern operator is null_frag.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z"
        : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
          REX_W, EVEX_V512;

    defm NAME # "32x8Z"
        : vinsert_for_size_split<Opcode256,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 null_frag, vinsert256_insert, sched>,
          EVEX_V512;
  }
}
468
// Floating-point (VINSERTF*) and integer (VINSERTI*) subvector inserts.
// Template arguments: the 32-bit element type, the opcode for inserting a
// 128-bit subvector, the 64-bit element type, the opcode for inserting a
// 256-bit subvector, and the scheduling class.
// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
472
// Codegen patterns for the alternative element types: unmasked inserts of
// these types are selected to the 32x4 / 64x4 instructions defined above.
// Even with AVX512DQ we'll still use these for unmasked operations.

// 64-bit element types, VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

// 64-bit element types, VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

// 32-bit element types, VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen pattern with the alternative types insert VEC128 into VEC256
// (i16, i8, f16 and bf16 elements).
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8bf16x_info, v16bf16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen pattern with the alternative types insert VEC128 into VEC512
// (i16, i8, f16 and bf16 elements).
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8bf16x_info, v32bf16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types insert VEC256 into VEC512
// (i16, i8, f16 and bf16 elements).
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16bf16x_info, v32bf16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
517
518
// Selection patterns for a masked subvector insert whose result is bitcast
// to a different vector type (Cast) before the masking select is applied.
// The insert itself is matched on the From/To types; the mask, the
// pass-through value ($src0) and the all-zeros vector use the Cast type.
// The patterns select the existing masked records InstrStr#"rrk", "rmk",
// "rrkz" and "rmkz"; the immediate is derived from the matched insert node
// ($ins) via INSERT_get_vinsert_imm.  All patterns are guarded by p.
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  // Merge masking, register source.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Merge masking, memory source.  The load is matched directly as From.VT,
  // exactly like the zero-masking memory pattern below.  A bitconvert around
  // the already From.VT-typed load would be a same-type bitcast, which is
  // not expected to survive in the selection DAG, so such a pattern could
  // not match.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  // Zero masking, register source.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Zero masking, memory source.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}
570
// 128-bit inserts into 256-bit vectors where the mask element width differs
// from the insert's element type. The instruction is chosen by the Cast (mask)
// type: 32-bit masks use the 32x4 forms, 64-bit masks use the 64x2 forms
// (which additionally require DQI).

// Floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

// Integer, masked as v8i32.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;

// Integer, masked as v4i64. These use the integer VINSERTI64x2Z256 form, in
// line with the 512-bit VINSERTI64x2Z instantiations for integer types.
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
596
// 128-bit inserts into 512-bit vectors, masked with a different element width.
// Floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z",
                             v2f64x_info, v8f64_info, v16f32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z",
                             v4f32x_info, v16f32_info, v8f64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;

// Integer, masked as v16i32.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v2i64x_info, v8i64_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v8i16x_info, v32i16_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v16i8x_info, v64i8_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
// Integer, masked as v8i64.
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v4i32x_info, v16i32_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v8i16x_info, v32i16_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v16i8x_info, v64i8_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;

// 256-bit inserts into 512-bit vectors, masked with a different element width.
// Floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x8Z",
                             v4f64x_info, v8f64_info, v16f32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z",
                             v8f32x_info, v16f32_info, v8f64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;

// Integer, masked as v16i32.
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v4i64x_info, v8i64_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v16i16x_info, v32i16_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v32i8x_info, v64i8_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
// Integer, masked as v8i64.
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v8i32x_info, v16i32_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v16i16x_info, v32i16_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v32i8x_info, v64i8_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
648
// vinsertps - insert f32 to XMM
// EVEX-encoded register and memory forms; both select X86insertps with a
// target-immediate control byte.
let ExeDomain = SSEPackedSingle in {
  // Register source.
  let isCommutable = 1 in
  def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg,
        (outs VR128X:$dst),
        (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
        "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set VR128X:$dst,
              (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
        EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;

  // Memory source: a scalar f32 load placed into a v4f32 via scalar_to_vector.
  def VINSERTPSZrm : AVX512AIi8<0x21, MRMSrcMem,
        (outs VR128X:$dst),
        (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
        "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set VR128X:$dst,
              (X86insertps VR128X:$src1,
                           (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                           timm:$src3))]>,
        EVEX, VVVV, EVEX_CD8<32, CD8VT1>,
        Sched<[SchedWriteFShuffle.XMM.Folded,
               SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
666
667//===----------------------------------------------------------------------===//
668// AVX-512 VECTOR EXTRACT
669//---
670
// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
//
// Defines three forms of a To-sized extract out of a From-sized vector:
//   rr  - register destination, expanded through AVX512_maskable_split with
//         vextract_extract for the unmasked pattern and vextract_for_mask for
//         the masked ones.
//   mr  - unmasked store to To.MemOp, matched with vextract_extract.
//   mrk - masked store to To.MemOp; assembly-only (empty pattern list).
//
//   Opcode           - opcode byte shared by all three forms.
//   SchedRR, SchedMR - scheduling classes for the register / memory forms.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    // Both memory forms write To.MemOp:$dst, so mayStore is set explicitly
    // rather than left to inference from the store pattern: `mr` has no
    // usable pattern when vextract_extract is null_frag, and `mrk` never has
    // one.
    let mayStore = 1 in {
    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, To.KRCWM:$mask,
                                        From.RC:$src1, u8imm:$idx),
                     "vextract" # To.EltTypeName # "x" # To.NumElts #
                          "\t{$idx, $src1, $dst {${mask}}|"
                          "$dst {${mask}}, $src1, $idx}", []>,
                    EVEX_K, EVEX, Sched<[SchedMR]>;
    }
  }
}
707
// Convenience wrapper around vextract_for_size_split for the common case where
// the masked and unmasked forms are matched by the same pattern operator.
multiclass vextract_for_size<int Opcode,
                             X86VectorVTInfo From, X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR>
    : vextract_for_size_split<Opcode, From, To,
                              vextract_extract, vextract_extract,
                              SchedRR, SchedMR>;
714
// Codegen patterns for the alternative types: selects an existing extract
// instruction (InstrStr "rr" / "mr") for a From -> To extract whose element
// type differs from the one the instruction was defined with.
//
//   vextract_extract         - PatFrag matching the extract; its $ext binding
//                              is passed to EXTRACT_get_vextract_imm.
//   EXTRACT_get_vextract_imm - SDNodeXForm producing the instruction immediate.
//   p                        - predicates guarding both patterns.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
    // Extract into a register.
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(!strconcat(InstrStr, "rr"))
                         From.RC:$src1,
                         (EXTRACT_get_vextract_imm To.RC:$ext)))>;

    // Extract stored straight to memory.
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                                                   (iPTR imm))),
                     addr:$dst),
              (!cast<Instruction>(!strconcat(InstrStr, "mr"))
                  addr:$dst, From.RC:$src1,
                  (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}
730
// Instantiates every vextract{32x4,64x4,64x2,32x8} size variant for one
// element-type family.
//
//   EltVT32 / EltVT64     - the 32-bit and 64-bit element types.
//   Opcode128 / Opcode256 - opcodes of the 128-bit and 256-bit extracts.
//   SchedRR / SchedMR     - scheduling classes forwarded to each variant.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  // Baseline AVX512: 32x4 and 64x4 extracts from 512-bit vectors.
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" :
        vextract_for_size<Opcode128,
                          X86VectorVTInfo<16, EltVT32, VR512>,
                          X86VectorVTInfo< 4, EltVT32, VR128X>,
                          vextract128_extract, SchedRR, SchedMR>,
        EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" :
        vextract_for_size<Opcode256,
                          X86VectorVTInfo< 8, EltVT64, VR512>,
                          X86VectorVTInfo< 4, EltVT64, VR256X>,
                          vextract256_extract, SchedRR, SchedMR>,
        REX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }

  // VLX: 32x4 extract from a 256-bit vector.
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" :
        vextract_for_size<Opcode128,
                          X86VectorVTInfo< 8, EltVT32, VR256X>,
                          X86VectorVTInfo< 4, EltVT32, VR128X>,
                          vextract128_extract, SchedRR, SchedMR>,
        EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking, so
  // the unmasked operator is null_frag.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" :
        vextract_for_size_split<Opcode128,
                                X86VectorVTInfo< 4, EltVT64, VR256X>,
                                X86VectorVTInfo< 2, EltVT64, VR128X>,
                                null_frag, vextract128_extract,
                                SchedRR, SchedMR>,
        EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;

  // Even with DQI we'd like to only use these instructions for masking, so
  // the unmasked operator is null_frag.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" :
        vextract_for_size_split<Opcode128,
                                X86VectorVTInfo< 8, EltVT64, VR512>,
                                X86VectorVTInfo< 2, EltVT64, VR128X>,
                                null_frag, vextract128_extract,
                                SchedRR, SchedMR>,
        REX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" :
        vextract_for_size_split<Opcode256,
                                X86VectorVTInfo<16, EltVT32, VR512>,
                                X86VectorVTInfo< 8, EltVT32, VR256X>,
                                null_frag, vextract256_extract,
                                SchedRR, SchedMR>,
        EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}
775
// Floating-point (VEXTRACTF*) and integer (VEXTRACTI*) extract families.
// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b,
                                   WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b,
                                   WriteShuffle256, WriteVecStore>;
779
// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.

// 64-bit element types: VEC128 from VEC512 through the 32x4 forms.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z",
                                  v8f64_info, v2f64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v8i64_info, v2i64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;

// 32-bit element types: VEC256 from VEC512 through the 64x4 forms.
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z",
                                  v16f32_info, v8f32x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v16i32_info, v8i32x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;

// 64-bit element types: VEC128 from VEC256 through the 32x4 forms.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256",
                                  v4f64x_info, v2f64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v4i64x_info, v2i64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v16i16x_info, v8i16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v32i8x_info, v16i8x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256",
                                  v16f16x_info, v8f16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256",
                                  v16bf16x_info, v8bf16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v32i16_info, v8i16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v64i8_info, v16i8x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z",
                                  v32f16_info, v8f16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z",
                                  v32bf16_info, v8bf16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;

// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v32i16_info, v16i16x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v64i8_info, v32i8x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z",
                                  v32f16_info, v16f16x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z",
                                  v32bf16_info, v16bf16x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
825
826
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Without VLX: take the low YMM subregister and extract its upper half
// (index 1) with VEXTRACT{I,F}128rr.
let Predicates = [NoVLX, HasEVEX512] in {
  // 64-bit elements.
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI128rr
                       (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF128rr
                       (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  // 32-bit elements.
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI128rr
                       (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF128rr
                       (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  // 16-bit elements.
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI128rr
                       (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
            (v8f16 (VEXTRACTF128rr
                       (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  // 8-bit elements.
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI128rr
                       (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
}
859
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// With VLX: take the low YMM subregister and extract its upper half
// (index 1) with the 256-bit VEXTRACT{I,F}32x4Z256rr.
let Predicates = [HasVLX] in {
  // 64-bit elements.
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI32x4Z256rr
                       (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF32x4Z256rr
                       (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  // 32-bit elements.
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI32x4Z256rr
                       (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF32x4Z256rr
                       (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  // 16-bit elements.
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI32x4Z256rr
                       (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
            (v8f16 (VEXTRACTF32x4Z256rr
                       (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  // 8-bit elements.
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI32x4Z256rr
                       (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
}
892
893
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
//
// The extract is performed in To.VT out of a From.VT source, the result is
// bitcast to Cast.VT, and the select uses a Cast.KRCWM mask.
//
//   InstrStr                 - base instruction name; "rrk" / "rrkz" is
//                              appended to pick the masked form.
//   vextract_extract         - PatFrag matching the extract; its $ext binding
//                              is passed to EXTRACT_get_vextract_imm.
//   EXTRACT_get_vextract_imm - SDNodeXForm producing the instruction immediate.
//   p                        - predicates guarding both patterns.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Merge-masking. The passthru is a Cast.VT value, so it is named with
  // Cast.RC in both the source and the result pattern.
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Zero-masking.
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}
921
// 128-bit extracts from 256-bit vectors, masked with a different element
// width. Floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256",
                              v4f64x_info, v2f64x_info, v4f32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256",
                              v8f32x_info, v4f32x_info, v2f64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;

// Integer, masked as v4i32.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v4i64x_info, v2i64x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v16i16x_info, v8i16x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v32i8x_info, v16i8x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
// Integer, masked as v2i64.
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v8i32x_info, v4i32x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v16i16x_info, v8i16x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v32i8x_info, v16i8x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;

// 128-bit extracts from 512-bit vectors. Floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z",
                              v8f64_info, v2f64x_info, v4f32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z",
                              v16f32_info, v4f32x_info, v2f64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;

// Integer, masked as v4i32.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v8i64_info, v2i64x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v32i16_info, v8i16x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v64i8_info, v16i8x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
// Integer, masked as v2i64.
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v16i32_info, v4i32x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v32i16_info, v8i16x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v64i8_info, v16i8x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;

// 256-bit extracts from 512-bit vectors. Floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x8Z",
                              v8f64_info, v4f64x_info, v8f32x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z",
                              v16f32_info, v8f32x_info, v4f64x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasAVX512]>;

// Integer, masked as v8i32.
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
                              v8i64_info, v4i64x_info, v8i32x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
                              v32i16_info, v16i16x_info, v8i32x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
                              v64i8_info, v32i8x_info, v8i32x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasDQI]>;
// Integer, masked as v4i64.
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z",
                              v16i32_info, v8i32x_info, v4i64x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z",
                              v32i16_info, v16i16x_info, v4i64x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z",
                              v64i8_info, v32i8x_info, v4i64x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasAVX512]>;
999
// vextractps - extract 32 bits from XMM
// Both forms match an element extract from the v4f32 source viewed as v4i32.

// Destination is a general-purpose register.
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg,
      (outs GR32orGR64:$dst),
      (ins VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set GR32orGR64:$dst,
            (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
      EVEX, WIG, Sched<[WriteVecExtract]>;

// Destination is a 32-bit memory location.
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem,
      (outs),
      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
              addr:$dst)]>,
      EVEX, WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1013
1014//===---------------------------------------------------------------------===//
1015// AVX-512 BROADCAST
1016//---
// broadcast with a scalar argument.
// Selects the rr / rrk / rrkz forms of Name#DestInfo.ZSuffix for an
// X86VBroadcast whose source is a scalar in SrcInfo.FRC. The scalar is first
// copied into the SrcInfo.RC register class, as the instruction operand
// requires.
multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
                                   X86VectorVTInfo SrcInfo> {
  // Unmasked.
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name # DestInfo.ZSuffix # "rr")
                (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;

  // Merge-masking: $src0 supplies the lanes whose mask bit is clear.
  def : Pat<(DestInfo.VT
             (vselect_mask DestInfo.KRCWM:$mask,
                           (X86VBroadcast SrcInfo.FRC:$src),
                           DestInfo.RC:$src0)),
            (!cast<Instruction>(Name # DestInfo.ZSuffix # "rrk")
                DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
                (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;

  // Zero-masking.
  def : Pat<(DestInfo.VT
             (vselect_mask DestInfo.KRCWM:$mask,
                           (X86VBroadcast SrcInfo.FRC:$src),
                           DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name # DestInfo.ZSuffix # "rrkz")
                DestInfo.KRCWM:$mask,
                (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}
1035
1036// Split version to allow mask and broadcast node to be different types. This
1037// helps support the 32x2 broadcasts.
1038multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1039                                     SchedWrite SchedRR, SchedWrite SchedRM,
1040                                     X86VectorVTInfo MaskInfo,
1041                                     X86VectorVTInfo DestInfo,
1042                                     X86VectorVTInfo SrcInfo,
1043                                     bit IsConvertibleToThreeAddress,
1044                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
1045                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
1046  let hasSideEffects = 0 in
1047  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1048                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1049                    [(set MaskInfo.RC:$dst,
1050                      (MaskInfo.VT
1051                       (bitconvert
1052                        (DestInfo.VT
1053                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1054                    DestInfo.ExeDomain>, T8, PD, EVEX, Sched<[SchedRR]>;
1055  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1056                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1057                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1058                       "${dst} {${mask}} {z}, $src}"),
1059                       [(set MaskInfo.RC:$dst,
1060                         (vselect_mask MaskInfo.KRCWM:$mask,
1061                          (MaskInfo.VT
1062                           (bitconvert
1063                            (DestInfo.VT
1064                             (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1065                          MaskInfo.ImmAllZerosV))],
1066                       DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
1067  let Constraints = "$src0 = $dst" in
1068  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1069                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1070                          SrcInfo.RC:$src),
1071                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1072                     "${dst} {${mask}}, $src}"),
1073                     [(set MaskInfo.RC:$dst,
1074                       (vselect_mask MaskInfo.KRCWM:$mask,
1075                        (MaskInfo.VT
1076                         (bitconvert
1077                          (DestInfo.VT
1078                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1079                        MaskInfo.RC:$src0))],
1080                      DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K, Sched<[SchedRR]>;
1081
1082  let hasSideEffects = 0, mayLoad = 1, isReMaterializable = 1, canFoldAsLoad = 1 in
1083  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1084                    (ins SrcInfo.ScalarMemOp:$src),
1085                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1086                    [(set MaskInfo.RC:$dst,
1087                      (MaskInfo.VT
1088                       (bitconvert
1089                        (DestInfo.VT
1090                         (UnmaskedBcastOp addr:$src)))))],
1091                    DestInfo.ExeDomain>, T8, PD, EVEX,
1092                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1093
1094  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1095                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1096                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1097                       "${dst} {${mask}} {z}, $src}"),
1098                       [(set MaskInfo.RC:$dst,
1099                         (vselect_mask MaskInfo.KRCWM:$mask,
1100                          (MaskInfo.VT
1101                           (bitconvert
1102                            (DestInfo.VT
1103                             (SrcInfo.BroadcastLdFrag addr:$src)))),
1104                          MaskInfo.ImmAllZerosV))],
1105                       DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ,
1106                       EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1107
1108  let Constraints = "$src0 = $dst",
1109      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1110  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1111                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1112                          SrcInfo.ScalarMemOp:$src),
1113                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1114                     "${dst} {${mask}}, $src}"),
1115                     [(set MaskInfo.RC:$dst,
1116                       (vselect_mask MaskInfo.KRCWM:$mask,
1117                        (MaskInfo.VT
1118                         (bitconvert
1119                          (DestInfo.VT
1120                           (SrcInfo.BroadcastLdFrag addr:$src)))),
1121                        MaskInfo.RC:$src0))],
1122                      DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K,
1123                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1124}
1125
// Thin wrapper around avx512_broadcast_rm_split for the common case in which
// the mask type and the broadcast result type are the same: DestInfo is
// forwarded in two consecutive argument positions.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
                               bit IsConvertibleToThreeAddress>
    : avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
                                DestInfo, DestInfo, SrcInfo,
                                IsConvertibleToThreeAddress>;
1135
// Scalar FP broadcast with 512-bit (Z) and 256-bit (Z256) destinations only;
// this multiclass does not instantiate a 128-bit form. Every form takes its
// source operand info from _.info128 and also pulls in
// avx512_broadcast_scalar for the same destination/source pair.
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  // 512-bit destination.
  let Predicates = [HasAVX512] in
  defm Z : avx512_broadcast_rm<opc, OpcodeStr,
                               WriteFShuffle256, WriteFShuffle256Ld,
                               _.info512, _.info128,
                               /*IsConvertibleToThreeAddress=*/1>,
           avx512_broadcast_scalar<NAME, _.info512, _.info128>,
           EVEX_V512;

  // 256-bit destination.
  let Predicates = [HasVLX] in
  defm Z256 : avx512_broadcast_rm<opc, OpcodeStr,
                                  WriteFShuffle256, WriteFShuffle256Ld,
                                  _.info256, _.info128,
                                  /*IsConvertibleToThreeAddress=*/1>,
              avx512_broadcast_scalar<NAME, _.info256, _.info128>,
              EVEX_V256;
}
1152
// Scalar FP broadcast instantiated at all three vector lengths: 512-bit (Z),
// 256-bit (Z256) and 128-bit (Z128). The source operand info is _.info128 in
// every case, and every form also pulls in avx512_broadcast_scalar.
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  // 512-bit destination.
  let Predicates = [HasAVX512] in
  defm Z : avx512_broadcast_rm<opc, OpcodeStr,
                               WriteFShuffle256, WriteFShuffle256Ld,
                               _.info512, _.info128,
                               /*IsConvertibleToThreeAddress=*/1>,
           avx512_broadcast_scalar<NAME, _.info512, _.info128>,
           EVEX_V512;

  // 256-bit and 128-bit destinations. Both use the same WriteFShuffle256
  // scheduling classes as the 512-bit form.
  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr,
                                    WriteFShuffle256, WriteFShuffle256Ld,
                                    _.info256, _.info128,
                                    /*IsConvertibleToThreeAddress=*/1>,
                avx512_broadcast_scalar<NAME, _.info256, _.info128>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr,
                                    WriteFShuffle256, WriteFShuffle256Ld,
                                    _.info128, _.info128,
                                    /*IsConvertibleToThreeAddress=*/1>,
                avx512_broadcast_scalar<NAME, _.info128, _.info128>,
                EVEX_V128;
  }
}
// EVEX scalar FP broadcasts: opcode 0x18 for f32 and 0x19 for f64. The f64
// flavour additionally carries REX_W.
defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                           avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                           avx512vl_f64_info>,
                    REX_W;
1177
// Broadcast of a register of class SrcRC into every element of a vector
// (register form only). The mnemonic is "vpbroadcast" followed by _.Suffix.
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  // vselect is supplied explicitly as the last argument: fold with a mask even
  // if it has multiple uses, since the instruction is cheap.
  let ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                            (outs _.RC:$dst), (ins SrcRC:$src),
                            "vpbroadcast" # _.Suffix, "$src", "$src",
                            (_.VT (OpNode SrcRC:$src)),
                            /*IsCommutable*/0, /*IsKCommutable*/0,
                            /*IsKZCommutable*/0, vselect>,
            T8, PD, EVEX, Sched<[SchedRR]>;
}
1190
// Byte/word register broadcast. The instruction operand is always GR32; the
// selection patterns below place the narrower SrcRC value into an undefined
// 32-bit register at sub-register index Subreg. Name is the instruction name
// prefix used to look up the rr / rrk / rrkz records through !cast.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name,
                                      SchedWrite SchedRR, X86VectorVTInfo _,
                                      SDPatternOperator OpNode,
                                      RegisterClass SrcRC,
                                      SubRegIndex Subreg> {
  // The instructions are emitted with empty pattern lists; selection goes
  // through the explicit Pat records that follow. "$src0 = $dst" is the
  // masking constraint tying the pass-through operand to the result.
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
                (outs _.RC:$dst),
                (ins GR32:$src),
                !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                "vpbroadcast" # _.Suffix, "$src", "$src",
                [], [], [],
                "$src0 = $dst">,
            T8, PD, EVEX, Sched<[SchedRR]>;

  // Unmasked broadcast.
  def : Pat<(_.VT (OpNode SrcRC:$src)),
            (!cast<Instruction>(Name#rr)
               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Merge-masked broadcast.
  // Fold with a mask even if it has multiple uses since it is cheap.
  def : Pat<(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
            (!cast<Instruction>(Name#rrk)
               _.RC:$src0, _.KRCWM:$mask,
               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Zero-masked broadcast.
  def : Pat<(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)),
                     _.ImmAllZerosV),
            (!cast<Instruction>(Name#rrkz)
               _.KRCWM:$mask,
               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}
1215
// Instantiates avx512_int_broadcastbw_reg at 512, 256 and 128 bits. The
// 512-bit form is guarded by prd alone; the narrower forms also need HasVLX.
// Name is extended with the matching Z / Z256 / Z128 suffix before being
// handed down.
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                                         AVX512VLVectorVTInfo _,
                                         SDPatternOperator OpNode,
                                         RegisterClass SrcRC,
                                         SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256,
                                      _.info512, OpNode, SrcRC, Subreg>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                           _.info256, OpNode, SrcRC, Subreg>,
                EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                           _.info128, OpNode, SrcRC, Subreg>,
                EVEX_V128;
  }
}
1229
// Instantiates avx512_int_broadcast_reg at 512, 256 and 128 bits. The 512-bit
// form is guarded by prd alone; the narrower forms also need HasVLX.
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512,
                                    OpNode, SrcRC>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256,
                                         OpNode, SrcRC>,
                EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128,
                                         OpNode, SrcRC>,
                EVEX_V128;
  }
}
1243
// Register-source integer broadcasts.
//  - Byte and word forms go through the "bw" multiclass (GR8 / GR16 with a
//    sub-register index) and require HasBWI. The string argument repeats the
//    defm name because the multiclass resolves its records via !cast on it.
//  - Dword and qword forms take GR32 / GR64 directly and require HasAVX512.
//    They share opcode 0x7C; the qword form adds REX_W.
defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                         avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit,
                         HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                         avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                         HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                         X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                         X86VBroadcast, GR64, HasAVX512>,
                     REX_W;
1253
// Instantiates avx512_broadcast_rm for an integer element type at 512, 256 and
// 128 bits. The source info is _.info128 for every width. The 512-bit form is
// guarded by prd alone; the narrower forms also need HasVLX.
// IsConvertibleToThreeAddress is forwarded unchanged to each instantiation.
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd,
                                      bit IsConvertibleToThreeAddress> {
  let Predicates = [prd] in
  defm Z : avx512_broadcast_rm<opc, OpcodeStr,
                               WriteShuffle256, WriteShuffle256Ld,
                               _.info512, _.info128,
                               IsConvertibleToThreeAddress>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr,
                                    WriteShuffle256, WriteShuffle256Ld,
                                    _.info256, _.info128,
                                    IsConvertibleToThreeAddress>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr,
                                    WriteShuffle, WriteShuffleXLd,
                                    _.info128, _.info128,
                                    IsConvertibleToThreeAddress>,
                EVEX_V128;
  }
}
1274
// Integer element broadcasts built on avx512_int_broadcast_rm_vl. Byte and
// word forms require HasBWI and pass 0 for IsConvertibleToThreeAddress; dword
// and qword forms require HasAVX512 and pass 1. The qword form adds REX_W.
defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                        avx512vl_i8_info, HasBWI, 0>;
defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                        avx512vl_i16_info, HasBWI, 0>;
defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                        avx512vl_i32_info, HasAVX512, 1>;
defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                        avx512vl_i64_info, HasAVX512, 1>,
                    REX_W;
1283
// Memory-source subvector broadcast: loads a _Src-sized memory operand and
// produces a _Dst vector through OpNode. Masked variants come from
// AVX512_maskable.
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      X86VectorVTInfo _Dst,
                                      X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst,
                            (outs _Dst.RC:$dst), (ins _Src.MemOp:$src),
                            OpcodeStr, "$src", "$src",
                            (_Dst.VT (OpNode addr:$src))>,
            Sched<[SchedWriteShuffle.YMM.Folded]>,
            AVX5128IBase, EVEX;
}
1294
// Variant of the subvector broadcast intended for the AVX512DQ broadcast
// instructions. The unmasked pattern is null_frag, so these instructions are
// only selected when masking is requested. Because the unmasked form has no
// pattern, hasSideEffects and mayLoad are stated explicitly.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                                         SDPatternOperator OpNode,
                                         X86VectorVTInfo _Dst,
                                         X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst,
                                  (outs _Dst.RC:$dst), (ins _Src.MemOp:$src),
                                  OpcodeStr, "$src", "$src",
                                  (null_frag),
                                  (_Dst.VT (OpNode addr:$src))>,
            Sched<[SchedWriteShuffle.YMM.Folded]>,
            AVX5128IBase, EVEX;
}
// 512-bit f16 broadcasts are selected to the 16-bit integer broadcast
// instruction (VPBROADCASTW).
let Predicates = [HasBWI] in {
  // Broadcast from memory.
  def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZrm addr:$src)>;

  // Broadcast from a vector register.
  def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZrr VR128X:$src)>;

  // Broadcast from a scalar f16 register, first copied into VR128X.
  def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
}
// 128-bit and 256-bit f16 broadcasts, likewise selected to VPBROADCASTW.
let Predicates = [HasVLX, HasBWI] in {
  // Broadcast from memory.
  def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ128rm addr:$src)>;
  def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ256rm addr:$src)>;

  // Broadcast from a vector register.
  def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZ128rr VR128X:$src)>;
  def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZ256rr VR128X:$src)>;

  // Broadcast from a scalar f16 register, first copied into VR128X.
  def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
  def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
}
1335
1336//===----------------------------------------------------------------------===//
1337// AVX-512 BROADCAST SUBVECTORS
1338//
1339
// 512-bit destination subvector broadcasts.
//   *32X4: 128-bit memory source, 32-bit elements.
//   *64X4: 256-bit memory source, 64-bit elements, REX_W set.
defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                           X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                           X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                           X86SubVBroadcastld256, v8i64_info, v4i64x_info>,
                       REX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                           X86SubVBroadcastld256, v8f64_info, v4f64x_info>,
                       REX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
1352
let Predicates = [HasAVX512] in {
  // Unmasked 256-bit subvector broadcasts: every 512-bit FP type selects
  // VBROADCASTF64X4 and every 512-bit integer type selects VBROADCASTI64X4.
  def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
            (VBROADCASTF64X4rm addr:$src)>;
  def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
            (VBROADCASTF64X4rm addr:$src)>;
  def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
            (VBROADCASTF64X4rm addr:$src)>;
  def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
            (VBROADCASTI64X4rm addr:$src)>;
  def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
            (VBROADCASTI64X4rm addr:$src)>;
  def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
            (VBROADCASTI64X4rm addr:$src)>;
  def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
            (VBROADCASTI64X4rm addr:$src)>;

  // Unmasked 128-bit subvector broadcasts: every 512-bit FP type selects
  // VBROADCASTF32X4 and every 512-bit integer type selects VBROADCASTI32X4.
  def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTF32X4rm addr:$src)>;
  def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTF32X4rm addr:$src)>;
  def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTF32X4rm addr:$src)>;
  def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTI32X4rm addr:$src)>;
  def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTI32X4rm addr:$src)>;
  def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTI32X4rm addr:$src)>;
  def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTI32X4rm addr:$src)>;

  // Patterns for selects of bitcasted operations: the broadcast was created
  // with 64-bit elements but is masked per 32-bit element (16-bit mask), so
  // the 32X4 instruction is used.
  def : Pat<(vselect_mask
               VK16WM:$mask,
               (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
               (v16f32 immAllZerosV)),
            (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect_mask
               VK16WM:$mask,
               (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
               VR512:$src0),
            (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect_mask
               VK16WM:$mask,
               (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
               (v16i32 immAllZerosV)),
            (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect_mask
               VK16WM:$mask,
               (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
               VR512:$src0),
            (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

  // Same idea in the other direction: the broadcast was created with 32-bit
  // elements but is masked per 64-bit element (8-bit mask), so the 64X4
  // instruction is used.
  def : Pat<(vselect_mask
               VK8WM:$mask,
               (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
               (v8f64 immAllZerosV)),
            (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect_mask
               VK8WM:$mask,
               (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
               VR512:$src0),
            (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect_mask
               VK8WM:$mask,
               (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
               (v8i64 immAllZerosV)),
            (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect_mask
               VK8WM:$mask,
               (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
               VR512:$src0),
            (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
1419
let Predicates = [HasVLX] in {
  // 256-bit destination, 128-bit memory source, 32-bit elements.
  defm VBROADCASTI32X4Z256
      : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
            X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
        EVEX_V256, EVEX_CD8<32, CD8VT4>;
  defm VBROADCASTF32X4Z256
      : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
            X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
        EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Unmasked 128-bit subvector broadcasts: every 256-bit FP type selects the
  // F32X4 form and every 256-bit integer type selects the I32X4 form.
  def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTF32X4Z256rm addr:$src)>;
  def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTF32X4Z256rm addr:$src)>;
  def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTF32X4Z256rm addr:$src)>;
  def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTI32X4Z256rm addr:$src)>;
  def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTI32X4Z256rm addr:$src)>;
  def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTI32X4Z256rm addr:$src)>;
  def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTI32X4Z256rm addr:$src)>;

  // Patterns for selects of bitcasted operations: the broadcast was created
  // with 64-bit elements but is masked per 32-bit element (8-bit mask).
  def : Pat<(vselect_mask
               VK8WM:$mask,
               (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
               (v8f32 immAllZerosV)),
            (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect_mask
               VK8WM:$mask,
               (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
               VR256X:$src0),
            (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect_mask
               VK8WM:$mask,
               (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
               (v8i32 immAllZerosV)),
            (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect_mask
               VK8WM:$mask,
               (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
               VR256X:$src0),
            (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
}
1461
// 512-bit bf16 subvector broadcasts reuse the FP subvector broadcast
// instructions.
let Predicates = [HasBF16] in {
  // 256-bit source.
  def : Pat<(v32bf16 (X86SubVBroadcastld256 addr:$src)),
            (VBROADCASTF64X4rm addr:$src)>;
  // 128-bit source.
  def : Pat<(v32bf16 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTF32X4rm addr:$src)>;
}
1468
// 256-bit bf16 subvector broadcast from a 128-bit source.
let Predicates = [HasBF16, HasVLX] in {
  def : Pat<(v16bf16 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTF32X4Z256rm addr:$src)>;
}
1472
let Predicates = [HasVLX, HasDQI] in {
  // NOTE: despite the Z128 suffix in the record names, these definitions have
  // a 256-bit destination (EVEX_V256, v4i64x_info / v4f64x_info) and a
  // 128-bit memory source. They use the _dq multiclass, so only the masked
  // forms carry selection patterns.
  defm VBROADCASTI64X2Z128
      : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
            X86SubVBroadcastld128, v4i64x_info, v2i64x_info>,
        EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
  defm VBROADCASTF64X2Z128
      : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
            X86SubVBroadcastld128, v4f64x_info, v2f64x_info>,
        EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;

  // Patterns for selects of bitcasted operations: the broadcast was created
  // with 32-bit elements but is masked per 64-bit element (4-bit mask).
  def : Pat<(vselect_mask
               VK4WM:$mask,
               (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
               (v4f64 immAllZerosV)),
            (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
  def : Pat<(vselect_mask
               VK4WM:$mask,
               (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
               VR256X:$src0),
            (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(vselect_mask
               VK4WM:$mask,
               (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
               (v4i64 immAllZerosV)),
            (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
  def : Pat<(vselect_mask
               VK4WM:$mask,
               (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
               VR256X:$src0),
            (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
1499
let Predicates = [HasDQI] in {
  // 512-bit destination DQ subvector broadcasts. They use the _dq multiclass,
  // so only the masked forms carry selection patterns.
  //   *64X2: 128-bit memory source, 64-bit elements, REX_W set.
  //   *32X8: 256-bit memory source, 32-bit elements.
  defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                             X86SubVBroadcastld128, v8i64_info, v2i64x_info>,
                         REX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
  defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                             X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
                         EVEX_V512, EVEX_CD8<32, CD8VT8>;
  defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                             X86SubVBroadcastld128, v8f64_info, v2f64x_info>,
                         REX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
  defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                             X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
                         EVEX_V512, EVEX_CD8<32, CD8VT8>;

  // Patterns for selects of bitcasted operations: a 256-bit-source broadcast
  // created with 64-bit elements but masked per 32-bit element (16-bit mask)
  // selects the 32X8 instruction.
  def : Pat<(vselect_mask
               VK16WM:$mask,
               (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
               (v16f32 immAllZerosV)),
            (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect_mask
               VK16WM:$mask,
               (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
               VR512:$src0),
            (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect_mask
               VK16WM:$mask,
               (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
               (v16i32 immAllZerosV)),
            (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect_mask
               VK16WM:$mask,
               (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
               VR512:$src0),
            (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

  // A 128-bit-source broadcast created with 32-bit elements but masked per
  // 64-bit element (8-bit mask) selects the 64X2 instruction.
  def : Pat<(vselect_mask
               VK8WM:$mask,
               (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
               (v8f64 immAllZerosV)),
            (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect_mask
               VK8WM:$mask,
               (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
               VR512:$src0),
            (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect_mask
               VK8WM:$mask,
               (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
               (v8i64 immAllZerosV)),
            (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect_mask
               VK8WM:$mask,
               (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
               VR512:$src0),
            (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
1549
// 32x2 broadcasts at 512 and 256 bits, built directly on
// avx512_broadcast_rm_split so that _Dst and _Src can supply different type
// infos. Both forms pass 0 followed by two null_frag operators as the
// trailing arguments.
// NOTE(review): the two null_frag arguments presumably replace the unmasked
// pattern operators of the split multiclass - confirm against its header.
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _Dst,
                                        AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
  defm Z : avx512_broadcast_rm_split<opc, OpcodeStr,
                                     WriteShuffle256, WriteShuffle256Ld,
                                     _Dst.info512, _Src.info512, _Src.info128,
                                     0, null_frag, null_frag>,
           EVEX_V512;

  let Predicates = [HasDQI, HasVLX] in
  defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr,
                                        WriteShuffle256, WriteShuffle256Ld,
                                        _Dst.info256, _Src.info256,
                                        _Src.info128,
                                        0, null_frag, null_frag>,
              EVEX_V256;
}
1564
// Integer flavour of the 32x2 broadcast: inherits the Z and Z256 forms from
// avx512_common_broadcast_32x2 and adds a 128-bit (Z128) form.
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo _Dst,
                                         AVX512VLVectorVTInfo _Src>
    : avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
  let Predicates = [HasDQI, HasVLX] in
  defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr,
                                        WriteShuffle, WriteShuffleXLd,
                                        _Dst.info128, _Src.info128,
                                        _Src.info128,
                                        0, null_frag, null_frag>,
              EVEX_V128;
}
1576
// 32x2 broadcasts. The integer version uses the i32x2 multiclass, which adds
// a 128-bit form; the FP version uses the common multiclass, which provides
// the 512-bit and 256-bit forms only.
defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                           avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                           avx512vl_f32_info, avx512vl_f64_info>;
1581
1582//===----------------------------------------------------------------------===//
1583// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1584//---
// Register form of the mask-to-vector broadcast: takes a mask register of
// class KRC and produces a vector of type _.VT through X86VBroadcastm.
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg,
                      (outs _.RC:$dst), (ins KRC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
           EVEX, Sched<[WriteShuffle]>;
}
1592
// Instantiates avx512_mask_broadcastm at 512, 256 and 128 bits. The 512-bit
// form requires HasCDI; the narrower forms also require HasVLX.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo,
                                 RegisterClass KRC> {
  let Predicates = [HasCDI] in
  defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>,
           EVEX_V512;

  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>,
                EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>,
                EVEX_V128;
  }
}
1602
// Mask-to-vector broadcasts: a VK16 mask into i32 elements (MW2D) and a VK8
// mask into i64 elements (MB2Q); the latter adds REX_W.
defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                             avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                             avx512vl_i64_info, VK8>,
                       REX_W;
1607
1608//===----------------------------------------------------------------------===//
1609// -- VPERMI2 - 3 source operands form --
// VPERMI2 register and full-vector-memory forms. $src1 is tied to $dst and is
// used as the index operand, typed by IdxVT, while the data operands use _.
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0 in {
    // Both data operands in registers.
    defm rr : AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT,
                  (outs _.RC:$dst),
                  (ins _.RC:$src2, _.RC:$src3),
                  OpcodeStr, "$src3, $src2", "$src2, $src3",
                  (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)),
                  1>,
              EVEX, VVVV, AVX5128IBase, Sched<[sched]>;

    // Second data operand loaded from memory.
    let mayLoad = 1 in
    defm rm : AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT,
                  (outs _.RC:$dst),
                  (ins _.RC:$src2, _.MemOp:$src3),
                  OpcodeStr, "$src3, $src2", "$src2, $src3",
                  (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                                    (_.VT (_.LdFrag addr:$src3)))),
                  1>,
              EVEX, VVVV, AVX5128IBase,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
1630
// VPERMI2 embedded-broadcast memory form: the second data operand is a scalar
// memory operand loaded through _.BroadcastLdFrag, and the assembly strings
// append _.BroadcastStr to it. $src1 is tied to $dst and is used as the index
// operand, typed by IdxVT.
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayLoad = 1 in
  defm rmb : AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT,
                 (outs _.RC:$dst),
                 (ins _.RC:$src2, _.ScalarMemOp:$src3),
                 OpcodeStr,
                 "${src3}" # _.BroadcastStr # ", $src2",
                 "$src2, ${src3}" # _.BroadcastStr,
                 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                                   (_.VT (_.BroadcastLdFrag addr:$src3)))),
                 1>,
             AVX5128IBase, EVEX, VVVV, EVEX_B,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}
1645
// Instantiates the VPERMI2 register/memory forms plus the embedded-broadcast
// form at 512, 128 and 256 bits. No Predicates are set here for the 512-bit
// form; the 128-bit and 256-bit forms require HasVLX. ShuffleMask supplies
// the index vector type info.
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME#Z : avx512_perm_i<opc, OpcodeStr, sched,
                              VTInfo.info512, ShuffleMask.info512>,
                avx512_perm_i_mb<opc, OpcodeStr, sched,
                                 VTInfo.info512, ShuffleMask.info512>,
                EVEX_V512;

  let Predicates = [HasVLX] in {
    defm NAME#Z128 : avx512_perm_i<opc, OpcodeStr, sched,
                                   VTInfo.info128, ShuffleMask.info128>,
                     avx512_perm_i_mb<opc, OpcodeStr, sched,
                                      VTInfo.info128, ShuffleMask.info128>,
                     EVEX_V128;
    defm NAME#Z256 : avx512_perm_i<opc, OpcodeStr, sched,
                                   VTInfo.info256, ShuffleMask.info256>,
                     avx512_perm_i_mb<opc, OpcodeStr, sched,
                                      VTInfo.info256, ShuffleMask.info256>,
                     EVEX_V256;
  }
}
1665
// Width expansion of the VPERMI2-style permute for the instantiations that
// pass an explicit feature predicate (used below for the word and byte
// variants). Unlike avx512_perm_i_sizes, only avx512_perm_i is instantiated,
// so no broadcast-memory (rmb) form is produced.
//   Idx - per-width type info for the index operand.
//   Prd - feature predicate required at every width.
1666multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1667                                  X86FoldableSchedWrite sched,
1668                                  AVX512VLVectorVTInfo VTInfo,
1669                                  AVX512VLVectorVTInfo Idx,
1670                                  Predicate Prd> {
  // 512-bit form needs only Prd.
1671  let Predicates = [Prd] in
1672  defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1673                             Idx.info512>, EVEX_V512;
  // 128-bit and 256-bit forms need Prd and HasVLX.
1674  let Predicates = [Prd, HasVLX] in {
1675  defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1676                                Idx.info128>, EVEX_V128;
1677  defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1678                                Idx.info256>,  EVEX_V256;
1679  }
1680}
1681
// VPERMI2 instantiations. D/Q share opcode 0x76, W/B share 0x75 and PS/PD
// share 0x77; within each pair the wider-element variant adds REX_W.
// The fp variants (PS/PD) pair floating-point data type info with integer
// index type info of the same element width.
1682defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1683                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1684defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1685                  avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
// Word and byte variants go through the _bw expansion and are gated on
// HasBWI and HasVBMI respectively.
1686defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1687                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1688                  REX_W, EVEX_CD8<16, CD8VF>;
1689defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1690                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1691                  EVEX_CD8<8, CD8VF>;
1692defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1693                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1694defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1695                  avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1696
1697// Extra patterns to deal with extra bitcasts due to passthru and index being
1698// different types on the fp versions.
//   InstrStr - instruction name prefix; "rrk", "rmk" and "rmbk" are appended
//              to select the merge-masked register, memory and broadcast forms.
//   _        - type info of the permute result and data operands.
//   IdxVT    - type info the index operand is bitcast to.
//   CastVT   - type info of the value that $src1 is bitcast from.
// In every pattern the same $src1 register appears twice: bitcast to IdxVT as
// the permute index, and bitcast to _.VT as the vselect_mask passthru value.
1699multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1700                                  X86VectorVTInfo IdxVT,
1701                                  X86VectorVTInfo CastVT> {
  // Register third operand -> "rrk".
1702  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1703                                (X86VPermt2 (_.VT _.RC:$src2),
1704                                            (IdxVT.VT (bitconvert
1705                                                       (CastVT.VT _.RC:$src1))),
1706                                            _.RC:$src3),
1707                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1708            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1709                                                _.RC:$src2, _.RC:$src3)>;
  // Full-vector load as third operand -> "rmk".
1710  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1711                                (X86VPermt2 _.RC:$src2,
1712                                            (IdxVT.VT (bitconvert
1713                                                       (CastVT.VT _.RC:$src1))),
1714                                            (_.LdFrag addr:$src3)),
1715                                (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1716            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1717                                                _.RC:$src2, addr:$src3)>;
  // Broadcast load as third operand -> "rmbk".
1718  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1719                                 (X86VPermt2 _.RC:$src2,
1720                                             (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1721                                             (_.BroadcastLdFrag addr:$src3)),
1722                                 (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1723            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1724                                                 _.RC:$src2, addr:$src3)>;
1725}
1726
1727// TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Only the single-precision VPERMI2PS forms are covered, one line per vector
// width, each with a vXi64 cast type of the same total width.
1728defm : avx512_perm_i_lowering<"VPERMI2PSZ", v16f32_info, v16i32_info, v8i64_info>;
1729defm : avx512_perm_i_lowering<"VPERMI2PSZ256", v8f32x_info, v8i32x_info, v4i64x_info>;
1730defm : avx512_perm_i_lowering<"VPERMI2PSZ128", v4f32x_info, v4i32x_info, v2i64x_info>;
1731
1732// VPERMT2
// Register ("rr") and full-vector memory ("rm") forms of the VPERMT2-style
// two-source permute, built on AVX512_maskable_3src.
//   _     - type info for the data operands and result.
//   IdxVT - type info for the index operand ($src2).
// $src1 is tied to $dst and is the first (data) operand of X86VPermt2; the
// index is the explicit $src2 input.
1733multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1734                         X86FoldableSchedWrite sched,
1735                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1736let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  // Register third operand.
1737  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1738          (ins IdxVT.RC:$src2, _.RC:$src3),
1739          OpcodeStr, "$src3, $src2", "$src2, $src3",
1740          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1741          EVEX, VVVV, AVX5128IBase, Sched<[sched]>;
1742
  // Third operand loaded from memory via _.LdFrag; uses the folded-load
  // scheduling entries.
1743  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1744            (ins IdxVT.RC:$src2, _.MemOp:$src3),
1745            OpcodeStr, "$src3, $src2", "$src2, $src3",
1746            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1747                   (_.LdFrag addr:$src3))), 1>,
1748            EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1749  }
1750}
// Broadcast-memory ("rmb") form of the VPERMT2-style permute. Same operand
// roles as avx512_perm_t, but $src3 is a scalar memory operand consumed
// through _.BroadcastLdFrag and the encoding sets EVEX_B.
// NOTE(review): unlike avx512_perm_i_mb, mayLoad/hasSideEffects are not set
// explicitly here; presumably they are inferred from the pattern - confirm.
1751multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1752                            X86FoldableSchedWrite sched,
1753                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1754  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1755  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1756              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1757              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1758              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1759              (_.VT (X86VPermt2 _.RC:$src1,
1760               IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1761              AVX5128IBase, EVEX, VVVV, EVEX_B,
1762              Sched<[sched.Folded, sched.ReadAfterFold]>;
1763}
1764
// Instantiates the VPERMT2-style permute at 512, 128 and 256 bits, combining
// avx512_perm_t (rr/rm) with avx512_perm_t_mb (rmb) at every width.
//   VTInfo      - per-width type info for the data operands.
//   ShuffleMask - per-width type info for the index operand.
1765multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1766                               X86FoldableSchedWrite sched,
1767                               AVX512VLVectorVTInfo VTInfo,
1768                               AVX512VLVectorVTInfo ShuffleMask> {
  // 512-bit form: no predicate is added by this multiclass itself.
1769  defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1770                             ShuffleMask.info512>,
1771               avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1772                                ShuffleMask.info512>, EVEX_V512;
  // 128-bit and 256-bit forms are gated on HasVLX.
1773  let Predicates = [HasVLX] in {
1774  defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1775                                ShuffleMask.info128>,
1776                  avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1777                                   ShuffleMask.info128>, EVEX_V128;
1778  defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1779                                ShuffleMask.info256>,
1780                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1781                                    ShuffleMask.info256>, EVEX_V256;
1782  }
1783}
1784
// Width expansion of the VPERMT2-style permute for instantiations that pass
// an explicit feature predicate. Only avx512_perm_t is instantiated, so no
// broadcast-memory (rmb) form is produced.
//   Idx - per-width type info for the index operand.
//   Prd - feature predicate required at every width.
1785multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1786                                  X86FoldableSchedWrite sched,
1787                                  AVX512VLVectorVTInfo VTInfo,
1788                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  // 512-bit form needs only Prd.
1789  let Predicates = [Prd] in
1790  defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1791                             Idx.info512>, EVEX_V512;
  // 128-bit and 256-bit forms need Prd and HasVLX.
1792  let Predicates = [Prd, HasVLX] in {
1793  defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1794                                Idx.info128>, EVEX_V128;
1795  defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1796                                Idx.info256>, EVEX_V256;
1797  }
1798}
1799
// VPERMT2 instantiations. D/Q share opcode 0x7E, W/B share 0x7D and PS/PD
// share 0x7F; within each pair the wider-element variant adds REX_W.
// The fp variants (PS/PD) pair floating-point data type info with integer
// index type info of the same element width.
1800defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1801                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1802defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1803                  avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
// Word and byte variants go through the _bw expansion and are gated on
// HasBWI and HasVBMI respectively.
1804defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1805                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1806                  REX_W, EVEX_CD8<16, CD8VF>;
1807defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1808                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1809                  EVEX_CD8<8, CD8VF>;
1810defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1811                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1812defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1813                  avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1814
1815//===----------------------------------------------------------------------===//
1816// AVX-512 - BLEND using mask
1817//
1818
// Register and full-vector memory forms of the mask-blend instructions:
// rr/rm (unmasked), rrk/rmk (EVEX_K) and rrkz/rmkz (EVEX_KZ).
// All six defs carry an empty pattern list ([]), so this multiclass supplies
// only operands, assembly strings, encoding and scheduling.
// NOTE(review): despite the scheduling-class-like name, this multiclass is
// the instruction template instantiated by blendmask_dq and blendmask_bw.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   sched     - foldable scheduling write.
//   _         - vector type info (register class, mask class, memory operand).
1819multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1820                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1821  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  // Register-register forms.
1822  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1823             (ins _.RC:$src1, _.RC:$src2),
1824             !strconcat(OpcodeStr,
1825             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1826             EVEX, VVVV, Sched<[sched]>;
1827  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1828             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1829             !strconcat(OpcodeStr,
1830             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1831             []>, EVEX, VVVV, EVEX_K, Sched<[sched]>;
1832  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1833             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1834             !strconcat(OpcodeStr,
1835             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1836             []>, EVEX, VVVV, EVEX_KZ, Sched<[sched]>;
  // Memory forms: mayLoad is set explicitly because there is no pattern to
  // infer it from; each also carries EVEX_CD8 keyed on the element size.
1837  let mayLoad = 1 in {
1838  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1839             (ins _.RC:$src1, _.MemOp:$src2),
1840             !strconcat(OpcodeStr,
1841             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1842             []>, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
1843             Sched<[sched.Folded, sched.ReadAfterFold]>;
1844  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1845             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1846             !strconcat(OpcodeStr,
1847             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1848             []>, EVEX, VVVV, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1849             Sched<[sched.Folded, sched.ReadAfterFold]>;
1850  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1851             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1852             !strconcat(OpcodeStr,
1853             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1854             []>, EVEX, VVVV, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1855             Sched<[sched.Folded, sched.ReadAfterFold]>;
1856  }
1857  }
1858}
// Broadcast-memory forms of the mask-blend instructions: rmbk (EVEX_K),
// rmbkz (EVEX_KZ) and rmb (unmasked). $src2 is a scalar memory operand, the
// assembly strings append _.BroadcastStr to it, and every def sets EVEX_B.
// Patterns are empty ([]), so mayLoad/hasSideEffects are set explicitly.
1859multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1860                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1861  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
  // Masked broadcast form.
1862  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1863      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1864       !strconcat(OpcodeStr,
1865            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1866            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1867      EVEX, VVVV, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1868      Sched<[sched.Folded, sched.ReadAfterFold]>;
1869
  // Masked broadcast form with the {z} marker.
1870  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1871      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1872       !strconcat(OpcodeStr,
1873            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
1874            "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1875      EVEX, VVVV, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1876      Sched<[sched.Folded, sched.ReadAfterFold]>;
1877
  // Unmasked broadcast form.
1878  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1879      (ins _.RC:$src1, _.ScalarMemOp:$src2),
1880       !strconcat(OpcodeStr,
1881            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1882            "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1883      EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1884      Sched<[sched.Folded, sched.ReadAfterFold]>;
1885  }
1886}
1887
// Width expansion for the mask-blend instructions instantiated below with
// 32- and 64-bit element types. Every width gets both the regular forms
// (WriteFVarBlendask) and the broadcast forms (WriteFVarBlendask_rmb).
//   sched  - per-width scheduling writes; .ZMM/.YMM/.XMM are selected here.
//   VTInfo - per-width vector type info.
1888multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1889                        AVX512VLVectorVTInfo VTInfo> {
  // 512-bit form: no predicate is added by this multiclass itself.
1890  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1891           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1892                                 EVEX_V512;
1893
  // 256-bit and 128-bit forms are gated on HasVLX.
1894  let Predicates = [HasVLX] in {
1895    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1896                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1897                                      EVEX_V256;
1898    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1899                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1900                                      EVEX_V128;
1901  }
1902}
1903
// Width expansion for the mask-blend instructions instantiated below with
// 8- and 16-bit element types. Only WriteFVarBlendask is used, so no
// broadcast-memory forms are produced, and every width requires HasBWI.
//   sched  - per-width scheduling writes; .ZMM/.YMM/.XMM are selected here.
//   VTInfo - per-width vector type info.
1904multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1905                        AVX512VLVectorVTInfo VTInfo> {
1906  let Predicates = [HasBWI] in
1907    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1908                               EVEX_V512;
1909
  // 256-bit and 128-bit forms additionally require HasVLX.
1910  let Predicates = [HasBWI, HasVLX] in {
1911    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1912                                  EVEX_V256;
1913    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1914                                  EVEX_V128;
1915  }
1916}
1917
// Mask-blend instantiations. The fp variants use opcode 0x65 with
// SchedWriteFVarBlend; the integer variants use 0x64 (D/Q) or 0x66 (B/W) with
// SchedWriteVarBlend. Within each opcode pair the wider-element variant adds
// REX_W.
1918defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
1919                              avx512vl_f32_info>;
1920defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
1921                              avx512vl_f64_info>, REX_W;
1922defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
1923                              avx512vl_i32_info>;
1924defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
1925                              avx512vl_i64_info>, REX_W;
// Byte and word variants go through blendmask_bw (HasBWI, no broadcast forms).
1926defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
1927                              avx512vl_i8_info>;
1928defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
1929                              avx512vl_i16_info>, REX_W;
1930
1931//===----------------------------------------------------------------------===//
1932// Compare Instructions
1933//===----------------------------------------------------------------------===//
1934
1935// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
1936
// Scalar floating-point compare into a mask register (opcode 0xC2 with a
// u8imm condition code). Produces five defs:
//   rri_Int / rmi_Int - vector-register-class operands, built through
//                       AVX512_maskable_cmp with OpNode / OpNode_su.
//   rrib_Int          - register form printed with "{sae}", EVEX_B set,
//                       matched with OpNodeSAE / OpNodeSAE_su.
//   rri / rmi         - isCodeGenOnly forms on the scalar FRC register class.
// Parameters:
//   _            - type info (register classes, suffix, memory fragments).
//   OpNode       - compare node for the regular forms.
//   OpNodeSAE    - compare node for the {sae} form.
//   OpNode_su    - fragment passed as the second pattern of the regular forms.
//   OpNodeSAE_su - fragment passed as the second pattern of the {sae} form.
//   sched        - foldable scheduling write.
1937multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
1938                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
1939                             X86FoldableSchedWrite sched> {
1940  defm  rri_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1941                                       (outs _.KRC:$dst),
1942                                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1943                                       "vcmp"#_.Suffix,
1944                                       "$cc, $src2, $src1", "$src1, $src2, $cc",
1945                                       (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
1946                                       (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc)>,
1947                                       EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  // Memory form: $src2 is matched through _.ScalarIntMemFrags.
1948  let mayLoad = 1 in
1949  defm  rmi_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
1950                                       (outs _.KRC:$dst),
1951                                       (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
1952                                       "vcmp"#_.Suffix,
1953                                       "$cc, $src2, $src1", "$src1, $src2, $cc",
1954                                       (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
1955                                           timm:$cc),
1956                                       (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
1957                                           timm:$cc)>, EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
1958                                       Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
1959
  // {sae} form: declares a use of MXCSR directly and, unlike the other defs
  // in this multiclass, does not add SIMD_EXC.
1960  let Uses = [MXCSR] in
1961  defm  rrib_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1962                                        (outs _.KRC:$dst),
1963                                        (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1964                                        "vcmp"#_.Suffix,
1965                                        "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
1966                                        (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1967                                                   timm:$cc),
1968                                        (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1969                                                      timm:$cc)>,
1970                                        EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
1971
  // Code-generation-only forms operating on the scalar FRC register class.
  // Only the register-register form is marked commutable.
1972  let isCodeGenOnly = 1 in {
1973    let isCommutable = 1 in
1974    def rri : AVX512Ii8<0xC2, MRMSrcReg,
1975                        (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
1976                        !strconcat("vcmp", _.Suffix,
1977                                   "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
1978                        [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1979                                                  _.FRC:$src2,
1980                                                  timm:$cc))]>,
1981                        EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
1982    def rmi : AVX512Ii8<0xC2, MRMSrcMem,
1983                        (outs _.KRC:$dst),
1984                        (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
1985                        !strconcat("vcmp", _.Suffix,
1986                                   "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
1987                        [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1988                                                  (_.ScalarLdFrag addr:$src2),
1989                                                  timm:$cc))]>,
1990                        EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
1991                        Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
1992  }
1993}
1994
// Scalar compare instantiations. All three use the same X86cmpms /
// X86cmpmsSAE nodes and SchedWriteFCmp.Scl; they differ in type info,
// prefix/map classes and feature predicate.
1995let Predicates = [HasAVX512] in {
1996  let ExeDomain = SSEPackedSingle in
1997  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
1998                                   X86cmpms_su, X86cmpmsSAE_su,
1999                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2000  let ExeDomain = SSEPackedDouble in
2001  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2002                                   X86cmpms_su, X86cmpmsSAE_su,
2003                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, REX_W;
2004}
// Half-precision variant: gated on HasFP16 and adds TA.
// NOTE(review): it is placed in the SSEPackedSingle execution domain, the
// same as VCMPSSZ - confirm this is intended for f16.
2005let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
2006  defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
2007                                   X86cmpms_su, X86cmpmsSAE_su,
2008                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;
2009
// Packed integer compare writing a mask register, without an immediate:
// rr/rm (unmasked) and rrk/rmk (EVEX_K with a $mask input).
// All four defs have empty pattern lists ([]), so mayLoad/hasSideEffects are
// spelled out explicitly.
//   opc          - opcode byte.
//   OpcodeStr    - assembly mnemonic.
//   sched        - foldable scheduling write.
//   _            - vector type info.
//   IsCommutable - applied to the register-register forms (rr, rrk) only.
2010multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
2011                              X86FoldableSchedWrite sched,
2012                              X86VectorVTInfo _, bit IsCommutable> {
2013  let isCommutable = IsCommutable, hasSideEffects = 0 in
2014  def rr : AVX512BI<opc, MRMSrcReg,
2015             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2016             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2017             []>, EVEX, VVVV, Sched<[sched]>;
2018  let mayLoad = 1, hasSideEffects = 0 in
2019  def rm : AVX512BI<opc, MRMSrcMem,
2020             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2021             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2022             []>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Masked variants: same operands plus a leading $mask of class _.KRCWM.
2023  let isCommutable = IsCommutable, hasSideEffects = 0 in
2024  def rrk : AVX512BI<opc, MRMSrcReg,
2025              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2026              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2027                          "$dst {${mask}}, $src1, $src2}"),
2028              []>, EVEX, VVVV, EVEX_K, Sched<[sched]>;
2029  let mayLoad = 1, hasSideEffects = 0 in
2030  def rmk : AVX512BI<opc, MRMSrcMem,
2031              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2032              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2033                          "$dst {${mask}}, $src1, $src2}"),
2034              []>, EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2035}
2036
// Extends avx512_icmp_packed (inherited as a parent multiclass, so rr/rm/
// rrk/rmk are produced as well) with the broadcast-memory forms rmb and rmbk.
// $src2 is a scalar memory operand, the assembly strings append
// _.BroadcastStr to it, and both defs set EVEX_B. Patterns are empty.
2037multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2038                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2039                                  bit IsCommutable> :
2040           avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2041  let mayLoad = 1, hasSideEffects = 0 in {
  // Unmasked broadcast form.
2042  def rmb : AVX512BI<opc, MRMSrcMem,
2043              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2044              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2045                                    "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2046              []>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Masked broadcast form.
2047  def rmbk : AVX512BI<opc, MRMSrcMem,
2048               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2049                                       _.ScalarMemOp:$src2),
2050               !strconcat(OpcodeStr,
2051                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2052                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2053               []>, EVEX, VVVV, EVEX_K, EVEX_B,
2054               Sched<[sched.Folded, sched.ReadAfterFold]>;
2055  }
2056}
2057
// Width expansion of avx512_icmp_packed (no broadcast forms).
//   sched        - per-width scheduling writes; .ZMM/.YMM/.XMM are selected.
//   VTInfo       - per-width vector type info.
//   prd          - feature predicate required at every width.
//   IsCommutable - forwarded to avx512_icmp_packed; defaults to 0.
2058multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2059                                 X86SchedWriteWidths sched,
2060                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
2061                                 bit IsCommutable = 0> {
  // 512-bit form needs only prd.
2062  let Predicates = [prd] in
2063  defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2064                              VTInfo.info512, IsCommutable>, EVEX_V512;
2065
  // 256-bit and 128-bit forms need prd and HasVLX.
2066  let Predicates = [prd, HasVLX] in {
2067    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2068                                   VTInfo.info256, IsCommutable>, EVEX_V256;
2069    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2070                                   VTInfo.info128, IsCommutable>, EVEX_V128;
2071  }
2072}
2073
// Width expansion of avx512_icmp_packed_rmb, i.e. the regular forms plus the
// broadcast-memory forms at each width.
//   sched        - per-width scheduling writes; .ZMM/.YMM/.XMM are selected.
//   VTInfo       - per-width vector type info.
//   prd          - feature predicate required at every width.
//   IsCommutable - forwarded to avx512_icmp_packed_rmb; defaults to 0.
2074multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2075                                     X86SchedWriteWidths sched,
2076                                     AVX512VLVectorVTInfo VTInfo,
2077                                     Predicate prd, bit IsCommutable = 0> {
  // 512-bit form needs only prd.
2078  let Predicates = [prd] in
2079  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2080                                  VTInfo.info512, IsCommutable>, EVEX_V512;
2081
  // 256-bit and 128-bit forms need prd and HasVLX.
2082  let Predicates = [prd, HasVLX] in {
2083    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2084                                       VTInfo.info256, IsCommutable>, EVEX_V256;
2085    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2086                                       VTInfo.info128, IsCommutable>, EVEX_V128;
2087  }
2088}
2089
2090// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2091// increase the pattern complexity the way an immediate would.
// VPCMPEQ* / VPCMPGT* instantiations.
//  - Byte/word variants use avx512_icmp_packed_vl (no broadcast forms), are
//    gated on HasBWI and add WIG.
//  - Dword/qword variants use avx512_icmp_packed_rmb_vl and are gated on
//    HasAVX512; the qword variants add T8 and REX_W.
//  - The EQ variants pass IsCommutable = 1; the GT variants leave it at the
//    default of 0.
// NOTE(review): the instruction defs produced by avx512_icmp_packed and
// avx512_icmp_packed_rmb have empty pattern lists, so confirm whether
// AddedComplexity still has any effect for these records.
2092let AddedComplexity = 2 in {
2093// FIXME: Is there a better scheduler class for VPCMP?
2094defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2095                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2096                EVEX_CD8<8, CD8VF>, WIG;
2097
2098defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2099                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2100                EVEX_CD8<16, CD8VF>, WIG;
2101
2102defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2103                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2104                EVEX_CD8<32, CD8VF>;
2105
2106defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2107                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2108                T8, REX_W, EVEX_CD8<64, CD8VF>;
2109
2110defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2111                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2112                EVEX_CD8<8, CD8VF>, WIG;
2113
2114defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2115                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2116                EVEX_CD8<16, CD8VF>, WIG;
2117
2118defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2119                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2120                EVEX_CD8<32, CD8VF>;
2121
2122defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2123                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2124                T8, REX_W, EVEX_CD8<64, CD8VF>;
2125}
2126
// Packed integer compare with an immediate condition code ("vpcmp"#Suffix),
// writing a mask register. Defines rri/rmi (unmasked) and rrik/rmik (EVEX_K),
// plus two extra patterns for a load appearing as the first compare operand.
//   opc     - opcode byte.
//   Suffix  - mnemonic suffix appended to "vpcmp".
//   Frag    - compare fragment used by the unmasked patterns.
//   Frag_su - compare fragment used inside the (and $mask, ...) patterns.
//   sched   - foldable scheduling write.
//   _       - vector type info.
//   Name    - instruction name prefix; combined with _.ZSuffix and the form
//             suffix to look up the instruction in the extra patterns.
2127multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2128                          PatFrag Frag_su,
2129                          X86FoldableSchedWrite sched,
2130                          X86VectorVTInfo _, string Name> {
  // Register-register, unmasked.
2131  let isCommutable = 1 in
2132  def rri : AVX512AIi8<opc, MRMSrcReg,
2133             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2134             !strconcat("vpcmp", Suffix,
2135                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2136             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2137                                                (_.VT _.RC:$src2),
2138                                                cond)))]>,
2139             EVEX, VVVV, Sched<[sched]>;
  // Register-memory, unmasked: second operand loaded through _.LdFrag.
2140  def rmi : AVX512AIi8<opc, MRMSrcMem,
2141             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2142             !strconcat("vpcmp", Suffix,
2143                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2144             [(set _.KRC:$dst, (_.KVT
2145                                (Frag:$cc
2146                                 (_.VT _.RC:$src1),
2147                                 (_.VT (_.LdFrag addr:$src2)),
2148                                 cond)))]>,
2149             EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Register-register, masked: the pattern is (and $mask, compare).
2150  let isCommutable = 1 in
2151  def rrik : AVX512AIi8<opc, MRMSrcReg,
2152              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2153                                      u8imm:$cc),
2154              !strconcat("vpcmp", Suffix,
2155                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2156                         "$dst {${mask}}, $src1, $src2, $cc}"),
2157              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2158                                     (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2159                                                         (_.VT _.RC:$src2),
2160                                                         cond))))]>,
2161              EVEX, VVVV, EVEX_K, Sched<[sched]>;
  // Register-memory, masked.
2162  def rmik : AVX512AIi8<opc, MRMSrcMem,
2163              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2164                                    u8imm:$cc),
2165              !strconcat("vpcmp", Suffix,
2166                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2167                         "$dst {${mask}}, $src1, $src2, $cc}"),
2168              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2169                                     (_.KVT
2170                                      (Frag_su:$cc
2171                                       (_.VT _.RC:$src1),
2172                                       (_.VT (_.LdFrag addr:$src2)),
2173                                       cond))))]>,
2174              EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2175
  // Load on the left-hand side: select the memory form with the operands
  // swapped and the condition code rewritten by X86pcmpm_imm_commute.
2176  def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
2177                             (_.VT _.RC:$src1), cond)),
2178            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2179             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2180
  // Same operand swap for the masked memory form.
2181  def : Pat<(and _.KRCWM:$mask,
2182                 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
2183                                     (_.VT _.RC:$src1), cond))),
2184            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2185             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2186             (X86pcmpm_imm_commute $cc))>;
2187}
2188
// Extends avx512_icmp_cc (inherited as a parent multiclass) with the
// broadcast-memory forms rmib and rmibk, plus the matching patterns for a
// broadcast load appearing as the first compare operand.
// $src2 is a scalar memory operand matched through _.BroadcastLdFrag; the
// assembly strings append _.BroadcastStr and both defs set EVEX_B.
2189multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2190                              PatFrag Frag_su, X86FoldableSchedWrite sched,
2191                              X86VectorVTInfo _, string Name> :
2192           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
  // Unmasked broadcast form.
2193  def rmib : AVX512AIi8<opc, MRMSrcMem,
2194             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2195                                     u8imm:$cc),
2196             !strconcat("vpcmp", Suffix,
2197                        "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2198                        "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2199             [(set _.KRC:$dst, (_.KVT (Frag:$cc
2200                                       (_.VT _.RC:$src1),
2201                                       (_.BroadcastLdFrag addr:$src2),
2202                                       cond)))]>,
2203             EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Masked broadcast form: the pattern is (and $mask, compare).
2204  def rmibk : AVX512AIi8<opc, MRMSrcMem,
2205              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2206                                       _.ScalarMemOp:$src2, u8imm:$cc),
2207              !strconcat("vpcmp", Suffix,
2208                  "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2209                  "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2210              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2211                                     (_.KVT (Frag_su:$cc
2212                                             (_.VT _.RC:$src1),
2213                                             (_.BroadcastLdFrag addr:$src2),
2214                                             cond))))]>,
2215              EVEX, VVVV, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2216
  // Broadcast load on the left-hand side: swap the operands and rewrite the
  // condition code with X86pcmpm_imm_commute.
2217  def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
2218                    (_.VT _.RC:$src1), cond)),
2219            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2220             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2221
  // Same operand swap for the masked broadcast form.
2222  def : Pat<(and _.KRCWM:$mask,
2223                 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
2224                                     (_.VT _.RC:$src1), cond))),
2225            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2226             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2227             (X86pcmpm_imm_commute $cc))>;
2228}
2229
// Instantiates avx512_icmp_cc for all three vector widths: the 512-bit form
// under `prd` alone, the 256- and 128-bit forms additionally under HasVLX.
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
                             PatFrag Frag_su, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched.ZMM,
                            VTInfo.info512, NAME>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched.YMM,
                               VTInfo.info256, NAME>,
                EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched.XMM,
                               VTInfo.info128, NAME>,
                EVEX_V128;
  }
}
2244
// Same width fan-out as avx512_icmp_cc_vl, but built on avx512_icmp_cc_rmb so
// each width also gets the embedded-broadcast forms.
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
                                 PatFrag Frag_su, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, sched.ZMM,
                                VTInfo.info512, NAME>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, sched.YMM,
                                   VTInfo.info256, NAME>,
                EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, sched.XMM,
                                   VTInfo.info128, NAME>,
                EVEX_V128;
  }
}
2259
// Integer compare-into-mask instantiations. Byte/word forms are gated on
// HasBWI and use avx512_icmp_cc_vl; dword/qword forms are gated on HasAVX512
// and use avx512_icmp_cc_rmb_vl, which additionally defines the
// embedded-broadcast rmib/rmibk variants.
// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?

// 8-bit elements.
defm VPCMPB  : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
               EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
               EVEX_CD8<8, CD8VF>;

// 16-bit elements.
defm VPCMPW  : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
               REX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
               REX_W, EVEX_CD8<16, CD8VF>;

// 32-bit elements.
defm VPCMPD  : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
                                     SchedWriteVecALU, avx512vl_i32_info,
                                     HasAVX512>,
               EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
                                     SchedWriteVecALU, avx512vl_i32_info,
                                     HasAVX512>,
               EVEX_CD8<32, CD8VF>;

// 64-bit elements.
defm VPCMPQ  : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
                                     SchedWriteVecALU, avx512vl_i64_info,
                                     HasAVX512>,
               REX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
                                     SchedWriteVecALU, avx512vl_i64_info,
                                     HasAVX512>,
               REX_W, EVEX_CD8<64, CD8VF>;
2288
// Packed floating-point compare-into-mask (opcode 0xC2) for one vector type.
// Defines the register (rri), full-vector memory (rmi) and broadcast memory
// (rmbi) forms through AVX512_maskable_cmp, then adds selection patterns for:
//   * a load / broadcast load appearing as the first compare operand
//     (selected with X86cmpm_imm_commute applied to the immediate), and
//   * the X86cmpmm mask-intrinsic node, with either an all-ones mask or an
//     explicit write mask.
// `Name` is the defm prefix used to look the generated instructions up again.
multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              string Name> {
  let Uses = [MXCSR], mayRaiseFPException = 1 in {
    // NOTE(review): the trailing `1` is presumably AVX512_maskable_cmp's
    // commutable flag -- confirm against that multiclass.
    defm rri : AVX512_maskable_cmp<
                   0xC2, MRMSrcReg, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                   !strconcat("vcmp", _.Suffix),
                   "$cc, $src2, $src1", "$src1, $src2, $cc",
                   (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                timm:$cc),
                   (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                               timm:$cc),
                   1>,
               Sched<[sched]>;

    defm rmi : AVX512_maskable_cmp<
                   0xC2, MRMSrcMem, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                   !strconcat("vcmp", _.Suffix),
                   "$cc, $src2, $src1", "$src1, $src2, $cc",
                   (X86any_cmpm (_.VT _.RC:$src1),
                                (_.VT (_.LdFrag addr:$src2)), timm:$cc),
                   (X86cmpm_su (_.VT _.RC:$src1),
                               (_.VT (_.LdFrag addr:$src2)), timm:$cc)>,
               Sched<[sched.Folded, sched.ReadAfterFold]>;

    defm rmbi : AVX512_maskable_cmp<
                    0xC2, MRMSrcMem, _, (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                    !strconcat("vcmp", _.Suffix),
                    !strconcat("$cc, ${src2}", _.BroadcastStr, ", $src1"),
                    !strconcat("$src1, ${src2}", _.BroadcastStr, ", $cc"),
                    (X86any_cmpm (_.VT _.RC:$src1),
                                 (_.VT (_.BroadcastLdFrag addr:$src2)),
                                 timm:$cc),
                    (X86cmpm_su (_.VT _.RC:$src1),
                                (_.VT (_.BroadcastLdFrag addr:$src2)),
                                timm:$cc)>,
                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // --- Load in the first operand: select the memory form with the
  // --- immediate passed through X86cmpm_imm_commute.

  // Full-vector load, unmasked.
  def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1), timm:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
                _.RC:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;

  // Full-vector load, write-masked.
  def : Pat<(and _.KRCWM:$mask,
                 (X86cmpm_su (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
                             timm:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
                _.KRCWM:$mask, _.RC:$src1, addr:$src2,
                (X86cmpm_imm_commute timm:$cc))>;

  // Broadcast load, unmasked.
  def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2), (_.VT _.RC:$src1),
                         timm:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi")
                _.RC:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;

  // Broadcast load, write-masked.
  def : Pat<(and _.KRCWM:$mask,
                 (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
                             (_.VT _.RC:$src1), timm:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik")
                _.KRCWM:$mask, _.RC:$src1, addr:$src2,
                (X86cmpm_imm_commute timm:$cc))>;

  // --- Mask intrinsics (X86cmpmm): an all-ones mask selects the unmasked
  // --- instruction, any other mask selects the `k` variant.

  // Register-register.
  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rri")
                _.RC:$src1, _.RC:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rrik")
                _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, timm:$cc)>;

  // Full-vector load in the second operand.
  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                      timm:$cc, (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
                _.RC:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                      timm:$cc, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
                _.KRCWM:$mask, _.RC:$src1, addr:$src2, timm:$cc)>;

  // Broadcast load in the second operand.
  def : Pat<(X86cmpmm (_.VT _.RC:$src1),
                      (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi")
                _.RC:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1),
                      (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik")
                _.KRCWM:$mask, _.RC:$src1, addr:$src2, timm:$cc)>;

  // --- Mask intrinsics with the load in the first operand (immediate is
  // --- passed through X86cmpm_imm_commute).

  // Full-vector load.
  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1),
                      timm:$cc, (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
                _.RC:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1),
                      timm:$cc, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
                _.KRCWM:$mask, _.RC:$src1, addr:$src2,
                (X86cmpm_imm_commute timm:$cc))>;

  // Broadcast load.
  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)),
                      (_.VT _.RC:$src1), timm:$cc, (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi")
                _.RC:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)),
                      (_.VT _.RC:$src1), timm:$cc, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik")
                _.KRCWM:$mask, _.RC:$src1, addr:$src2,
                (X86cmpm_imm_commute timm:$cc))>;
}
2400
// {sae} register-register form of the packed FP compare. Selected from the
// X86cmpmmSAE node: the all-ones-mask pattern feeds the unmasked variant and
// the explicit-mask pattern feeds the masked one.
multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // comparison code form (VCMP[EQ/LT/LE/...]
  let Uses = [MXCSR] in {
    defm rrib : AVX512_maskable_custom_cmp<
                    0xC2, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                    (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
                    !strconcat("vcmp", _.Suffix),
                    "$cc, {sae}, $src2, $src1",
                    "$src1, $src2, {sae}, $cc",
                    [(set _.KRC:$dst,
                          (X86cmpmmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                       timm:$cc, (_.KVT immAllOnesV)))],
                    [(set _.KRC:$dst,
                          (X86cmpmmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                       timm:$cc, _.KRCWM:$mask))]>,
                EVEX_B, Sched<[sched]>;
  }
}
2416
// Width fan-out for the packed FP compare. The 512-bit instantiation gets
// both the common forms and the {sae} form; the 128/256-bit instantiations
// (additionally gated on HasVLX) get the common forms only.
multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
                       Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
             avx512_vcmp_sae<sched.ZMM, _.info512>,
             EVEX_V512;
  }

  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
  }
}
2429
// Packed FP compare instantiations for f64, f32 and f16 elements. The f16
// variant overrides the default predicate with HasFP16.
defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
              AVX512PDIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;

defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
              AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;

defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
              AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<16, CD8VF>, TA;
2436
// Scalar fp compares whose *first* operand is a load: select the rmi form
// with the register operand first and the immediate passed through
// X86cmpm_imm_commute.
let Predicates = [HasAVX512] in {
  // f64
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
            (VCMPSDZrmi FR64X:$src1, addr:$src2,
                        (X86cmpm_imm_commute timm:$cc))>;

  // f32
  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
            (VCMPSSZrmi FR32X:$src1, addr:$src2,
                        (X86cmpm_imm_commute timm:$cc))>;
}

let Predicates = [HasFP16] in {
  // f16
  def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
            (VCMPSHZrmi FR16X:$src1, addr:$src2,
                        (X86cmpm_imm_commute timm:$cc))>;
}
2450
2451// ----------------------------------------------------------------
2452// FPClass
2453
2454//handle fpclass instruction  mask =  op(reg_scalar,imm)
2455//                                    op(mem_scalar,imm)
// Scalar fpclass: four instructions (rr, rrk, rm, rmk) that write a mask
// register from one scalar source and an 8-bit immediate. The `k` variants
// AND the result with $mask and are selected from the *_su fragment.
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    // Register source, unmasked.
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    !strconcat(OpcodeStr, _.Suffix,
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _.KRC:$dst,
                          (X86Vfpclasss (_.VT _.RC:$src1),
                                        (i32 timm:$src2)))]>,
             Sched<[sched]>;

    // Register source, write-masked.
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     !strconcat(OpcodeStr, _.Suffix,
                                "\t{$src2, $src1, $dst {${mask}}|"
                                "$dst {${mask}}, $src1, $src2}"),
                     [(set _.KRC:$dst,
                           (and _.KRCWM:$mask,
                                (X86Vfpclasss_su (_.VT _.RC:$src1),
                                                 (i32 timm:$src2))))]>,
              EVEX_K, Sched<[sched]>;

    // Memory source, unmasked.
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    !strconcat(OpcodeStr, _.Suffix,
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _.KRC:$dst,
                          (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
                                        (i32 timm:$src2)))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;

    // Memory source, write-masked.
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1,
                          i32u8imm:$src2),
                     !strconcat(OpcodeStr, _.Suffix,
                                "\t{$src2, $src1, $dst {${mask}}|"
                                "$dst {${mask}}, $src1, $src2}"),
                     [(set _.KRC:$dst,
                           (and _.KRCWM:$mask,
                                (X86Vfpclasss_su
                                    (_.ScalarIntMemFrags addr:$src1),
                                    (i32 timm:$src2))))]>,
              EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
2492
// Handle vector fpclass instructions (one vector source plus an immediate):
//   mask = fpclass(reg_vec, imm)
//   mask = fpclass(mem_vec, imm)
//   mask = fpclass(broadcast(eltVt), imm)
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string mem> {
  // Six instructions: {register, full-vector memory, broadcast memory} x
  // {unmasked, write-masked}. The full-vector memory forms carry a "{mem}"
  // width suffix in their mnemonic.
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    !strconcat(OpcodeStr, _.Suffix,
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _.KRC:$dst,
                          (X86Vfpclass (_.VT _.RC:$src1),
                                       (i32 timm:$src2)))]>,
             Sched<[sched]>;

    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     !strconcat(OpcodeStr, _.Suffix,
                                "\t{$src2, $src1, $dst {${mask}}|"
                                "$dst {${mask}}, $src1, $src2}"),
                     [(set _.KRC:$dst,
                           (and _.KRCWM:$mask,
                                (X86Vfpclass_su (_.VT _.RC:$src1),
                                                (i32 timm:$src2))))]>,
              EVEX_K, Sched<[sched]>;

    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    !strconcat(OpcodeStr, _.Suffix, "{", mem, "}",
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _.KRC:$dst,
                          (X86Vfpclass (_.VT (_.LdFrag addr:$src1)),
                                       (i32 timm:$src2)))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;

    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                     !strconcat(OpcodeStr, _.Suffix, "{", mem, "}",
                                "\t{$src2, $src1, $dst {${mask}}|"
                                "$dst {${mask}}, $src1, $src2}"),
                     [(set _.KRC:$dst,
                           (and _.KRCWM:$mask,
                                (X86Vfpclass_su (_.VT (_.LdFrag addr:$src1)),
                                                (i32 timm:$src2))))]>,
              EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;

    def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                     !strconcat(OpcodeStr, _.Suffix,
                                "\t{$src2, ${src1}", _.BroadcastStr,
                                ", $dst|$dst, ${src1}", _.BroadcastStr,
                                ", $src2}"),
                     [(set _.KRC:$dst,
                           (X86Vfpclass
                               (_.VT (_.BroadcastLdFrag addr:$src1)),
                               (i32 timm:$src2)))]>,
              EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

    def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.ScalarMemOp:$src1,
                           i32u8imm:$src2),
                      !strconcat(OpcodeStr, _.Suffix,
                                 "\t{$src2, ${src1}", _.BroadcastStr,
                                 ", $dst {${mask}}|$dst {${mask}}, ${src1}",
                                 _.BroadcastStr, ", $src2}"),
                      [(set _.KRC:$dst,
                            (and _.KRCWM:$mask,
                                 (X86Vfpclass_su
                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
                                     (i32 timm:$src2))))]>,
               EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
  // the memory form. These are AT&T-syntax-only aliases.
  def : InstAlias<!strconcat(OpcodeStr, _.Suffix, mem,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  (!cast<Instruction>(NAME#"rr")
                      _.KRC:$dst, _.RC:$src1, i32u8imm:$src2),
                  0, "att">;

  def : InstAlias<!strconcat(OpcodeStr, _.Suffix, mem,
                             "\t{$src2, $src1, $dst {${mask}}|"
                             "$dst {${mask}}, $src1, $src2}"),
                  (!cast<Instruction>(NAME#"rrk")
                      _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                  0, "att">;

  def : InstAlias<!strconcat(OpcodeStr, _.Suffix, mem,
                             "\t{$src2, ${src1}", _.BroadcastStr,
                             ", $dst|$dst, ${src1}", _.BroadcastStr,
                             ", $src2}"),
                  (!cast<Instruction>(NAME#"rmb")
                      _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2),
                  0, "att">;

  def : InstAlias<!strconcat(OpcodeStr, _.Suffix, mem,
                             "\t{$src2, ${src1}", _.BroadcastStr,
                             ", $dst {${mask}}|$dst {${mask}}, ${src1}",
                             _.BroadcastStr, ", $src2}"),
                  (!cast<Instruction>(NAME#"rmbk")
                      _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1,
                      i32u8imm:$src2),
                  0, "att">;
}
2571
// Width fan-out for vector fpclass. Each width passes its own memory-form
// suffix letter ("z", "y" or "x"); the 128/256-bit forms also require HasVLX.
multiclass avx512_vector_fpclass_all<string OpcodeStr,
                                     AVX512VLVectorVTInfo _, bits<8> opc,
                                     X86SchedWriteWidths sched,
                                     Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM, _.info512, "z">,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM, _.info128,
                                      "x">,
                EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM, _.info256,
                                      "y">,
                EVEX_V256;
  }
}
2586
// Instantiates every fpclass flavour: packed (PH/PS/PD) via
// avx512_vector_fpclass_all with `opcVec`, and scalar (SHZ/SSZ/SDZ) via
// avx512_scalar_fpclass with `opcScalar`. The f16 flavours are gated on
// HasFP16, the f32/f64 flavours on HasDQI.
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar,
                                 X86SchedWriteWidths sched> {
  // Half precision.
  defm PH  : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f16_info, opcVec,
                                       sched, HasFP16>,
             EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
  defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, sched.Scl,
                                   f16x_info, HasFP16>,
             EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;

  // Packed single / double precision.
  defm PS  : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
                                       sched, HasDQI>,
             EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
  defm PD  : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
                                       sched, HasDQI>,
             EVEX_CD8<64, CD8VF>, AVX512AIi8Base, REX_W;

  // Scalar single / double precision.
  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, sched.Scl,
                                   f32x_info, HasDQI>,
             VEX_LIG, EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, sched.Scl,
                                   f64x_info, HasDQI>,
             VEX_LIG, EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, REX_W;
}
2608
// vfpclass: packed forms use opcode 0x66, scalar forms use opcode 0x67.
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67,
                                      SchedWriteFCmp>,
                EVEX;
2610
2611//-----------------------------------------------------------------
2612// Mask register copy, including
2613// - copy between mask registers
2614// - load/store mask registers
2615// - copy from GPR to mask register and vice versa
2616//
// Mask-register move family for one mask width:
//   kk - mask register to mask register (no selection pattern),
//   km - load a mask register from memory,
//   mk - store a mask register to memory.
// `Suffix` is appended to each record name; a non-empty Suffix also switches
// the kk form's explicitOpPrefix to ExplicitEVEX.
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC,
                           ValueType vvt, X86MemOperand x86memop,
                           string Suffix = ""> {
  // These attributes apply to the register-to-register form only.
  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove],
      explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix,
                             ExplicitEVEX) in {
    def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
                      OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
                    Sched<[WriteMove]>;
  }

  def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
                    OpcodeStr#"\t{$src, $dst|$dst, $src}",
                    [(set KRC:$dst, (vvt (load addr:$src)))]>,
                  Sched<[WriteLoad]>, NoCD8;

  def mk#Suffix : I<opc_mk, MRMDestMem, (outs),
                    (ins x86memop:$dst, KRC:$src),
                    OpcodeStr#"\t{$src, $dst|$dst, $src}",
                    [(store KRC:$src, addr:$dst)]>,
                  Sched<[WriteStore]>, NoCD8;
}
2634
// Moves between a general-purpose register class and a mask register class:
//   kr - GPR to mask register,
//   rk - mask register to GPR.
// Neither form carries a selection pattern.
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr, RegisterClass KRC,
                               RegisterClass GRC, string Suffix = ""> {
  let hasSideEffects = 0 in {
    def kr#Suffix : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
                      OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
                    Sched<[WriteMove]>;

    def rk#Suffix : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
                      OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
                    Sched<[WriteMove]>;
  }
}
2647
// KMOV{B,W,D,Q} instantiations. Each width is defined twice: a VEX-encoded
// set under NoEGPR, and an EVEX-encoded set (record names suffixed "_EVEX")
// under HasEGPR + In64BitMode.

// 8-bit masks (DQI).
let Predicates = [HasDQI, NoEGPR] in {
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, TB, PD;
}
let Predicates = [HasDQI, HasEGPR, In64BitMode] in {
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem,
                               "_EVEX">,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32, "_EVEX">,
               EVEX, TB, PD;
}

// 16-bit masks (base AVX512).
let Predicates = [HasAVX512, NoEGPR] in {
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1,
                               i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, TB;
}
let Predicates = [HasAVX512, HasEGPR, In64BitMode] in {
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1,
                               i16mem, "_EVEX">,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32, "_EVEX">,
               EVEX, TB;
}

// 32- and 64-bit masks (BWI). The mask/memory and GPR forms are instantiated
// separately because they use different prefix modifiers.
let Predicates = [HasBWI, NoEGPR] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,
                               i32mem>,
               VEX, TB, PD, REX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, TB, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1,
                               i64mem>,
               VEX, TB, REX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, TB, XD, REX_W;
}
let Predicates = [HasBWI, HasEGPR, In64BitMode] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,
                               i32mem, "_EVEX">,
               EVEX, TB, PD, REX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32, "_EVEX">,
               EVEX, TB, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1,
                               i64mem, "_EVEX">,
               EVEX, TB, REX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64, "_EVEX">,
               EVEX, TB, XD, REX_W;
}
2686
// GR from/to mask register

// i16 <-> v16i1. The 16-bit GPR is first placed in the low half of an
// otherwise undefined 32-bit register.
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS
              (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
              VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)),
                          sub_16bit)>;
def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)),
                          sub_8bit)>;

// i8 <-> v8i1.
def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS
              (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)),
              VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)),
                          sub_8bit)>;

// Extensions of a v16i1 mask: zext goes through KMOVWrk, anyext is a plain
// register-class copy.
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;
def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32),
                         sub_32bit)>;

// Extensions of a v8i1 mask: the zext forms use KMOVBrk and so require DQI.
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>,
      Requires<[HasDQI]>;
def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>,
      Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;
def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32),
                         sub_32bit)>;

// i32 <-> v32i1 and i64 <-> v64i1 are direct register-class copies.
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;
2726
// Load/store kreg

// With DQI, loads of the sub-byte mask types are done with KMOVBkm and the
// result is copied into the narrower mask register class.
let Predicates = [HasDQI] in {
  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}

// Scalar integer loads bitcast to a mask: the i8 case goes through a
// zero-extending GPR load, the i16 case loads straight into a mask register.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
            (KMOVWkm addr:$src)>;
}
2743
// ISD::EXTRACT_VECTOR_ELT constrained to: i8 result, a vector operand with i1
// elements, and a pointer-typed index.
def X86kextract
    : SDNode<"ISD::EXTRACT_VECTOR_ELT",
             SDTypeProfile<1, 2,
                           [SDTCisVT<0, i8>, SDTCVecEltisVT<1, i1>,
                            SDTCisPtrTy<2>]>>;
2748
let Predicates = [HasAVX512] in {
  // For one mask register class / value type pair, lower scalar_to_vector
  // from a GPR and element-0 extraction to plain register-class copies.
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC,
                                              ValueType maskVT> {
    // GR32 -> mask.
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    // GR8 -> mask, via the low byte of an undefined 32-bit register.
    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS
                  (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                  maskRC)>;

    // Element 0 of the mask as i8.
    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)),
                              sub_8bit)>;

    // Element 0 of the mask, any-extended to i32.
    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;

  // Inserting a single GR8-derived bit at index 0 of an all-zero v16i1:
  // AND the widened source with 1, then move the result to a mask register.
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (KMOVWkr
                (AND32ri
                    (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                    (i32 1)))>;
}
2778
2779// Mask unary operation
// - KNOT
// One register-to-register unary mask instruction, selected from
// `OpNode` applied to a KRC operand and gated on `prd`.
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                            SDPatternOperator OpNode,
                            X86FoldableSchedWrite sched, Predicate prd> {
  let Predicates = [prd] in {
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               OpcodeStr#"\t{$src, $dst|$dst, $src}",
               [(set KRC:$dst, (OpNode KRC:$src))]>,
             Sched<[sched]>;
  }
}
2790
// Instantiates avx512_mask_unop for the four mask widths. Each variant appends
// a width suffix to the mnemonic and picks its own register class, predicate
// and encoding mixins:
//   B - VK8,  HasDQI,    VEX, TB, PD
//   W - VK16, HasAVX512, VEX, TB
//   D - VK32, HasBWI,    VEX, TB, PD, REX_W
//   Q - VK64, HasBWI,    VEX, TB, REX_W
2791multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2792                                SDPatternOperator OpNode,
2793                                X86FoldableSchedWrite sched> {
2794  defm B : avx512_mask_unop<opc, OpcodeStr # "b", VK8, OpNode,
2795                            sched, HasDQI>, VEX, TB, PD;
2796  defm W : avx512_mask_unop<opc, OpcodeStr # "w", VK16, OpNode,
2797                            sched, HasAVX512>, VEX, TB;
2798  defm D : avx512_mask_unop<opc, OpcodeStr # "d", VK32, OpNode,
2799                            sched, HasBWI>, VEX, TB, PD, REX_W;
2800  defm Q : avx512_mask_unop<opc, OpcodeStr # "q", VK64, OpNode,
2801                            sched, HasBWI>, VEX, TB, REX_W;
2802}
2803
2804// TODO - do we need a X86SchedWriteWidths::KMASK type?
// KNOT{B,W,D,Q}rr: opcode 0x44, selected for `vnot`, scheduled as
// SchedWriteVecLogic.XMM.
2805defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2806
2807// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
// With AVX512 but without DQI, a v8i1 vnot is done by copying the operand to
// VK16, applying KNOTWrr, and copying the result back to VK8. The brace-less
// `let` covers only this one pattern.
2808let Predicates = [HasAVX512, NoDQI] in
2809def : Pat<(vnot VK8:$src),
2810          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
2811
// Masks narrower than 8 bits are always inverted through the 16-bit
// instruction: widen the operand to VK16, apply KNOTWrr, then copy the result
// back to the operand's own mask register class. These patterns carry no
// Predicates of their own.
2812def : Pat<(vnot VK4:$src),
2813          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2814def : Pat<(vnot VK2:$src),
2815          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
// The v1i1 result is copied back to VK1 (previously VK2), so the result class
// matches the operand class like every other narrow-mask pattern.
2816def : Pat<(vnot VK1:$src),
2817          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;
2818
2819// Mask binary operation
2820// - KAND, KANDN, KOR, KXNOR, KXOR, KADD
// Defines one two-source mask instruction `rr` operating entirely on KRC
// registers, selected for (OpNode KRC:$src1, KRC:$src2).
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic; the operand strings are appended to it.
//   KRC          - mask register class of both sources and the destination.
//   OpNode       - DAG operator matched by the instruction's pattern.
//   sched        - scheduling class placed in Sched<[...]>.
//   prd          - predicate guarding the instruction.
//   IsCommutable - forwarded to the instruction's isCommutable flag.
2821multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2822                           RegisterClass KRC, SDPatternOperator OpNode,
2823                           X86FoldableSchedWrite sched, Predicate prd,
2824                           bit IsCommutable> {
2825  let isCommutable = IsCommutable, Predicates = [prd] in
2826    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2827               OpcodeStr #
2828                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2829               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
2830               Sched<[sched]>;
2831}
2832
// Instantiates avx512_mask_binop for the four mask widths, appending a width
// suffix to the mnemonic. All variants share VEX, VVVV, VEX_L and TB; B and D
// add PD, D and Q add REX_W.
//   B - VK8,  HasDQI
//   W - VK16, prdW (defaults to HasAVX512; callers may override it)
//   D - VK32, HasBWI
//   Q - VK64, HasBWI
2833multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2834                                 SDPatternOperator OpNode,
2835                                 X86FoldableSchedWrite sched, bit IsCommutable,
2836                                 Predicate prdW = HasAVX512> {
2837  defm B : avx512_mask_binop<opc, OpcodeStr # "b", VK8, OpNode,
2838                             sched, HasDQI, IsCommutable>, VEX, VVVV, VEX_L, TB, PD;
2839  defm W : avx512_mask_binop<opc, OpcodeStr # "w", VK16, OpNode,
2840                             sched, prdW, IsCommutable>, VEX, VVVV, VEX_L, TB;
2841  defm D : avx512_mask_binop<opc, OpcodeStr # "d", VK32, OpNode,
2842                             sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB, PD;
2843  defm Q : avx512_mask_binop<opc, OpcodeStr # "q", VK64, OpNode,
2844                             sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB;
2845}
2846
2847// TODO - do we need a X86SchedWriteWidths::KMASK type?
// Two-source mask instructions. The trailing bit argument is IsCommutable:
// every operation here is marked commutable except KANDN. KADD additionally
// passes HasDQI as prdW, so its W form is guarded by HasDQI rather than the
// default HasAVX512.
2848defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
2849defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
2850defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
2851defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
2852defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
2853defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
2854
// Selection patterns for a binary mask operation on masks narrower than 16
// bits. Both operands are copied to VK16, `Inst` is applied, and the result is
// copied back to the operands' own register class.
//   VOpNode - DAG operator to match.
//   Inst    - instruction taking two VK16 operands (callers pass the W form).
2855multiclass avx512_binop_pat<SDPatternOperator VOpNode,
2856                            Instruction Inst> {
2857  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
2858  // for the DQI set, this type is legal and KxxxB instruction is used
2859  let Predicates = [NoDQI] in
2860  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
2861            (COPY_TO_REGCLASS
2862              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
2863                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
2864
2865  // All types smaller than 8 bits require conversion anyway
  // (these three patterns are outside the NoDQI `let`, which has no braces and
  // therefore covers only the VK8 pattern).
2866  def : Pat<(VOpNode VK1:$src1, VK1:$src2),
2867        (COPY_TO_REGCLASS (Inst
2868                           (COPY_TO_REGCLASS VK1:$src1, VK16),
2869                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
2870  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
2871        (COPY_TO_REGCLASS (Inst
2872                           (COPY_TO_REGCLASS VK2:$src1, VK16),
2873                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
2874  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
2875        (COPY_TO_REGCLASS (Inst
2876                           (COPY_TO_REGCLASS VK4:$src1, VK16),
2877                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
2878}
2879
// Narrow-mask fallbacks for and/vandn/or/vxnor/xor, each routed through the
// corresponding 16-bit (W) instruction.
2880defm : avx512_binop_pat<and,   KANDWrr>;
2881defm : avx512_binop_pat<vandn, KANDNWrr>;
2882defm : avx512_binop_pat<or,    KORWrr>;
2883defm : avx512_binop_pat<vxnor, KXNORWrr>;
2884defm : avx512_binop_pat<xor,   KXORWrr>;
2885
2886// Mask unpacking
// Defines a "kunpck<Suffix>" instruction (opcode 0x4b) that takes two Src-width
// masks and produces one Dst-width mask, plus the pattern that selects it for
// concat_vectors.
//   Suffix - width suffix appended to "kunpck".
//   Dst    - mask type info of the result.
//   Src    - mask type info of both inputs.
//   sched  - scheduling class placed in Sched<[...]>.
//   prd    - predicate guarding both the instruction and the pattern.
2887multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
2888                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
2889                             Predicate prd> {
2890  let Predicates = [prd] in {
    // The instruction itself has an empty pattern list; hasSideEffects is set
    // explicitly (presumably because there is no pattern to infer it from).
2891    let hasSideEffects = 0 in
2892    def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
2893               (ins Src.KRC:$src1, Src.KRC:$src2),
2894               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
2895               VEX, VVVV, VEX_L, Sched<[sched]>;
2896
    // Note the operand swap: concat_vectors(src1, src2) is emitted as
    // rr(src2, src1).
    // NOTE(review): presumably the instruction places its first source in the
    // upper half of the result - confirm against the ISA reference.
2897    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
2898              (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
2899  }
2900}
2901
// Mask unpack instances: 8+8 -> 16 (HasAVX512), 16+16 -> 32 and 32+32 -> 64
// (both HasBWI). All are scheduled as WriteShuffle.
2902defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, TB, PD;
2903defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, TB;
2904defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, TB, REX_W;
2905
2906// Mask bit testing
// Defines one flag-setting mask test instruction `rr`. It has no register
// outputs; it reads two KRC operands, lists EFLAGS in Defs, and is selected
// for (set EFLAGS, (OpNode src1, src2)).
//   opc       - opcode byte.
//   OpcodeStr - mnemonic; the operand strings are appended to it.
//   KRC       - mask register class of both operands.
//   OpNode    - DAG node matched by the instruction's pattern.
//   sched     - scheduling class placed in Sched<[...]>.
//   prd       - predicate guarding the instruction.
2907multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2908                              SDNode OpNode, X86FoldableSchedWrite sched,
2909                              Predicate prd> {
2910  let Defs = [EFLAGS], Predicates = [prd] in
2911    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
2912               OpcodeStr # "\t{$src2, $src1|$src1, $src2}",
2913               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
2914               Sched<[sched]>;
2915}
2916
// Instantiates avx512_mask_testop for the four mask widths, appending a width
// suffix to the mnemonic.
//   B - VK8,  HasDQI, VEX, TB, PD
//   W - VK16, prdW (defaults to HasAVX512), VEX, TB
//   Q - VK64, HasBWI, VEX, TB, REX_W
//   D - VK32, HasBWI, VEX, TB, PD, REX_W
2917multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
2918                                X86FoldableSchedWrite sched,
2919                                Predicate prdW = HasAVX512> {
2920  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
2921                                                                VEX, TB, PD;
2922  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
2923                                                                VEX, TB;
2924  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
2925                                                                VEX, TB, REX_W;
2926  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
2927                                                                VEX, TB, PD, REX_W;
2928}
2929
2930// TODO - do we need a X86SchedWriteWidths::KMASK type?
// KORTEST uses the default prdW (HasAVX512) for its W form; KTEST overrides it
// with HasDQI.
2931defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
2932defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
2933
2934// Mask shift
// Defines one mask shift-by-immediate instruction `ri`: a KRC source, a u8imm
// shift count, and a KRC destination, selected for
// (OpNode KRC:$src, (i8 timm:$imm)). The definition sets
// Predicates = [HasAVX512] itself.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic; the operand strings are appended to it.
//   KRC       - mask register class of source and destination.
//   OpNode    - DAG node matched by the instruction's pattern.
//   sched     - scheduling class placed in Sched<[...]>.
2935multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2936                               SDNode OpNode, X86FoldableSchedWrite sched> {
2937  let Predicates = [HasAVX512] in
2938    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
2939                 OpcodeStr #
2940                            "\t{$imm, $src, $dst|$dst, $src, $imm}",
2941                            [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
2942                 Sched<[sched]>;
2943}
2944
// Instantiates avx512_mask_shiftop for the four mask widths. W and B share
// opc1 and are distinguished by REX_W (set on W only); Q and D share opc2 and
// are distinguished the same way (REX_W set on Q only). All use VEX, TA, PD.
//   opc1      - opcode byte for the W and B forms.
//   opc2      - opcode byte for the Q and D forms.
//   OpcodeStr - mnemonic stem; "w"/"b"/"q"/"d" is appended.
// NOTE(review): avx512_mask_shiftop also sets Predicates = [HasAVX512] on its
// `ri` def; confirm which of the nested `let Predicates` assignments ends up
// on the B/Q/D instructions.
2945multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
2946                                 SDNode OpNode, X86FoldableSchedWrite sched> {
2947  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2948                               sched>, VEX, TA, PD, REX_W;
2949  let Predicates = [HasDQI] in
2950  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2951                               sched>, VEX, TA, PD;
2952  let Predicates = [HasBWI] in {
2953  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2954                               sched>, VEX, TA, PD, REX_W;
2955  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2956                               sched>, VEX, TA, PD;
2957  }
2958}
2959
// Left shifts use opcodes 0x32 (W/B) and 0x33 (Q/D); right shifts use 0x30
// (W/B) and 0x31 (Q/D). Both are scheduled as WriteShuffle.
2960defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
2961defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
2962
2963// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Register-register integer compares on a Narrow vector type, implemented with
// the Wide ("Z"-suffixed) instruction. Each narrow operand is placed in the
// Narrow.SubRegIdx subregister of an IMPLICIT_DEF Wide value, and the resulting
// mask is copied to Narrow.KRC.
//   Frag    - compare fragment matched by the unmasked pattern.
//   Frag_su - fragment matched when the compare result is ANDed with a mask.
//   InstStr - instruction name stem; "Zrri"/"Zrrik" is appended.
//   Narrow  - type info of the compare being selected.
//   Wide    - type info of the instruction actually used.
2964multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
2965                                                 string InstStr,
2966                                                 X86VectorVTInfo Narrow,
2967                                                 X86VectorVTInfo Wide> {
// Unmasked compare; the condition code is converted with X86pcmpm_imm.
2968def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
2969                                (Narrow.VT Narrow.RC:$src2), cond)),
2970          (COPY_TO_REGCLASS
2971           (!cast<Instruction>(InstStr#"Zrri")
2972            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2973            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
2974            (X86pcmpm_imm $cc)), Narrow.KRC)>;
2975
// (and mask, compare) folds into the masked "k" form; the mask is first copied
// to Wide.KRC.
2976def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
2977                           (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
2978                                                    (Narrow.VT Narrow.RC:$src2),
2979                                                    cond)))),
2980          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
2981           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
2982           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2983           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
2984           (X86pcmpm_imm $cc)), Narrow.KRC)>;
2985}
2986
// Integer compares on a Narrow vector type where one operand is a broadcast
// load, implemented with the Wide "Zrmib"/"Zrmibk" instructions. The register
// operand is placed in the Narrow.SubRegIdx subregister of an IMPLICIT_DEF
// Wide value; the memory operand is passed through as addr:$src2. Four
// patterns are emitted: unmasked and masked, each with the broadcast on the
// right-hand side and on the left-hand side (commuted).
//   Frag    - compare fragment matched by the unmasked patterns.
//   Frag_su - fragment matched when the compare result is ANDed with a mask.
//   InstStr - instruction name stem.
//   Narrow  - type info of the compare being selected.
//   Wide    - type info of the instruction actually used.
2987multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
2988                                                     string InstStr,
2989                                                     X86VectorVTInfo Narrow,
2990                                                     X86VectorVTInfo Wide> {
2991// Broadcast load.
2992def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
2993                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
2994          (COPY_TO_REGCLASS
2995           (!cast<Instruction>(InstStr#"Zrmib")
2996            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2997            addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
2998
2999def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3000                           (Narrow.KVT
3001                            (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3002                                         (Narrow.BroadcastLdFrag addr:$src2),
3003                                         cond)))),
3004          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3005           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3006           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3007           addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3008
3009// Commuted with broadcast load.
// The broadcast is the first compare operand here, so the condition code is
// converted with X86pcmpm_imm_commute instead of X86pcmpm_imm.
3010def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
3011                                (Narrow.VT Narrow.RC:$src1),
3012                                cond)),
3013          (COPY_TO_REGCLASS
3014           (!cast<Instruction>(InstStr#"Zrmib")
3015            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3016            addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3017
3018def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3019                           (Narrow.KVT
3020                            (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3021                                         (Narrow.VT Narrow.RC:$src1),
3022                                         cond)))),
3023          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3024           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3025           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3026           addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3027}
3028
3029// Same as above, but for fp types which don't use PatFrags.
// Floating-point compares (X86cmpm / X86cmpm_su) on a Narrow vector type,
// implemented with the Wide "Z"-suffixed instruction. Narrow register operands
// are placed in the Narrow.SubRegIdx subregister of an IMPLICIT_DEF Wide value
// and the resulting mask is copied to Narrow.KRC. Six patterns are emitted:
// register-register, broadcast-load, and commuted broadcast-load, each in an
// unmasked and an (and mask, compare) form.
//   InstStr - instruction name stem; "Zrri", "Zrrik", "Zrmbi" or "Zrmbik" is
//             appended.
//   Narrow  - type info of the compare being selected.
//   Wide    - type info of the instruction actually used.
3030multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3031                                                X86VectorVTInfo Narrow,
3032                                                X86VectorVTInfo Wide> {
// Register-register; the immediate condition code is passed through unchanged.
3033def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3034                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3035          (COPY_TO_REGCLASS
3036           (!cast<Instruction>(InstStr#"Zrri")
3037            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3038            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3039            timm:$cc), Narrow.KRC)>;
3040
3041def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3042                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3043                                       (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3044          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3045           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3046           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3047           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3048           timm:$cc), Narrow.KRC)>;
3049
3050// Broadcast load.
3051def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3052                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3053          (COPY_TO_REGCLASS
3054           (!cast<Instruction>(InstStr#"Zrmbi")
3055            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3056            addr:$src2, timm:$cc), Narrow.KRC)>;
3057
3058def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3059                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3060                                       (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3061          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3062           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3063           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3064           addr:$src2, timm:$cc), Narrow.KRC)>;
3065
3066// Commuted with broadcast load.
// The broadcast is the first compare operand here, so the condition code is
// rewritten with X86cmpm_imm_commute.
3067def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3068                               (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3069          (COPY_TO_REGCLASS
3070           (!cast<Instruction>(InstStr#"Zrmbi")
3071            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3072            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3073
3074def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3075                           (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3076                                       (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3077          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3078           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3079           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3080           addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3081}
3082
// 128/256-bit compares lowered through 512-bit instructions. Guarded by
// HasAVX512, NoVLX and HasEVEX512. Covers signed (X86pcmpm) and unsigned
// (X86pcmpum) 32- and 64-bit integer element compares in register and
// broadcast-load forms, plus f32/f64 compares.
3083let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
  // Integer, register-register.
3084  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3085  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3086
3087  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3088  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3089
3090  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3091  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3092
3093  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3094  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3095
  // Integer, broadcast-load operand.
3096  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3097  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3098
3099  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3100  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3101
3102  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3103  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3104
3105  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3106  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3107
  // Floating point (register and broadcast-load forms come from one multiclass).
3108  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3109  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3110  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3111  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
3112}
3113
// Byte and word element compares on 128/256-bit vectors lowered through the
// 512-bit instructions. Guarded by HasBWI, NoVLX and HasEVEX512. Only the
// register-register multiclass is instantiated for these element types.
3114let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
3115  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3116  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3117
3118  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3119  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3120
3121  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3122  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3123
3124  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3125  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3126}
3127
3128// Mask setting all 0s or 1s
// Defines a pseudo instruction (Format Pseudo, empty asm string, no inputs)
// that produces the constant `Val` of type VT in a KRC register. It is marked
// rematerializable and as cheap as a move, is scheduled as WriteZero, and is
// guarded by HasAVX512.
//   KRC - mask register class of the result.
//   VT  - mask value type of the constant.
//   Val - the constant operator matched by the pattern.
3129multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
3130  let Predicates = [HasAVX512] in
3131    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3132        SchedRW = [WriteZero] in
3133      def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3134                     [(set KRC:$dst, (VT Val))]>;
3135}
3136
// Instantiates avx512_mask_setop for the 16-, 32- and 64-bit mask types
// (suffixes W, D and Q). No 8-bit variant is defined here.
3137multiclass avx512_mask_setop_w<SDPatternOperator Val> {
3138  defm W : avx512_mask_setop<VK16, v16i1, Val>;
3139  defm D : avx512_mask_setop<VK32,  v32i1, Val>;
3140  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3141}
3142
// KSET0{W,D,Q} produce an all-zeros mask, KSET1{W,D,Q} an all-ones mask.
3143defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3144defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3145
3146// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// The same is done for the 4-, 2- and 1-bit mask types: all-zeros and all-ones
// constants are produced by the 16-bit KSET0W/KSET1W pseudo and copied to the
// narrow register class.
3147let Predicates = [HasAVX512] in {
3148  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3149  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3150  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3151  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3152  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
3153  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
3154  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
3155  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
3156}
3157
3158// Patterns for kmask insert_subvector/extract_subvector to/from index=0
// For a (narrow, wide) pair of mask types, emits two patterns that are both
// plain register-class copies:
//   - extract_subvector at index 0 of the wide mask -> copy to subRC;
//   - insert_subvector of the narrow mask into undef at index 0 -> copy to RC.
//   subRC/subVT - register class and type of the narrow mask.
//   RC/VT       - register class and type of the wide mask.
3159multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3160                                             RegisterClass RC, ValueType VT> {
3161  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3162            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3163
3164  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3165            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3166}
// Instantiate the index-0 insert/extract patterns for every pair of mask types
// in which the first is strictly narrower than the second (1..32 bits into
// 2..64 bits).
3167defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
3168defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
3169defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
3170defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
3171defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
3172defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;
3173
3174defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
3175defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
3176defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
3177defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
3178defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;
3179
3180defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
3181defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
3182defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
3183defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;
3184
3185defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
3186defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
3187defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;
3188
3189defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3190defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3191
3192defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3193
3194//===----------------------------------------------------------------------===//
3195// AVX-512 - Aligned and unaligned load and store
3196//
3197
// Defines the load-direction forms of a vector move for one vector type, plus
// the patterns that select masked loads:
//   rr / rrk / rrkz - register moves (plain, merge-masked, zero-masked)
//   rm / rmk / rmkz - loads          (plain, merge-masked, zero-masked)
// Parameters:
//   opc         - opcode byte.
//   OpcodeStr   - mnemonic.
//   Name        - instruction name stem used by the !cast in the mload
//                 patterns (combined with _.ZSuffix).
//   _           - vector type info.
//   ld_frag     - load fragment matched by rm/rmk/rmkz.
//   mload       - masked-load fragment matched by the trailing Pats.
//   Sched       - provides the RR and RM scheduling classes.
//   NoRMPattern - when set, rm is defined with an empty pattern list.
//   SelectOprr  - select operator used in the rrk/rrkz patterns (the memory
//                 forms always use vselect_mask).
3198multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3199                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3200                       X86SchedWriteMoveLS Sched, bit NoRMPattern = 0,
3201                       SDPatternOperator SelectOprr = vselect> {
3202  let hasSideEffects = 0 in {
  // Plain register move; no pattern, flagged isMoveReg.
3203  let isMoveReg = 1 in
3204  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3205                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3206                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>;
  // Zero-masked register move: select between $src and all-zeros.
3207  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3208                      (ins _.KRCWM:$mask,  _.RC:$src),
3209                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3210                       "${dst} {${mask}} {z}, $src}"),
3211                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3212                                           (_.VT _.RC:$src),
3213                                           _.ImmAllZerosV)))], _.ExeDomain>,
3214                       EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3215
  // Plain load; the pattern is dropped when NoRMPattern is set.
3216  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3217  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3218                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3219                    !if(NoRMPattern, [],
3220                        [(set _.RC:$dst,
3221                          (_.VT (ld_frag addr:$src)))]),
3222                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>;
3223
  // Merge-masked forms: $src0 supplies the pass-through value and is tied to
  // $dst.
3224  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3225    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3226                      (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3227                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3228                      "${dst} {${mask}}, $src1}"),
3229                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3230                                          (_.VT _.RC:$src1),
3231                                          (_.VT _.RC:$src0))))], _.ExeDomain>,
3232                       EVEX, EVEX_K, Sched<[Sched.RR]>;
3233    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3234                     (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3235                     !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3236                      "${dst} {${mask}}, $src1}"),
3237                     [(set _.RC:$dst, (_.VT
3238                         (vselect_mask _.KRCWM:$mask,
3239                          (_.VT (ld_frag addr:$src1)),
3240                           (_.VT _.RC:$src0))))], _.ExeDomain>,
3241                     EVEX, EVEX_K, Sched<[Sched.RM]>;
3242  }
  // Zero-masked load: select between the loaded value and all-zeros.
3243  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3244                  (ins _.KRCWM:$mask, _.MemOp:$src),
3245                  OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3246                                "${dst} {${mask}} {z}, $src}",
3247                  [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
3248                    (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3249                  _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3250  }
  // Masked load with an undef pass-through selects the zero-masked load.
3251  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3252            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3253
  // Masked load with an all-zeros pass-through also selects rmkz.
3254  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3255            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3256
  // Masked load with a register pass-through selects the merge-masked load.
3257  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3258            (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
3259             _.KRCWM:$mask, addr:$ptr)>;
3260}
3261
// Instantiates avx512_load for the 512-, 256- and 128-bit variants of a vector
// type using each width's AlignedLdFrag and masked_load_aligned. The 512-bit
// form (Z) requires `prd`; the 256/128-bit forms (Z256, Z128) additionally
// require HasVLX. SelectOprr is left at avx512_load's default.
//   _           - per-width type info bundle (info512/info256/info128).
//   Sched       - provides the ZMM/YMM/XMM scheduling classes.
//   NoRMPattern - forwarded to avx512_load.
3262multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3263                                 AVX512VLVectorVTInfo _, Predicate prd,
3264                                 X86SchedWriteMoveLSWidths Sched,
3265                                 bit NoRMPattern = 0> {
3266  let Predicates = [prd] in
3267  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3268                       _.info512.AlignedLdFrag, masked_load_aligned,
3269                       Sched.ZMM, NoRMPattern>, EVEX_V512;
3270
3271  let Predicates = [prd, HasVLX] in {
3272  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3273                          _.info256.AlignedLdFrag, masked_load_aligned,
3274                          Sched.YMM, NoRMPattern>, EVEX_V256;
3275  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3276                          _.info128.AlignedLdFrag, masked_load_aligned,
3277                          Sched.XMM, NoRMPattern>, EVEX_V128;
3278  }
3279}
3280
// Instantiates avx512_load for the 512-, 256- and 128-bit variants of a vector
// type using each width's LdFrag and masked_load. The 512-bit form (Z)
// requires `prd`; the 256/128-bit forms (Z256, Z128) additionally require
// HasVLX.
//   _           - per-width type info bundle (info512/info256/info128).
//   Sched       - provides the ZMM/YMM/XMM scheduling classes.
//   NoRMPattern - forwarded to avx512_load.
//   SelectOprr  - forwarded to avx512_load (defaults to vselect).
3281multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3282                          AVX512VLVectorVTInfo _, Predicate prd,
3283                          X86SchedWriteMoveLSWidths Sched,
3284                          bit NoRMPattern = 0,
3285                          SDPatternOperator SelectOprr = vselect> {
3286  let Predicates = [prd] in
3287  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3288                       masked_load, Sched.ZMM, NoRMPattern, SelectOprr>, EVEX_V512;
3289
3290  let Predicates = [prd, HasVLX] in {
3291  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3292                         masked_load, Sched.YMM, NoRMPattern, SelectOprr>, EVEX_V256;
3293  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3294                         masked_load, Sched.XMM, NoRMPattern, SelectOprr>, EVEX_V128;
3295  }
3296}
3297
// Defines the store-direction forms of a vector move for one vector type:
//   rr_REV / rrk_REV / rrkz_REV - register moves using the MRMDestReg form
//   mr / mrk                    - stores (plain, masked)
// plus the masked-store selection pattern and ".s"-suffixed assembler aliases
// for the _REV forms.
// Parameters:
//   opc         - opcode byte.
//   OpcodeStr   - mnemonic.
//   BaseName    - instruction name stem used by the !cast expressions
//                 (combined with _.ZSuffix).
//   _           - vector type info.
//   st_frag     - store fragment matched by mr.
//   mstore      - masked-store fragment matched by the trailing Pat.
//   Sched       - provides the RR and MR scheduling classes.
//   NoMRPattern - when set, mr is defined with an empty pattern list.
3298multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3299                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3300                        X86SchedWriteMoveLS Sched, bit NoMRPattern = 0> {
  // Pattern-less register forms: isCodeGenOnly with ForceDisassemble set.
3301  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3302  let isMoveReg = 1 in
3303  def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3304                         OpcodeStr # "\t{$src, $dst|$dst, $src}",
3305                         [], _.ExeDomain>, EVEX,
3306                         Sched<[Sched.RR]>;
3307  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3308                         (ins _.KRCWM:$mask, _.RC:$src),
3309                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3310                         "${dst} {${mask}}, $src}",
3311                         [], _.ExeDomain>,  EVEX, EVEX_K,
3312                         Sched<[Sched.RR]>;
3313  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3314                          (ins _.KRCWM:$mask, _.RC:$src),
3315                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3316                          "${dst} {${mask}} {z}, $src}",
3317                          [], _.ExeDomain>, EVEX, EVEX_KZ,
3318                          Sched<[Sched.RR]>;
3319  }
3320
  // Plain store; the pattern is dropped when NoMRPattern is set. This `let`
  // has no braces, so it covers `mr` only.
3321  let hasSideEffects = 0, mayStore = 1 in
3322  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3323                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3324                    !if(NoMRPattern, [],
3325                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3326                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>;
  // Masked store; it has no inline pattern and is selected by the mstore Pat
  // below.
  // NOTE(review): mrk is outside the mayStore/hasSideEffects `let` above;
  // presumably those flags are inferred from the Pat - confirm.
3327  def mrk : AVX512PI<opc, MRMDestMem, (outs),
3328                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3329              OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3330               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>;
3331
3332  def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3333           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3334                                                        _.KRCWM:$mask, _.RC:$src)>;
3335
  // "<mnemonic>.s" aliases map to the three _REV register forms.
3336  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3337                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3338                   _.RC:$dst, _.RC:$src), 0>;
3339  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3340                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3341                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3342  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3343                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3344                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3345}
3346
// Instantiates avx512_store with the `store` and `masked_store` fragments for
// each vector width described by `_`:
//   Z    uses _.info512 / Sched.ZMM and requires only `prd`;
//   Z256 uses _.info256 / Sched.YMM and requires `prd` plus HasVLX;
//   Z128 uses _.info128 / Sched.XMM and requires `prd` plus HasVLX.
// opc, OpcodeStr and NoMRPattern are forwarded unchanged, and NAME is passed
// as the third template argument of avx512_store.
3347multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3348                            AVX512VLVectorVTInfo _, Predicate prd,
3349                            X86SchedWriteMoveLSWidths Sched,
3350                            bit NoMRPattern = 0> {
      // 512-bit form: only the base feature predicate is needed.
3351  let Predicates = [prd] in
3352  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3353                        masked_store, Sched.ZMM, NoMRPattern>, EVEX_V512;
      // 256-bit and 128-bit forms additionally require HasVLX.
3354  let Predicates = [prd, HasVLX] in {
3355    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3356                             masked_store, Sched.YMM, NoMRPattern>, EVEX_V256;
3357    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3358                             masked_store, Sched.XMM, NoMRPattern>, EVEX_V128;
3359  }
3360}
3361
// Same shape as avx512_store_vl, but instantiates avx512_store with the
// `alignedstore` and `masked_store_aligned` fragments instead of `store` and
// `masked_store`:
//   Z    uses _.info512 / Sched.ZMM and requires only `prd`;
//   Z256 uses _.info256 / Sched.YMM and requires `prd` plus HasVLX;
//   Z128 uses _.info128 / Sched.XMM and requires `prd` plus HasVLX.
// opc, OpcodeStr and NoMRPattern are forwarded unchanged, and NAME is passed
// as the third template argument of avx512_store.
3362multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3363                                  AVX512VLVectorVTInfo _, Predicate prd,
3364                                  X86SchedWriteMoveLSWidths Sched,
3365                                  bit NoMRPattern = 0> {
      // 512-bit form: only the base feature predicate is needed.
3366  let Predicates = [prd] in
3367  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3368                        masked_store_aligned, Sched.ZMM, NoMRPattern>, EVEX_V512;
3369
      // 256-bit and 128-bit forms additionally require HasVLX.
3370  let Predicates = [prd, HasVLX] in {
3371    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3372                             masked_store_aligned, Sched.YMM, NoMRPattern>, EVEX_V256;
3373    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3374                             masked_store_aligned, Sched.XMM, NoMRPattern>, EVEX_V128;
3375  }
3376}
3377
// Packed floating-point moves. Each defm pairs a load multiclass with the
// matching store multiclass so that one NAME prefix yields both directions:
//   vmovaps / vmovapd : aligned forms, load opcode 0x28, store opcode 0x29.
//   vmovups / vmovupd : load opcode 0x10, store opcode 0x11.
// All four are gated on HasAVX512 and scheduled with SchedWriteFMoveLS. The
// *PS forms use TB with 32-bit EVEX_CD8 elements; the *PD forms add PD and
// REX_W and use 64-bit EVEX_CD8 elements.
3378defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3379                                     HasAVX512, SchedWriteFMoveLS>,
3380               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3381                                      HasAVX512, SchedWriteFMoveLS>,
3382               TB, EVEX_CD8<32, CD8VF>;
3383
3384defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3385                                     HasAVX512, SchedWriteFMoveLS>,
3386               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3387                                      HasAVX512, SchedWriteFMoveLS>,
3388               TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
3389
// The two extra arguments (0, null_frag) passed to avx512_load_vl below are
// defined by that multiclass, whose header is not in this part of the file.
3390defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3391                              SchedWriteFMoveLS, 0, null_frag>,
3392               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3393                               SchedWriteFMoveLS>,
3394                               TB, EVEX_CD8<32, CD8VF>;
3395
3396defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3397                              SchedWriteFMoveLS, 0, null_frag>,
3398               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3399                               SchedWriteFMoveLS>,
3400               TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
3401
// Packed integer moves. Every defm uses load opcode 0x6F and store opcode
// 0x7F with SchedWriteVecMoveLS; the variants are distinguished by prefix
// (PD for vmovdqa*, XD for vmovdqu8/16, XS for vmovdqu32/64), by REX_W, and by
// the EVEX_CD8 element size. vmovdqu8 and vmovdqu16 are gated on HasBWI, the
// rest on HasAVX512.
//
// VMOVDQA32, VMOVDQU8, VMOVDQU16 and VMOVDQU32 pass 1 as the store
// multiclass' NoMRPattern argument, so their unmasked `mr` stores are defined
// without a selection pattern. The meaning of the trailing arguments passed
// to the load multiclasses is defined by those multiclasses, outside this
// part of the file.
3402defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3403                                       HasAVX512, SchedWriteVecMoveLS, 1>,
3404                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3405                                        HasAVX512, SchedWriteVecMoveLS, 1>,
3406                 TB, PD, EVEX_CD8<32, CD8VF>;
3407
3408defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3409                                       HasAVX512, SchedWriteVecMoveLS>,
3410                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3411                                        HasAVX512, SchedWriteVecMoveLS>,
3412                 TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
3413
3414defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3415                               SchedWriteVecMoveLS, 1>,
3416                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3417                                SchedWriteVecMoveLS, 1>,
3418                TB, XD, EVEX_CD8<8, CD8VF>;
3419
3420defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3421                                SchedWriteVecMoveLS, 1>,
3422                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3423                                 SchedWriteVecMoveLS, 1>,
3424                 TB, XD, REX_W, EVEX_CD8<16, CD8VF>;
3425
3426defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3427                                SchedWriteVecMoveLS, 1, null_frag>,
3428                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3429                                 SchedWriteVecMoveLS, 1>,
3430                 TB, XS, EVEX_CD8<32, CD8VF>;
3431
3432defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3433                                SchedWriteVecMoveLS, 0, null_frag>,
3434                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3435                                 SchedWriteVecMoveLS>,
3436                 TB, XS, REX_W, EVEX_CD8<64, CD8VF>;
3437
3438// Special instructions to help with spilling when we don't have VLX. We need
3439// to load or store from a ZMM register instead. These are converted in
3440// expandPostRAPseudos.
//
// All eight records are pseudos (isPseudo = 1, opcode 0, Pseudo format) with
// an empty assembly string and no selection pattern. They come in aligned
// (VMOVAPS*) and unaligned-named (VMOVUPS*) flavours for 128-bit (VR128X) and
// 256-bit (VR256X) registers.
//
// Load pseudos: additionally marked rematerializable and foldable as a load.
3441let isReMaterializable = 1, canFoldAsLoad = 1,
3442    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3443def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3444                            "", []>, Sched<[WriteFLoadX]>;
3445def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3446                            "", []>, Sched<[WriteFLoadY]>;
3447def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3448                            "", []>, Sched<[WriteFLoadX]>;
3449def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3450                            "", []>, Sched<[WriteFLoadY]>;
3451}
3452
// Store pseudos: memory operand first, then the source register.
3453let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3454def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3455                            "", []>, Sched<[WriteFStoreX]>;
3456def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3457                            "", []>, Sched<[WriteFStoreY]>;
3458def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3459                            "", []>, Sched<[WriteFStoreX]>;
3460def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3461                            "", []>, Sched<[WriteFStoreY]>;
3462}
3463
// 512-bit selects whose "true" operand is all-zeros and whose "false" operand
// is a register. These are selected as a zero-masking register move of $src
// under the inverted mask, so lanes where the original mask is set become
// zero and the remaining lanes keep $src.
//
// v8i64: the 8-bit mask is copied to VK16, inverted with the 16-bit KNOTWrr,
// and copied back to VK8 (presumably because no byte-wide KNOT can be assumed
// here -- TODO confirm).
3464def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3465                          (v8i64 VR512:$src))),
3466   (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3467                                              VK8), VR512:$src)>;
3468
// v16i32: the mask is already 16 bits wide, so KNOTWrr applies directly.
3469def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3470                           (v16i32 VR512:$src))),
3471                  (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3472
3473// These patterns exist to prevent the above patterns from introducing a second
3474// mask inversion when one already exists.
// Here the select condition is already (vnot mask), so the un-inverted mask
// is used directly as the zero-masking write mask.
3475def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
3476                          (v8i64 immAllZerosV),
3477                          (v8i64 VR512:$src))),
3478                 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
// NOTE(review): the source side names the operand VK16:$mask while the result
// side spells it VK16WM:$mask; the v8i64 pattern above uses VK8 on both
// sides. Confirm the differing register class here is intentional.
3479def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
3480                           (v16i32 immAllZerosV),
3481                           (v16i32 VR512:$src))),
3482                  (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3483
// Selects a vselect on a Narrow vector type by performing the masked move at
// the Wide vector type and extracting the narrow result again.
//
//   InstrStr - instruction name prefix; "rrk" / "rrkz" is appended and the
//              result is cast to an Instruction.
//   Narrow   - type info of the vselect being matched.
//   Wide     - type info of the instruction actually emitted.
//
// Narrow operands are widened with INSERT_SUBREG into an IMPLICIT_DEF at
// Narrow.SubRegIdx, the mask is moved into Wide.KRCWM with COPY_TO_REGCLASS,
// and the result is narrowed with EXTRACT_SUBREG at the same subregister
// index.
3484multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3485                              X86VectorVTInfo Wide> {
     // Merge form: $src0 is the pass-through value (first operand of "rrk"),
     // $src1 is the value taken where the mask is set.
3486 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3487                               Narrow.RC:$src1, Narrow.RC:$src0)),
3488           (EXTRACT_SUBREG
3489            (Wide.VT
3490             (!cast<Instruction>(InstrStr#"rrk")
3491              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3492              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3493              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3494            Narrow.SubRegIdx)>;
3495
     // Zeroing form: the "false" operand is all-zeros, so "rrkz" is used and
     // no pass-through register is needed.
3496 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3497                               Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3498           (EXTRACT_SUBREG
3499            (Wide.VT
3500             (!cast<Instruction>(InstrStr#"rrkz")
3501              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3502              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3503            Narrow.SubRegIdx)>;
3504}
3505
3506// Patterns for handling masked selects of 128-bit and 256-bit vectors when
3507// VLX isn't available. Use a 512-bit operation and extract.
// Each line pairs a narrow (128-bit or 256-bit) type with the 512-bit type of
// the same element kind and names the 512-bit instruction prefix to use.
//
// 32-bit and 64-bit element types: need AVX512 without VLX, with 512-bit
// EVEX available.
3508let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
3509  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3510  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3511  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3512  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3513
3514  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3515  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3516  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3517  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3518}
3519
// 8-bit and 16-bit element types (including f16 and bf16, which reuse the
// VMOVDQU16Z instructions): need BWI without VLX, with 512-bit EVEX available.
3520let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
3521  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3522  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3523
3524  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3525  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3526
3527  defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>;
3528  defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>;
3529
3530  defm : mask_move_lowering<"VMOVDQU16Z", v8bf16x_info, v32bf16_info>;
3531  defm : mask_move_lowering<"VMOVDQU16Z", v16bf16x_info, v32bf16_info>;
3532}
3533
// Unmasked 512-bit loads and stores for vector types other than the ones
// matched by the instruction definitions themselves. Mapping used below:
//   v16i32, v32i16, v64i8 : VMOVDQA64Z* for aligned accesses,
//                           VMOVDQU64Z* otherwise.
//   v32f16, v32bf16       : VMOVAPSZ*   for aligned accesses,
//                           VMOVUPSZ*   otherwise.
3534let Predicates = [HasAVX512] in {
3535  // 512-bit load.
3536  def : Pat<(alignedloadv16i32 addr:$src),
3537            (VMOVDQA64Zrm addr:$src)>;
3538  def : Pat<(alignedloadv32i16 addr:$src),
3539            (VMOVDQA64Zrm addr:$src)>;
3540  def : Pat<(alignedloadv32f16 addr:$src),
3541            (VMOVAPSZrm addr:$src)>;
3542  def : Pat<(alignedloadv32bf16 addr:$src),
3543            (VMOVAPSZrm addr:$src)>;
3544  def : Pat<(alignedloadv64i8 addr:$src),
3545            (VMOVDQA64Zrm addr:$src)>;
3546  def : Pat<(loadv16i32 addr:$src),
3547            (VMOVDQU64Zrm addr:$src)>;
3548  def : Pat<(loadv32i16 addr:$src),
3549            (VMOVDQU64Zrm addr:$src)>;
3550  def : Pat<(loadv32f16 addr:$src),
3551            (VMOVUPSZrm addr:$src)>;
3552  def : Pat<(loadv32bf16 addr:$src),
3553            (VMOVUPSZrm addr:$src)>;
3554  def : Pat<(loadv64i8 addr:$src),
3555            (VMOVDQU64Zrm addr:$src)>;
3556
3557  // 512-bit store.
3558  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3559            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3560  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3561            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3562  def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
3563            (VMOVAPSZmr addr:$dst, VR512:$src)>;
3564  def : Pat<(alignedstore (v32bf16 VR512:$src), addr:$dst),
3565            (VMOVAPSZmr addr:$dst, VR512:$src)>;
3566  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3567            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3568  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3569            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3570  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3571            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3572  def : Pat<(store (v32f16 VR512:$src), addr:$dst),
3573            (VMOVUPSZmr addr:$dst, VR512:$src)>;
3574  def : Pat<(store (v32bf16 VR512:$src), addr:$dst),
3575            (VMOVUPSZmr addr:$dst, VR512:$src)>;
3576  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3577            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3578}
3579
// Unmasked 128-bit and 256-bit loads and stores, available with VLX. The
// mapping mirrors the 512-bit patterns, using the Z128 / Z256 instructions:
//   i32, i16 and i8 element vectors : VMOVDQA64* for aligned accesses,
//                                     VMOVDQU64* otherwise.
//   f16 and bf16 element vectors    : VMOVAPS*   for aligned accesses,
//                                     VMOVUPS*   otherwise.
3580let Predicates = [HasVLX] in {
3581  // 128-bit load.
3582  def : Pat<(alignedloadv4i32 addr:$src),
3583            (VMOVDQA64Z128rm addr:$src)>;
3584  def : Pat<(alignedloadv8i16 addr:$src),
3585            (VMOVDQA64Z128rm addr:$src)>;
3586  def : Pat<(alignedloadv8f16 addr:$src),
3587            (VMOVAPSZ128rm addr:$src)>;
3588  def : Pat<(alignedloadv8bf16 addr:$src),
3589            (VMOVAPSZ128rm addr:$src)>;
3590  def : Pat<(alignedloadv16i8 addr:$src),
3591            (VMOVDQA64Z128rm addr:$src)>;
3592  def : Pat<(loadv4i32 addr:$src),
3593            (VMOVDQU64Z128rm addr:$src)>;
3594  def : Pat<(loadv8i16 addr:$src),
3595            (VMOVDQU64Z128rm addr:$src)>;
3596  def : Pat<(loadv8f16 addr:$src),
3597            (VMOVUPSZ128rm addr:$src)>;
3598  def : Pat<(loadv8bf16 addr:$src),
3599            (VMOVUPSZ128rm addr:$src)>;
3600  def : Pat<(loadv16i8 addr:$src),
3601            (VMOVDQU64Z128rm addr:$src)>;
3602
3603  // 128-bit store.
3604  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3605            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3606  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3607            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3608  def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
3609            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3610  def : Pat<(alignedstore (v8bf16 VR128X:$src), addr:$dst),
3611            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3612  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3613            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3614  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3615            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3616  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3617            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3618  def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
3619            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3620  def : Pat<(store (v8bf16 VR128X:$src), addr:$dst),
3621            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3622  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3623            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3624
3625  // 256-bit load.
3626  def : Pat<(alignedloadv8i32 addr:$src),
3627            (VMOVDQA64Z256rm addr:$src)>;
3628  def : Pat<(alignedloadv16i16 addr:$src),
3629            (VMOVDQA64Z256rm addr:$src)>;
3630  def : Pat<(alignedloadv16f16 addr:$src),
3631            (VMOVAPSZ256rm addr:$src)>;
3632  def : Pat<(alignedloadv16bf16 addr:$src),
3633            (VMOVAPSZ256rm addr:$src)>;
3634  def : Pat<(alignedloadv32i8 addr:$src),
3635            (VMOVDQA64Z256rm addr:$src)>;
3636  def : Pat<(loadv8i32 addr:$src),
3637            (VMOVDQU64Z256rm addr:$src)>;
3638  def : Pat<(loadv16i16 addr:$src),
3639            (VMOVDQU64Z256rm addr:$src)>;
3640  def : Pat<(loadv16f16 addr:$src),
3641            (VMOVUPSZ256rm addr:$src)>;
3642  def : Pat<(loadv16bf16 addr:$src),
3643            (VMOVUPSZ256rm addr:$src)>;
3644  def : Pat<(loadv32i8 addr:$src),
3645            (VMOVDQU64Z256rm addr:$src)>;
3646
3647  // 256-bit store.
3648  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3649            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3650  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3651            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3652  def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
3653            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3654  def : Pat<(alignedstore (v16bf16 VR256X:$src), addr:$dst),
3655            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3656  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3657            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3658  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3659            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3660  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3661            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3662  def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
3663            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3664  def : Pat<(store (v16bf16 VR256X:$src), addr:$dst),
3665            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3666  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3667            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3668}
3669
// Masked move, masked load and masked store patterns for a 16-bit
// floating-point vector family `_` (an AVX512VLVectorVTInfo), all selected to
// the VMOVDQU16 instructions.
//
// For each width (512-bit under HasBWI; 256-bit and 128-bit under HasBWI plus
// HasVLX) the same ten patterns are emitted:
//   - vselect of two registers                  -> rrk
//   - vselect of a register and zero            -> rrkz
//   - vselect of an AlignedLdFrag load and reg  -> rmk
//   - vselect of an AlignedLdFrag load and zero -> rmkz
//   - vselect of a LdFrag load and reg          -> rmk
//   - vselect of a LdFrag load and zero         -> rmkz
//   - masked_load with a register pass-through  -> rmk
//   - masked_load with an undef pass-through    -> rmkz
//   - masked_load with a zero pass-through      -> rmkz
//   - masked_store                              -> mrk
// In the merge forms $src0 is the pass-through value and is the first
// instruction operand.
3670multiclass mask_move_lowering_f16_bf16<AVX512VLVectorVTInfo _> {
    // 512-bit patterns (32-element mask).
3671let Predicates = [HasBWI] in {
3672  def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), (_.info512.VT VR512:$src0))),
3673            (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
3674  def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), _.info512.ImmAllZerosV)),
3675            (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
3676  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3677                     (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), (_.info512.VT VR512:$src0))),
3678            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3679  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3680                     (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), _.info512.ImmAllZerosV)),
3681            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3682  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3683                     (_.info512.VT (_.info512.LdFrag addr:$src)), (_.info512.VT VR512:$src0))),
3684            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3685  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3686                     (_.info512.VT (_.info512.LdFrag addr:$src)), _.info512.ImmAllZerosV)),
3687            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3688  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, (_.info512.VT VR512:$src0))),
3689            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3690  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, undef)),
3691            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3692  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, _.info512.ImmAllZerosV)),
3693            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3694
3695  def : Pat<(masked_store (_.info512.VT VR512:$src), addr:$dst, VK32WM:$mask),
3696            (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
3697}
    // 256-bit (16-element mask) and 128-bit (8-element mask) patterns.
3698let Predicates = [HasBWI, HasVLX] in {
3699  def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src0))),
3700            (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
3701  def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), _.info256.ImmAllZerosV)),
3702            (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
3703  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3704                     (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
3705            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3706  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3707                     (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), _.info256.ImmAllZerosV)),
3708            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3709  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3710                     (_.info256.VT (_.info256.LdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
3711            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3712  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3713                     (_.info256.VT (_.info256.LdFrag addr:$src)), _.info256.ImmAllZerosV)),
3714            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3715  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, (_.info256.VT VR256X:$src0))),
3716            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3717  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, undef)),
3718            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3719  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, _.info256.ImmAllZerosV)),
3720            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3721
3722  def : Pat<(masked_store (_.info256.VT VR256X:$src), addr:$dst, VK16WM:$mask),
3723            (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;
3724
3725  def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), (_.info128.VT VR128X:$src0))),
3726            (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
3727  def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), _.info128.ImmAllZerosV)),
3728            (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
3729  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3730                     (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
3731            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3732  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3733                     (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), _.info128.ImmAllZerosV)),
3734            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3735  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3736                     (_.info128.VT (_.info128.LdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
3737            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3738  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3739                     (_.info128.VT (_.info128.LdFrag addr:$src)), _.info128.ImmAllZerosV)),
3740            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3741  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, (_.info128.VT VR128X:$src0))),
3742            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3743  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, undef)),
3744            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3745  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, _.info128.ImmAllZerosV)),
3746            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3747
3748  def : Pat<(masked_store (_.info128.VT VR128X:$src), addr:$dst, VK8WM:$mask),
3749            (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
3750}
3751}
3752
// Emit the VMOVDQU16-based masked move/load/store patterns for both 16-bit
// floating-point vector families: f16 and bf16.
3753defm : mask_move_lowering_f16_bf16<avx512vl_f16_info>;
3754defm : mask_move_lowering_f16_bf16<avx512vl_bf16_info>;
3755
3756// Move Int Doubleword to Packed Double Int
3757//
// EVEX-encoded vmovd/vmovq forms that move a 32-bit or 64-bit integer between
// a GPR or memory and the low element of an XMM register, plus codegen-only
// forms that bitconvert between GR64 and FR64X. Opcode 0x6E moves into the
// vector/FP register, opcode 0x7E moves out of it.
3758let ExeDomain = SSEPackedInt in {
// GR32 -> low element of v4i32.
3759def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3760                      "vmovd\t{$src, $dst|$dst, $src}",
3761                      [(set VR128X:$dst,
3762                        (v4i32 (scalar_to_vector GR32:$src)))]>,
3763                        EVEX, Sched<[WriteVecMoveFromGpr]>;
// 32-bit memory -> low element of v4i32.
3764def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3765                      "vmovd\t{$src, $dst|$dst, $src}",
3766                      [(set VR128X:$dst,
3767                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3768                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
// GR64 -> low element of v2i64.
3769def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3770                      "vmovq\t{$src, $dst|$dst, $src}",
3771                        [(set VR128X:$dst,
3772                          (v2i64 (scalar_to_vector GR64:$src)))]>,
3773                      EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
// 64-bit memory form of the above encoding. It has no selection pattern and
// is codegen-only, but is still made available to the disassembler.
3774let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3775def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3776                      (ins i64mem:$src),
3777                      "vmovq\t{$src, $dst|$dst, $src}", []>,
3778                      EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// Codegen-only bitconverts between a 64-bit GPR and a scalar f64 register.
3779let isCodeGenOnly = 1 in {
3780def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3781                       "vmovq\t{$src, $dst|$dst, $src}",
3782                       [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3783                       EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
// FR64X -> GR64 is a vector-to-GPR move, so it is scheduled with
// WriteVecMoveToGpr, like VMOVPQIto64Zrr and VMOVSS2DIZrr.
3784def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3785                         "vmovq\t{$src, $dst|$dst, $src}",
3786                         [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3787                         EVEX, REX_W, Sched<[WriteVecMoveToGpr]>;
3788}
3789} // ExeDomain = SSEPackedInt
3790
3791// Move Int Doubleword to Single Scalar
3792//
// Codegen-only vmovd (opcode 0x6E) that bitconverts a 32-bit GPR into a
// scalar f32 register (FR32X).
3793let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3794def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3795                      "vmovd\t{$src, $dst|$dst, $src}",
3796                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3797                      EVEX, Sched<[WriteVecMoveFromGpr]>;
3798} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3799
3800// Move doubleword from xmm register to r/m32
3801//
// Both forms use opcode 0x7E and read element 0 of the source as v4i32: the
// rr form writes it to a 32-bit GPR, the mr form stores it to 32-bit memory.
3802let ExeDomain = SSEPackedInt in {
3803def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3804                       "vmovd\t{$src, $dst|$dst, $src}",
3805                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3806                                        (iPTR 0)))]>,
3807                       EVEX, Sched<[WriteVecMoveToGpr]>;
3808def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
3809                       (ins i32mem:$dst, VR128X:$src),
3810                       "vmovd\t{$src, $dst|$dst, $src}",
3811                       [(store (i32 (extractelt (v4i32 VR128X:$src),
3812                                     (iPTR 0))), addr:$dst)]>,
3813                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3814} // ExeDomain = SSEPackedInt
3815
3816// Move quadword from xmm1 register to r/m64
3817//
// Four vmovq forms that read the low 64 bits of an XMM register:
//   VMOVPQIto64Zrr - opcode 0x7E, element 0 of v2i64 to a 64-bit GPR.
//   VMOVPQIto64Zmr - opcode 0x7E memory form; no pattern, codegen-only but
//                    still disassembled; requires In64BitMode.
//   VMOVPQI2QIZmr  - opcode 0xD6, stores element 0 of v2i64 to memory.
//   VMOVPQI2QIZrr  - opcode 0xD6 register-to-register form; no pattern,
//                    codegen-only but still disassembled.
3818let ExeDomain = SSEPackedInt in {
3819def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3820                      "vmovq\t{$src, $dst|$dst, $src}",
3821                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3822                                                   (iPTR 0)))]>,
3823                      TB, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>,
3824                      Requires<[HasAVX512]>;
3825
3826let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3827def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3828                      "vmovq\t{$src, $dst|$dst, $src}", []>, TB, PD,
3829                      EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>,
3830                      Requires<[HasAVX512, In64BitMode]>;
3831
3832def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3833                      (ins i64mem:$dst, VR128X:$src),
3834                      "vmovq\t{$src, $dst|$dst, $src}",
3835                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3836                              addr:$dst)]>,
3837                      EVEX, TB, PD, REX_W, EVEX_CD8<64, CD8VT1>,
3838                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
3839
3840let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3841def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3842                             (ins VR128X:$src),
3843                             "vmovq\t{$src, $dst|$dst, $src}", []>,
3844                             EVEX, REX_W, Sched<[SchedWriteVecLogic.XMM]>;
3845} // ExeDomain = SSEPackedInt
3846
// Accept the "vmovq.s" spelling for the 0xD6 register-to-register encoding.
// The trailing 0 is the InstAlias emit flag, so the alias is parse-only.
3847def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3848                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3849
// Select an X86vextractstore64 of a v2i64 register to the 0xD6 vmovq store
// (VMOVPQI2QIZmr), under HasAVX512.
3850let Predicates = [HasAVX512] in {
3851  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
3852            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
3853}
3854
3855// Move Scalar Single to Double Int
3856//
// Codegen-only vmovd (opcode 0x7E) that bitconverts a scalar f32 register
// (FR32X) into a 32-bit GPR.
3857let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3858def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3859                      (ins FR32X:$src),
3860                      "vmovd\t{$src, $dst|$dst, $src}",
3861                      [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3862                      EVEX, Sched<[WriteVecMoveToGpr]>;
3863} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3864
3865// Move Quadword Int to Packed Quadword Int
3866//
// vmovq load (opcode 0x7E in the AVX512XSI class): loads an i64 from memory
// and places it in the low element of a v2i64 via scalar_to_vector.
// NOTE(review): the other 64-bit memory forms in this section use
// EVEX_CD8<64, CD8VT1>; confirm that <8, CD8VT8> below is the intended,
// equivalent compressed-displacement setting.
3867let ExeDomain = SSEPackedInt in {
3868def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3869                      (ins i64mem:$src),
3870                      "vmovq\t{$src, $dst|$dst, $src}",
3871                      [(set VR128X:$dst,
3872                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3873                      EVEX, REX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3874} // ExeDomain = SSEPackedInt
3875
3876// Allow "vmovd" but print "vmovq".
// Both directions of the 64-bit GPR <-> XMM move accept the "vmovd" spelling.
// The trailing 0 is the InstAlias emit flag, so these aliases are only used
// when parsing and the instructions' own "vmovq" strings are printed.
3877def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3878                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3879def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3880                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3881
3882// Conversions between masks and scalar fp.
// There is no direct move used here between a mask register and a scalar FP
// register, so each bitconvert is done in two steps through a GPR:
//   f32 <-> v32i1 : VMOVSS2DIZrr / VMOVDI2SSZrr combined with KMOVDkr / KMOVDrk.
//   f64 <-> v64i1 : VMOVSDto64Zrr / VMOV64toSDZrr combined with KMOVQkr / KMOVQrk.
3883def : Pat<(v32i1 (bitconvert FR32X:$src)),
3884          (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
3885def : Pat<(f32 (bitconvert VK32:$src)),
3886          (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
3887
3888def : Pat<(v64i1 (bitconvert FR64X:$src)),
3889          (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
3890def : Pat<(f64 (bitconvert VK64:$src)),
3891          (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
3892
3893//===----------------------------------------------------------------------===//
3894// AVX-512  MOVSH, MOVSS, MOVSD
3895//===----------------------------------------------------------------------===//
3896
// Scalar move instruction family for one element type.
//   asm         - mnemonic used in every assembly string below.
//   OpNode      - DAG node matched by the register-register forms.
//   vzload_frag - load fragment matched by the vector-register load (rm).
//   _           - supplies the register classes, mask class, scalar memory
//                 operand, value types and execution domain.
//   prd         - predicate gating the definitions (defaults to HasAVX512).
// Defines rr/rrkz/rrk (register-register), rm/rm_alt/rmk/rmkz (load) and
// mr/mrk (store). Forms declared with an empty pattern list set
// mayLoad/mayStore and hasSideEffects explicitly.
3897multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
3898                              X86VectorVTInfo _, Predicate prd = HasAVX512> {
  // The unmasked register move additionally requires OptForSize, except when
  // prd is HasFP16.
3899  let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in
3900  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3901             (ins _.RC:$src1, _.RC:$src2),
3902             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3903             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3904             _.ExeDomain>, EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;
3905  let Predicates = [prd] in {
  // Zero-masking register form: X86selects between the OpNode result and an
  // all-zeros vector.
3906  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3907              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3908              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3909              "$dst {${mask}} {z}, $src1, $src2}"),
3910              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3911                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3912                                      _.ImmAllZerosV)))],
3913              _.ExeDomain>, EVEX, VVVV, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  // Merge-masking register form: $src0 is tied to $dst and is the third
  // X86selects operand.
3914  let Constraints = "$src0 = $dst"  in
3915  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3916             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3917             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3918             "$dst {${mask}}, $src1, $src2}"),
3919             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3920                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3921                                     (_.VT _.RC:$src0))))],
3922             _.ExeDomain>, EVEX, VVVV, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  // Unmasked loads; both are marked foldable-as-load and rematerializable.
3923  let canFoldAsLoad = 1, isReMaterializable = 1 in {
3924  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
3925             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3926             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
3927             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3928  // _alt version uses the scalar register class (_.FRC) instead of _.RC and
  // matches _.ScalarLdFrag; it is codegen-only.
3929  let isCodeGenOnly = 1 in
3930  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3931                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3932                 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3933                 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3934  }
  // Masked loads carry no pattern here, so mayLoad/hasSideEffects are given
  // explicitly.
3935  let mayLoad = 1, hasSideEffects = 0 in {
3936    let Constraints = "$src0 = $dst" in
3937    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3938               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3939               !strconcat(asm, "\t{$src, $dst {${mask}}|",
3940               "$dst {${mask}}, $src}"),
3941               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3942    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3943               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3944               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3945               "$dst {${mask}} {z}, $src}"),
3946               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
3947  }
  // Unmasked store of a scalar-class register (opcode 0x11).
3948  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3949             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3950             [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
3951             EVEX, Sched<[WriteFStore]>;
  // Masked store: no pattern here; the mask operand is VK1WM and the source
  // is a vector-class register (_.RC).
3952  let mayStore = 1, hasSideEffects = 0 in
3953  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3954              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
3955              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3956              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>;
3957  } // Predicates = [prd]
3958}
3959
3959
// Instantiate the scalar move family for f32 (VMOVSSZ), f64 (VMOVSDZ) and
// f16 (VMOVSHZ). Only VMOVSHZ overrides prd, passing HasFP16.
3960defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
3961                                  VEX_LIG, TB, XS, EVEX_CD8<32, CD8VT1>;
3962
3963defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
3964                                  VEX_LIG, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
3965
3966defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
3967                                  HasFP16>,
3968                                  VEX_LIG, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
3969
3969
// Selects the masked register moves InstrStr#rrk / InstrStr#rrkz for an
// OpNode whose second operand is a scalar_to_vector of a scalar X86selects.
//   InstrStr - instruction name prefix (e.g. "VMOVSSZ").
//   OpNode   - the scalar move DAG node to match.
//   ZeroFP   - leaf matching the zero constant of the element type.
//   _        - vector type info for the 128-bit vector.
3970multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
3971                                       PatLeaf ZeroFP, X86VectorVTInfo _> {
3972
// General select: the select's third operand ($src2) becomes the tied
// pass-through operand of rrk, $src0 the first vector source and the select's
// second operand ($src1) the second vector source.
3973def : Pat<(_.VT (OpNode _.RC:$src0,
3974                        (_.VT (scalar_to_vector
3975                                  (_.EltVT (X86selects VK1WM:$mask,
3976                                                       (_.EltVT _.FRC:$src1),
3977                                                       (_.EltVT _.FRC:$src2))))))),
3978          (!cast<Instruction>(InstrStr#rrk)
3979                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
3980                        VK1WM:$mask,
3981                        (_.VT _.RC:$src0),
3982                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
3983
// Select against zero: use the zero-masking form, which needs no pass-through.
3984def : Pat<(_.VT (OpNode _.RC:$src0,
3985                        (_.VT (scalar_to_vector
3986                                  (_.EltVT (X86selects VK1WM:$mask,
3987                                                       (_.EltVT _.FRC:$src1),
3988                                                       (_.EltVT ZeroFP))))))),
3989          (!cast<Instruction>(InstrStr#rrkz)
3990                        VK1WM:$mask,
3991                        (_.VT _.RC:$src0),
3992                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
3993}
3994
3994
// Selects InstrStr#mrk for a 512-bit masked_store whose stored value is a
// 128-bit vector inserted at index 0 of undef and whose mask matches Mask.
//   Mask   - mask DAG to match; it must bind $mask as a MaskRC register.
//   MaskRC - register class of $mask; the register is copied to VK1WM.
3995multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3996                                        dag Mask, RegisterClass MaskRC> {
3997
3998def : Pat<(masked_store
3999             (_.info512.VT (insert_subvector undef,
4000                               (_.info128.VT _.info128.RC:$src),
4001                               (iPTR 0))), addr:$dst, Mask),
4002          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4003                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4004                      _.info128.RC:$src)>;
4005
4006}
4007
4007
// Same match as avx512_store_scalar_lowering, but the $mask register is first
// inserted into an undefined i32 at sub-register index `subreg` and that i32
// is what gets copied to VK1WM.
4008multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4009                                               AVX512VLVectorVTInfo _,
4010                                               dag Mask, RegisterClass MaskRC,
4011                                               SubRegIndex subreg> {
4012
4013def : Pat<(masked_store
4014             (_.info512.VT (insert_subvector undef,
4015                               (_.info128.VT _.info128.RC:$src),
4016                               (iPTR 0))), addr:$dst, Mask),
4017          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4018                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4019                      _.info128.RC:$src)>;
4020
4021}
4022
4022
4023// This matches the more recent codegen from clang that avoids emitting a 512
4024// bit masked store directly. Codegen will widen 128-bit masked store to 512
4025// bits on AVX512F only targets.
// Mask512 is the mask DAG matched for the widened 512-bit store, Mask128 the
// one matched for the native 128-bit store; both must bind $mask as a MaskRC
// register. Both patterns select InstrStr#mrk.
4026multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4027                                               AVX512VLVectorVTInfo _,
4028                                               dag Mask512, dag Mask128,
4029                                               RegisterClass MaskRC,
4030                                               SubRegIndex subreg> {
4031
4032// AVX512F pattern.
4033def : Pat<(masked_store
4034             (_.info512.VT (insert_subvector undef,
4035                               (_.info128.VT _.info128.RC:$src),
4036                               (iPTR 0))), addr:$dst, Mask512),
4037          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4038                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4039                      _.info128.RC:$src)>;
4040
4041// AVX512VL pattern.
4042def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4043          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4044                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4045                      _.info128.RC:$src)>;
4046}
4047
4047
// Selects the masked scalar loads for the low 128 bits (extract_subvector at
// index 0) of a 512-bit masked_load whose mask matches Mask.
//   Mask   - mask DAG to match; it must bind $mask as a MaskRC register.
//   MaskRC - register class of $mask; the register is copied to VK1WM.
4048multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4049                                       dag Mask, RegisterClass MaskRC> {
4050
// Pass-through is all zeros: use the zero-masking load (rmkz).
4051def : Pat<(_.info128.VT (extract_subvector
4052                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
4053                                        _.info512.ImmAllZerosV)),
4054                           (iPTR 0))),
4055          (!cast<Instruction>(InstrStr#rmkz)
4056                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4057                      addr:$srcAddr)>;
4058
// Pass-through is X86vzmovl of $src inserted at index 0 of undef: use the
// merge-masking load (rmk) with $src as the tied operand.
4059def : Pat<(_.info128.VT (extract_subvector
4060                (_.info512.VT (masked_load addr:$srcAddr, Mask,
4061                      (_.info512.VT (insert_subvector undef,
4062                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4063                            (iPTR 0))))),
4064                (iPTR 0))),
4065          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4066                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4067                      addr:$srcAddr)>;
4068
4069}
4070
4070
// Same matches as avx512_load_scalar_lowering, but the $mask register is
// first inserted into an undefined i32 at sub-register index `subreg` and
// that i32 is what gets copied to VK1WM.
4071multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4072                                              AVX512VLVectorVTInfo _,
4073                                              dag Mask, RegisterClass MaskRC,
4074                                              SubRegIndex subreg> {
4075
// Zero pass-through -> rmkz.
4076def : Pat<(_.info128.VT (extract_subvector
4077                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
4078                                        _.info512.ImmAllZerosV)),
4079                           (iPTR 0))),
4080          (!cast<Instruction>(InstrStr#rmkz)
4081                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4082                      addr:$srcAddr)>;
4083
// X86vzmovl($src) pass-through -> rmk with $src tied.
4084def : Pat<(_.info128.VT (extract_subvector
4085                (_.info512.VT (masked_load addr:$srcAddr, Mask,
4086                      (_.info512.VT (insert_subvector undef,
4087                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4088                            (iPTR 0))))),
4089                (iPTR 0))),
4090          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4091                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4092                      addr:$srcAddr)>;
4093
4094}
4095
4095
4096// This matches the more recent codegen from clang that avoids emitting a 512
4097// bit masked load directly. Codegen will widen 128-bit masked load to 512
4098// bits on AVX512F only targets.
// Mask512 is the mask DAG matched for the widened 512-bit load, Mask128 the
// one matched for the native 128-bit load; both must bind $mask as a MaskRC
// register. Each width gets a zero pass-through pattern (rmkz) and an
// X86vzmovl($src) pass-through pattern (rmk).
4099multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4100                                              AVX512VLVectorVTInfo _,
4101                                              dag Mask512, dag Mask128,
4102                                              RegisterClass MaskRC,
4103                                              SubRegIndex subreg> {
4104// AVX512F patterns.
4105def : Pat<(_.info128.VT (extract_subvector
4106                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4107                                        _.info512.ImmAllZerosV)),
4108                           (iPTR 0))),
4109          (!cast<Instruction>(InstrStr#rmkz)
4110                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4111                      addr:$srcAddr)>;
4112
4113def : Pat<(_.info128.VT (extract_subvector
4114                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4115                      (_.info512.VT (insert_subvector undef,
4116                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4117                            (iPTR 0))))),
4118                (iPTR 0))),
4119          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4120                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4121                      addr:$srcAddr)>;
4122
4123// AVX512VL patterns.
4124def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4125                         _.info128.ImmAllZerosV)),
4126          (!cast<Instruction>(InstrStr#rmkz)
4127                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4128                      addr:$srcAddr)>;
4129
4130def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4131                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4132          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4133                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4134                      addr:$srcAddr)>;
4135}
4136
4136
// Register-move lowering for f32 and f64.
4137defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4138defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4139
// Masked-store lowering. Each instantiation matches a mask built from a
// general-purpose register ANDed with 1, at a different register width:
// GR32 truncated to i16, GR16, and GR8.
4140defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4141                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4142defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4143                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4144defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4145                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4146
4146
// FP16 (VMOVSHZ) counterparts of the scalar move/store/load lowerings and of
// the scalar X86selects patterns; everything in this block requires HasFP16.
4147let Predicates = [HasFP16] in {
4148defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
// Masked stores: v32i1 masks built from GR32 & 1, plus the subreg2 form whose
// mask comes from GR8 & 1 (inserted into a zero v32i1 for the 512-bit case).
4149defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4150                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4151defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4152                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4153defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4154                   (v32i1 (insert_subvector
4155                           (v32i1 immAllZerosV),
4156                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4157                           (iPTR 0))),
4158                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4159                   GR8, sub_8bit>;
4160
// Masked loads: same three mask shapes as the stores above.
4161defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4162                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4163defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4164                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4165defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4166                   (v32i1 (insert_subvector
4167                           (v32i1 immAllZerosV),
4168                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4169                           (iPTR 0))),
4170                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4171                   GR8, sub_8bit>;
4172
// Scalar f16 select between two registers: copy both scalars to VR128X, use
// the merge-masking move with $src2 as the tied operand and an undefined
// first vector source, then copy the result back to FR16X.
4173def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
4174          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
4175           (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
4176           VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4177           (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4178
// Scalar f16 select against zero: zero-masking move.
4179def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
4180          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4181           (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4182}
4183
4183
// Masked-store lowering for masks that originate from GR8 & 1.
// f32: the 128-bit mask is the low v4i1 of the v8i1 bitconvert; the 512-bit
// mask is that v4i1 inserted at index 0 of a zero v16i1.
4184defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4185                   (v16i1 (insert_subvector
4186                           (v16i1 immAllZerosV),
4187                           (v4i1 (extract_subvector
4188                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4189                                  (iPTR 0))),
4190                           (iPTR 0))),
4191                   (v4i1 (extract_subvector
4192                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4193                          (iPTR 0))), GR8, sub_8bit>;
// f64: the 128-bit mask is the low v2i1; the 512-bit mask is the low v8i1 of
// a zero v16i1 with that v2i1 inserted at index 0.
4194defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4195                   (v8i1
4196                    (extract_subvector
4197                     (v16i1
4198                      (insert_subvector
4199                       (v16i1 immAllZerosV),
4200                       (v2i1 (extract_subvector
4201                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4202                              (iPTR 0))),
4203                       (iPTR 0))),
4204                     (iPTR 0))),
4205                   (v2i1 (extract_subvector
4206                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4207                          (iPTR 0))), GR8, sub_8bit>;
4208
4208
// Masked-load lowering for f32/f64. The mask DAGs mirror the ones used by the
// masked-store instantiations: GR32 truncated to i16, GR16, and GR8, each
// ANDed with 1.
4209defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4210                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4211defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4212                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4213defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4214                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4215
// subreg2 forms: a 512-bit mask and a 128-bit mask derived from GR8 & 1.
4216defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4217                   (v16i1 (insert_subvector
4218                           (v16i1 immAllZerosV),
4219                           (v4i1 (extract_subvector
4220                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4221                                  (iPTR 0))),
4222                           (iPTR 0))),
4223                   (v4i1 (extract_subvector
4224                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4225                          (iPTR 0))), GR8, sub_8bit>;
4226defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4227                   (v8i1
4228                    (extract_subvector
4229                     (v16i1
4230                      (insert_subvector
4231                       (v16i1 immAllZerosV),
4232                       (v2i1 (extract_subvector
4233                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4234                              (iPTR 0))),
4235                       (iPTR 0))),
4236                     (iPTR 0))),
4237                   (v2i1 (extract_subvector
4238                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4239                          (iPTR 0))), GR8, sub_8bit>;
4240
4240
// Scalar f32/f64 X86selects lowered through the masked VMOVSS/VMOVSD forms.
// Scalars live in FR32X/FR64X, so operands are copied into VR128X and the
// result is copied back.

// f32, register/register: $src2 is the tied pass-through; the first vector
// source is left undefined.
4241def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4242          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4243           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4244           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4245           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4246
// f32, register/zero: zero-masking move.
4247def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4248          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4249           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4250
// f32, load/register and load/zero: masked loads, with $src0 as the tied
// pass-through in the merge-masking case.
4251def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4252          (COPY_TO_REGCLASS
4253           (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4254                                                       VK1WM:$mask, addr:$src)),
4255           FR32X)>;
4256def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4257          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4258
// f64 equivalents of the four f32 patterns above.
4259def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4260          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4261           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4262           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4263           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4264
4265def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4266          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4267           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4268
4269def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4270          (COPY_TO_REGCLASS
4271           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4272                                                       VK1WM:$mask, addr:$src)),
4273           FR64X)>;
4274def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4275          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
4276
4276
4277
// X86selects on whole 128-bit vectors. $src2 is the tied pass-through and
// $src1 is passed as both vector sources of the masked move.
4278def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
4279          (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4280def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
4281          (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4282
// Same, selecting against an all-zeros vector: zero-masking move.
4283def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
4284          (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4285def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
4286          (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4287
4287
// "_REV" register-register moves: same assembly strings as the rr/rrk/rrkz
// definitions in avx512_move_scalar, but encoded with opcode 0x11 and
// MRMDestReg instead of 0x10 and MRMSrcReg. They have no selection patterns,
// are codegen-only, and set ForceDisassemble.
4288let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  // FP16 forms (require HasFP16).
4289  let Predicates = [HasFP16] in {
4290    def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4291        (ins VR128X:$src1, VR128X:$src2),
4292        "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4293        []>, T_MAP5, XS, EVEX, VVVV, VEX_LIG,
4294        Sched<[SchedWriteFShuffle.XMM]>;
4295
4296    let Constraints = "$src0 = $dst" in
4297    def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4298        (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
4299         VR128X:$src1, VR128X:$src2),
4300        "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
4301          "$dst {${mask}}, $src1, $src2}",
4302        []>, T_MAP5, XS, EVEX_K, EVEX, VVVV, VEX_LIG,
4303        Sched<[SchedWriteFShuffle.XMM]>;
4304
4305    def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4306        (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4307        "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
4308          "$dst {${mask}} {z}, $src1, $src2}",
4309        []>, EVEX_KZ, T_MAP5, XS, EVEX, VVVV, VEX_LIG,
4310        Sched<[SchedWriteFShuffle.XMM]>;
4311  }
  // Single-precision forms.
4312  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4313                           (ins VR128X:$src1, VR128X:$src2),
4314                           "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4315                           []>, TB, XS, EVEX, VVVV, VEX_LIG,
4316                           Sched<[SchedWriteFShuffle.XMM]>;
4317
4318  let Constraints = "$src0 = $dst" in
4319  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4320                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4321                                                   VR128X:$src1, VR128X:$src2),
4322                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4323                                        "$dst {${mask}}, $src1, $src2}",
4324                             []>, EVEX_K, TB, XS, EVEX, VVVV, VEX_LIG,
4325                             Sched<[SchedWriteFShuffle.XMM]>;
4326
4327  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4328                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4329                         "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4330                                    "$dst {${mask}} {z}, $src1, $src2}",
4331                         []>, EVEX_KZ, TB, XS, EVEX, VVVV, VEX_LIG,
4332                         Sched<[SchedWriteFShuffle.XMM]>;
4333
  // Double-precision forms (REX_W set).
4334  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4335                           (ins VR128X:$src1, VR128X:$src2),
4336                           "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4337                           []>, TB, XD, EVEX, VVVV, VEX_LIG, REX_W,
4338                           Sched<[SchedWriteFShuffle.XMM]>;
4339
4340  let Constraints = "$src0 = $dst" in
4341  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4342                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4343                                                   VR128X:$src1, VR128X:$src2),
4344                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4345                                        "$dst {${mask}}, $src1, $src2}",
4346                             []>, EVEX_K, TB, XD, EVEX, VVVV, VEX_LIG,
4347                             REX_W, Sched<[SchedWriteFShuffle.XMM]>;
4348
4349  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4350                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4351                                                          VR128X:$src2),
4352                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4353                                         "$dst {${mask}} {z}, $src1, $src2}",
4354                              []>, EVEX_KZ, TB, XD, EVEX, VVVV, VEX_LIG,
4355                              REX_W, Sched<[SchedWriteFShuffle.XMM]>;
4356}
4357
4357
// Assembler aliases: the ".s"-suffixed mnemonics (vmovsh.s, vmovss.s,
// vmovsd.s) map to the corresponding _REV instructions in their unmasked,
// merge-masked and zero-masked forms.
// NOTE(review): the trailing 0 is InstAlias's last argument; presumably it
// keeps the printer from emitting the alias -- confirm against the InstAlias
// class definition.
4358def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4359                (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4360def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
4361                             "$dst {${mask}}, $src1, $src2}",
4362                (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
4363                                VR128X:$src1, VR128X:$src2), 0>;
4364def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4365                             "$dst {${mask}} {z}, $src1, $src2}",
4366                (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
4367                                 VR128X:$src1, VR128X:$src2), 0>;
4368def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4369                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4370def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4371                             "$dst {${mask}}, $src1, $src2}",
4372                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4373                                VR128X:$src1, VR128X:$src2), 0>;
4374def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4375                             "$dst {${mask}} {z}, $src1, $src2}",
4376                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4377                                 VR128X:$src1, VR128X:$src2), 0>;
4378def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4379                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4380def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4381                             "$dst {${mask}}, $src1, $src2}",
4382                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4383                                VR128X:$src1, VR128X:$src2), 0>;
4384def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4385                             "$dst {${mask}} {z}, $src1, $src2}",
4386                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4387                                 VR128X:$src1, VR128X:$src2), 0>;
4388
4388
// When optimizing for size, X86vzmovl of a 32-bit-element vector is selected
// as VMOVSSZrr with a zeroed register (AVX512_128_SET0) as the first source.
4389let Predicates = [HasAVX512, OptForSize] in {
4390  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4391            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4392  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4393            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4394
4395  // Move low f32 and clear high bits.
  // 256-bit: operate on the sub_xmm part and wrap the result with
  // SUBREG_TO_REG.
4396  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4397            (SUBREG_TO_REG (i32 0),
4398             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4399              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4400  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4401            (SUBREG_TO_REG (i32 0),
4402             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4403              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4404
  // 512-bit: same approach as the 256-bit patterns.
4405  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4406            (SUBREG_TO_REG (i32 0),
4407             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4408              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4409  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4410            (SUBREG_TO_REG (i32 0),
4411             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4412              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4413}
4414
4414
4415// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4416// VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
// Only the 512-bit X86vzmovl forms are defined here; each blends the sub_xmm
// part of the source into a zeroed register (V_SET0).
4417let Predicates = [HasAVX512, OptForSpeed] in {
  // Float: VBLENDPSrri with immediate 1.
4418  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4419            (SUBREG_TO_REG (i32 0),
4420             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4421                          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4422                          (i8 1))), sub_xmm)>;
  // Integer: VPBLENDWrri with immediate 3.
4423  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4424            (SUBREG_TO_REG (i32 0),
4425             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4426                          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4427                          (i8 3))), sub_xmm)>;
4428}
4429
4429
// Scalar FP loads into vector registers, selected as VMOVSSZrm / VMOVSDZrm.
4430let Predicates = [HasAVX512] in {
  // 128-bit scalar_to_vector of a loaded f32/f64.
4431  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4432            (VMOVSSZrm addr:$src)>;
4433  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4434            (VMOVSDZrm addr:$src)>;
4435
4436  // Represent the same patterns above but in the form they appear for
4437  // 256-bit types
4438  def : Pat<(v8f32 (X86vzload32 addr:$src)),
4439            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4440  def : Pat<(v4f64 (X86vzload64 addr:$src)),
4441            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4442
4443  // Represent the same patterns above but in the form they appear for
4444  // 512-bit types
4445  def : Pat<(v16f32 (X86vzload32 addr:$src)),
4446            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4447  def : Pat<(v8f64 (X86vzload64 addr:$src)),
4448            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4449}
// FP16 patterns: X86vzmovl of 16-bit-element vectors via VMOVSHZrr with a
// zeroed first source, and X86vzload16 via VMOVSHZrm. Unlike the 32-bit
// X86vzmovl patterns, these are not conditioned on OptForSize.
4450let Predicates = [HasFP16] in {
4451  def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
4452            (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
4453  def : Pat<(v8i16 (X86vzmovl (v8i16 VR128X:$src))),
4454            (VMOVSHZrr (v8i16 (AVX512_128_SET0)), VR128X:$src)>;
4455
4456  // FIXME we need better canonicalization in dag combine
  // 256-bit: operate on the sub_xmm part and wrap with SUBREG_TO_REG.
4457  def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
4458            (SUBREG_TO_REG (i32 0),
4459             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4460              (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4461  def : Pat<(v16i16 (X86vzmovl (v16i16 VR256X:$src))),
4462            (SUBREG_TO_REG (i32 0),
4463             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4464              (v8i16 (EXTRACT_SUBREG (v16i16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4465
4466  // FIXME we need better canonicalization in dag combine
  // 512-bit: same approach as the 256-bit patterns.
4467  def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
4468            (SUBREG_TO_REG (i32 0),
4469             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4470              (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
4471  def : Pat<(v32i16 (X86vzmovl (v32i16 VR512:$src))),
4472            (SUBREG_TO_REG (i32 0),
4473             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4474              (v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm)))), sub_xmm)>;
4475
  // Zero-extending 16-bit loads at 128, 256 and 512 bits.
4476  def : Pat<(v8f16 (X86vzload16 addr:$src)),
4477            (VMOVSHZrm addr:$src)>;
4478
4479  def : Pat<(v16f16 (X86vzload16 addr:$src)),
4480            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4481
4482  def : Pat<(v32f16 (X86vzload16 addr:$src)),
4483            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4484}
4485
4485
// Register-to-register "vmovq" (opcode 0x7E, REX_W): selected for X86vzmovl
// of a v2i64 value. Placed in the packed-integer execution domain and
// scheduled as an XMM vector-logic write.
4486let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4487def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4488                                (ins VR128X:$src),
4489                                "vmovq\t{$src, $dst|$dst, $src}",
4490                                [(set VR128X:$dst, (v2i64 (X86vzmovl
4491                                                   (v2i64 VR128X:$src))))]>,
4492                                EVEX, REX_W;
4493}
4494
4494
// Selection patterns, available with HasAVX512, that map scalar_to_vector,
// X86vzmovl and X86vzload32/64 nodes onto the EVEX 128-bit move instructions
// VMOVDI2PDIZrr/rm, VMOV64toPQIZrr, VMOVQI2PQIZrm and VMOVZPQILo2PQIZrr.
// For 256-bit and 512-bit result types the 128-bit instruction result is
// wrapped in SUBREG_TO_REG at sub_xmm.
4495let Predicates = [HasAVX512] in {
  // An any-extended GR8 is inserted into the low 8 bits (sub_8bit) of an
  // undefined i32 and then moved into the vector register.
4496  def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
4497            (VMOVDI2PDIZrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
4498                                              GR8:$src, sub_8bit)))>;
  // X86vzmovl of a scalar_to_vector GPR folds into the plain GPR->XMM move.
4499  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4500            (VMOVDI2PDIZrr GR32:$src)>;
4501
4502  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4503            (VMOV64toPQIZrr GR64:$src)>;
4504
4505  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
4506  def : Pat<(v4i32 (X86vzload32 addr:$src)),
4507            (VMOVDI2PDIZrm addr:$src)>;
4508  def : Pat<(v8i32 (X86vzload32 addr:$src)),
4509            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4510  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4511            (VMOVZPQILo2PQIZrr VR128X:$src)>;
4512  def : Pat<(v2i64 (X86vzload64 addr:$src)),
4513            (VMOVQI2PQIZrm addr:$src)>;
4514  def : Pat<(v4i64 (X86vzload64 addr:$src)),
4515            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4516
4517  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4518  def : Pat<(v16i32 (X86vzload32 addr:$src)),
4519            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4520  def : Pat<(v8i64 (X86vzload64 addr:$src)),
4521            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4522
  // 256-bit X86vzmovl on 64-bit elements: extract the low xmm subregister,
  // apply VMOVZPQILo2PQIZrr to it, and re-wrap the result at sub_xmm.
4523  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4524            (SUBREG_TO_REG (i32 0),
4525             (v2f64 (VMOVZPQILo2PQIZrr
4526                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4527             sub_xmm)>;
4528  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4529            (SUBREG_TO_REG (i32 0),
4530             (v2i64 (VMOVZPQILo2PQIZrr
4531                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4532             sub_xmm)>;
4533
  // 512-bit X86vzmovl on 64-bit elements: same scheme, starting from VR512.
4534  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4535            (SUBREG_TO_REG (i32 0),
4536             (v2f64 (VMOVZPQILo2PQIZrr
4537                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4538             sub_xmm)>;
4539  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4540            (SUBREG_TO_REG (i32 0),
4541             (v2i64 (VMOVZPQILo2PQIZrr
4542                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4543             sub_xmm)>;
4544}
4545
4546//===----------------------------------------------------------------------===//
4547// AVX-512 - Non-temporals
4548//===----------------------------------------------------------------------===//
4549
// EVEX-encoded non-temporal vector loads ("vmovntdqa", opcode 0x2A, MRMSrcMem,
// T8/PD, EVEX_CD8<64, CD8VF>).  The records carry an empty pattern list, so
// they are only selected through explicit Pat<> definitions.  The 512-bit
// form has no predicate attached here; the 256-bit and 128-bit forms require
// HasVLX.
//
// The loads are scheduled with the load member (.RM) of the non-temporal
// move class SchedWriteVecMoveLSNT, the same class family the vmovntdq
// stores in this file are instantiated with, rather than with the regular
// (temporal) SchedWriteVecMoveLS class.
4550def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4551                      (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4552                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLSNT.ZMM.RM]>,
4553                      EVEX, T8, PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4554
4555let Predicates = [HasVLX] in {
4556  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4557                       (ins i256mem:$src),
4558                       "vmovntdqa\t{$src, $dst|$dst, $src}",
4559                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLSNT.YMM.RM]>,
4560                       EVEX, T8, PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4561
4562  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4563                      (ins i128mem:$src),
4564                      "vmovntdqa\t{$src, $dst|$dst, $src}",
4565                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLSNT.XMM.RM]>,
4566                      EVEX, T8, PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
4567}
4568
// One memory-destination (MRMDestMem) store instruction, `mr`, for vector
// type `_`.  The store is matched through `st_frag` (alignednontemporalstore
// unless overridden), scheduled with the store member (.MR) of `Sched`, and
// given AddedComplexity = 400.
4569multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4570                        X86SchedWriteMoveLS Sched,
4571                        PatFrag st_frag = alignednontemporalstore> {
4572  let AddedComplexity = 400, SchedRW = [Sched.MR] in {
4573    def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4574                      OpcodeStr#"\t{$src, $dst|$dst, $src}",
4575                      [(st_frag (_.VT _.RC:$src), addr:$dst)], _.ExeDomain>,
4576             EVEX, EVEX_CD8<_.EltSize, CD8VF>;
  }
4577}
4578
// Expands avx512_movnt for all three vector lengths described by VTInfo,
// taking the per-width scheduling entry from Sched.
4579multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4580                           AVX512VLVectorVTInfo VTInfo,
4581                           X86SchedWriteMoveLSWidths Sched> {
  // 512-bit form: HasAVX512 only.
4582  let Predicates = [HasAVX512] in {
4583    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>,
             EVEX_V512;
  }
4584
  // 256-bit and 128-bit forms additionally require HasVLX.
4585  let Predicates = [HasAVX512, HasVLX] in {
4586    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>,
                EVEX_V256;
4587    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>,
                EVEX_V128;
4588  }
4589}
4590
// Non-temporal store instructions for all three vector lengths:
//   vmovntdq - opcode 0xE7, i64 element info, SchedWriteVecMoveLSNT
//   vmovntpd - opcode 0x2B, f64 element info, SchedWriteFMoveLSNT, REX_W
//   vmovntps - opcode 0x2B, f32 element info, SchedWriteFMoveLSNT
4591defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4592                                SchedWriteVecMoveLSNT>, TB, PD;
4593defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4594                                SchedWriteFMoveLSNT>, TB, PD, REX_W;
4595defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4596                                SchedWriteFMoveLSNT>, TB;
4597
// 512-bit non-temporal selection patterns (HasAVX512).  All of them carry
// AddedComplexity = 400.
4598let Predicates = [HasAVX512], AddedComplexity = 400 in {
  // Aligned non-temporal stores of the remaining 512-bit integer element
  // types are all selected to VMOVNTDQZmr.
4599  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4600            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4601  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4602            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4603  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4604            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4605
  // Aligned non-temporal loads of each listed 512-bit type (floating point
  // and integer alike) are selected to VMOVNTDQAZrm.
4606  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4607            (VMOVNTDQAZrm addr:$src)>;
4608  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4609            (VMOVNTDQAZrm addr:$src)>;
4610  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4611            (VMOVNTDQAZrm addr:$src)>;
4612  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4613            (VMOVNTDQAZrm addr:$src)>;
4614  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4615            (VMOVNTDQAZrm addr:$src)>;
4616  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4617            (VMOVNTDQAZrm addr:$src)>;
4618}
4619
// 256-bit and 128-bit non-temporal selection patterns (HasVLX).  All of them
// carry AddedComplexity = 400.
4620let Predicates = [HasVLX], AddedComplexity = 400 in {
  // 256-bit aligned non-temporal stores of i32/i16/i8 vectors use
  // VMOVNTDQZ256mr.
4621  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4622            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4623  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4624            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4625  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4626            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4627
  // 256-bit aligned non-temporal loads of each listed type use
  // VMOVNTDQAZ256rm.
4628  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4629            (VMOVNTDQAZ256rm addr:$src)>;
4630  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4631            (VMOVNTDQAZ256rm addr:$src)>;
4632  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4633            (VMOVNTDQAZ256rm addr:$src)>;
4634  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4635            (VMOVNTDQAZ256rm addr:$src)>;
4636  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4637            (VMOVNTDQAZ256rm addr:$src)>;
4638  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4639            (VMOVNTDQAZ256rm addr:$src)>;
4640
  // 128-bit aligned non-temporal stores of i32/i16/i8 vectors use
  // VMOVNTDQZ128mr.
4641  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4642            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4643  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4644            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4645  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4646            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4647
  // 128-bit aligned non-temporal loads of each listed type use
  // VMOVNTDQAZ128rm.
4648  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4649            (VMOVNTDQAZ128rm addr:$src)>;
4650  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4651            (VMOVNTDQAZ128rm addr:$src)>;
4652  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4653            (VMOVNTDQAZ128rm addr:$src)>;
4654  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4655            (VMOVNTDQAZ128rm addr:$src)>;
4656  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4657            (VMOVNTDQAZ128rm addr:$src)>;
4658  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4659            (VMOVNTDQAZ128rm addr:$src)>;
4660}
4661
4662//===----------------------------------------------------------------------===//
4663// AVX-512 - Integer arithmetic
4664//
// Two-source vector binary operation on type `_`, expanded through
// AVX512_maskable in two forms:
//   rr - both sources in registers (MRMSrcReg)
//   rm - second source is a full-vector load via _.LdFrag (MRMSrcMem)
// OpNode is the SelectionDAG node applied to the two sources.  IsCommutable
// is forwarded (twice) to the rr form only; the rm form relies on
// AVX512_maskable's defaults.
4665multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4666                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
4667                           bit IsCommutable = 0> {
4668  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4669                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4670                    "$src2, $src1", "$src1, $src2",
4671                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4672                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX, VVVV,
4673                    Sched<[sched]>;
4674
  // Memory form: scheduled as the folded-load variant of `sched`.
4675  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4676                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4677                  "$src2, $src1", "$src1, $src2",
4678                  (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4679                  AVX512BIBase, EVEX, VVVV,
4680                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4681}
4682
// Extends avx512_binop_rm (rr and rm forms) with a broadcast-memory form:
//   rmb - second source is a scalar memory operand loaded through
//         _.BroadcastLdFrag; the assembly operand gets the _.BroadcastStr
//         suffix and the record is tagged EVEX_B.
4683multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4684                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
4685                            bit IsCommutable = 0> :
4686           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4687  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4688                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4689                  "${src2}"#_.BroadcastStr#", $src1",
4690                  "$src1, ${src2}"#_.BroadcastStr,
4691                  (_.VT (OpNode _.RC:$src1,
4692                                (_.BroadcastLdFrag addr:$src2)))>,
4693                  AVX512BIBase, EVEX, VVVV, EVEX_B,
4694                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4695}
4696
// Instantiates avx512_binop_rm (rr/rm forms, no broadcast form) for the
// 512-, 256- and 128-bit members of VTInfo, using the matching width entry
// of `sched`.  `prd` is the feature predicate required by every width.
4697multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4698                              AVX512VLVectorVTInfo VTInfo,
4699                              X86SchedWriteWidths sched, Predicate prd,
4700                              bit IsCommutable = 0> {
  // 512-bit form: only `prd` is required.
4701  let Predicates = [prd] in {
4702    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode,
4703                             VTInfo.info512, sched.ZMM, IsCommutable>,
             EVEX_V512;
  }
4704
  // 256-bit and 128-bit forms: `prd` plus HasVLX.
4705  let Predicates = [prd, HasVLX] in {
4706    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode,
4707                                VTInfo.info256, sched.YMM, IsCommutable>,
                EVEX_V256;
4708    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode,
4709                                VTInfo.info128, sched.XMM, IsCommutable>,
                EVEX_V128;
4710  }
4711}
4712
// Instantiates avx512_binop_rmb (rr/rm/rmb forms) for the 512-, 256- and
// 128-bit members of VTInfo, using the matching width entry of `sched`.
// `prd` is the feature predicate required by every width.
4713multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4714                               AVX512VLVectorVTInfo VTInfo,
4715                               X86SchedWriteWidths sched, Predicate prd,
4716                               bit IsCommutable = 0> {
  // 512-bit form: only `prd` is required.
4717  let Predicates = [prd] in {
4718    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode,
4719                              VTInfo.info512, sched.ZMM, IsCommutable>,
             EVEX_V512;
  }
4720
  // 256-bit and 128-bit forms: `prd` plus HasVLX.
4721  let Predicates = [prd, HasVLX] in {
4722    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode,
4723                                 VTInfo.info256, sched.YMM, IsCommutable>,
                EVEX_V256;
4724    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode,
4725                                 VTInfo.info128, sched.XMM, IsCommutable>,
                EVEX_V128;
4726  }
4727}
4728
// 64-bit-element wrapper: avx512_binop_rmb_vl (including the broadcast form)
// over avx512vl_i64_info, tagged REX_W and EVEX_CD8<64, CD8VF>.
4729multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4730                                X86SchedWriteWidths sched, Predicate prd,
4731                                bit IsCommutable = 0> {
4732  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4733                                  sched, prd, IsCommutable>,
4734                                  REX_W, EVEX_CD8<64, CD8VF>;
4735}
4736
// 32-bit-element wrapper: avx512_binop_rmb_vl (including the broadcast form)
// over avx512vl_i32_info, tagged EVEX_CD8<32, CD8VF>.
4737multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4738                                X86SchedWriteWidths sched, Predicate prd,
4739                                bit IsCommutable = 0> {
4740  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4741                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4742}
4743
// 16-bit-element wrapper: avx512_binop_rm_vl (rr/rm only, no broadcast form)
// over avx512vl_i16_info, tagged EVEX_CD8<16, CD8VF> and WIG.
4744multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4745                                X86SchedWriteWidths sched, Predicate prd,
4746                                bit IsCommutable = 0> {
4747  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4748                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4749                                 WIG;
4750}
4751
// 8-bit-element wrapper: avx512_binop_rm_vl (rr/rm only, no broadcast form)
// over avx512vl_i8_info, tagged EVEX_CD8<8, CD8VF> and WIG.
4752multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4753                                X86SchedWriteWidths sched, Predicate prd,
4754                                bit IsCommutable = 0> {
4755  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4756                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4757                                 WIG;
4758}
4759
// Defines both the qword and dword variants of an operation: `Q` uses opc_q
// with mnemonic OpcodeStr + "q", `D` uses opc_d with OpcodeStr + "d".  The
// remaining arguments are forwarded unchanged to both.
4760multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4761                                 SDNode OpNode, X86SchedWriteWidths sched,
4762                                 Predicate prd, bit IsCommutable = 0> {
4763  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4764                                   IsCommutable>;
4765
4766  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4767                                   IsCommutable>;
4768}
4769
// Defines both the word and byte variants of an operation: `W` uses opc_w
// with mnemonic OpcodeStr + "w", `B` uses opc_b with OpcodeStr + "b".  The
// remaining arguments are forwarded unchanged to both.
4770multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4771                                 SDNode OpNode, X86SchedWriteWidths sched,
4772                                 Predicate prd, bit IsCommutable = 0> {
4773  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4774                                   IsCommutable>;
4775
4776  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
4777                                   IsCommutable>;
4778}
4779
// Defines an operation for all four integer element sizes: the dword/qword
// variants are predicated on HasAVX512, the byte/word variants on HasBWI.
4780multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4781                                  bits<8> opc_d, bits<8> opc_q,
4782                                  string OpcodeStr, SDNode OpNode,
4783                                  X86SchedWriteWidths sched,
4784                                  bit IsCommutable = 0> {
4785  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4786                                    sched, HasAVX512, IsCommutable>,
4787              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4788                                    sched, HasBWI, IsCommutable>;
4789}
4790
// Binary operation whose source type (_Src) and result type (_Dst) are given
// separately, with a third type (_Brdct) describing the broadcast-memory
// operand.  Produces rr, rm and rmb forms through AVX512_maskable (masking
// is described in terms of _Dst).
4791multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4792                            X86FoldableSchedWrite sched,
4793                            SDNode OpNode,X86VectorVTInfo _Src,
4794                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4795                            bit IsCommutable = 0> {
  // Register-register form; IsCommutable is passed once here.
4796  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4797                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4798                            "$src2, $src1","$src1, $src2",
4799                            (_Dst.VT (OpNode
4800                                         (_Src.VT _Src.RC:$src1),
4801                                         (_Src.VT _Src.RC:$src2))),
4802                            IsCommutable>,
4803                            AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
  // Register-memory form; the second source is a full-vector _Src load.
4804  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4805                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4806                        "$src2, $src1", "$src1, $src2",
4807                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4808                                      (_Src.LdFrag addr:$src2)))>,
4809                        AVX512BIBase, EVEX, VVVV,
4810                        Sched<[sched.Folded, sched.ReadAfterFold]>;
4811
  // Broadcast form; the broadcast load is typed as _Brdct.VT and passed to
  // OpNode through a bitconvert.
4812  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4813                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4814                    OpcodeStr,
4815                    "${src2}"#_Brdct.BroadcastStr#", $src1",
4816                     "$src1, ${src2}"#_Brdct.BroadcastStr,
4817                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4818                                 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
4819                    AVX512BIBase, EVEX, VVVV, EVEX_B,
4820                    Sched<[sched.Folded, sched.ReadAfterFold]>;
4821}
4822
// Integer add/subtract, saturating add/subtract, multiply and average
// instructions.  The trailing bit argument of each instantiation is
// IsCommutable: 1 for the add, multiply and average forms, 0 for the
// subtract forms.
// Plain add/sub for all four element sizes (opcodes in b, w, d, q order).
4823defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4824                                    SchedWriteVecALU, 1>;
4825defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4826                                    SchedWriteVecALU, 0>;
// Signed/unsigned saturating add/sub, byte and word only (HasBWI).
4827defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
4828                                    SchedWriteVecALU, HasBWI, 1>;
4829defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
4830                                    SchedWriteVecALU, HasBWI, 0>;
4831defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
4832                                     SchedWriteVecALU, HasBWI, 1>;
4833defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
4834                                     SchedWriteVecALU, HasBWI, 0>;
// Multiplies selected from the generic mul node: dword (HasAVX512),
// word (HasBWI) and qword (HasDQI).
4835defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4836                                    SchedWritePMULLD, HasAVX512, 1>, T8;
4837defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4838                                    SchedWriteVecIMul, HasBWI, 1>;
4839defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4840                                    SchedWriteVecIMul, HasDQI, 1>, T8;
// Word multiplies selected from mulhs / mulhu / X86mulhrs.
4841defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
4842                                    HasBWI, 1>;
4843defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
4844                                     HasBWI, 1>;
4845defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
4846                                      SchedWriteVecIMul, HasBWI, 1>, T8;
// Byte/word average selected from avgceilu.
4847defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
4848                                   SchedWriteVecALU, HasBWI, 1>;
// vpmuldq / vpmuludq are instantiated with the qword wrapper and selected
// from X86pmuldq / X86pmuludq.
4849defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
4850                                    SchedWriteVecIMul, HasAVX512, 1>, T8;
4851defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
4852                                     SchedWriteVecIMul, HasAVX512, 1>;
4853
// Instantiates avx512_binop_rm2 for 512-, 256- and 128-bit vectors with
// separate source and destination type families.  The broadcast operand is
// always described by a 64-bit-element type (v8i64/v4i64x/v2i64x), and every
// width is tagged EVEX_CD8<64, CD8VF> and REX_W.
4854multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4855                            X86SchedWriteWidths sched,
4856                            AVX512VLVectorVTInfo _SrcVTInfo,
4857                            AVX512VLVectorVTInfo _DstVTInfo,
4858                            SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
  // 512-bit form: only `prd` is required.
4859  let Predicates = [prd] in {
4860    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4861                                   _SrcVTInfo.info512, _DstVTInfo.info512,
4862                                   v8i64_info, IsCommutable>,
4863                  EVEX_V512, EVEX_CD8<64, CD8VF>, REX_W;
  }
  // 256-bit and 128-bit forms: HasVLX plus `prd`.
4864  let Predicates = [HasVLX, prd] in {
4865    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4866                                      _SrcVTInfo.info256, _DstVTInfo.info256,
4867                                      v4i64x_info, IsCommutable>,
4868                     EVEX_V256, EVEX_CD8<64, CD8VF>, REX_W;
4869    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4870                                      _SrcVTInfo.info128, _DstVTInfo.info128,
4871                                      v2i64x_info, IsCommutable>,
4872                     EVEX_V128, EVEX_CD8<64, CD8VF>, REX_W;
4873  }
4874}
4875
// vpmultishiftqb (opcode 0x83, T8): i8 source and destination vector types,
// selected from X86multishift, predicated on HasVBMI, not commutable.
4876defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
4877                                avx512vl_i8_info, avx512vl_i8_info,
4878                                X86multishift, HasVBMI, 0>, T8;
4879
// Broadcast-memory (rmb) form of a pack-style operation that reads _Src
// vectors and produces a _Dst vector.  The second source is a scalar memory
// operand loaded through _Src.BroadcastLdFrag and passed to OpNode through a
// bitconvert; the record is tagged EVEX_B and EVEX_CD8<_Src.EltSize, CD8VF>.
4880multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4881                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4882                            X86FoldableSchedWrite sched> {
4883  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4884                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4885                    OpcodeStr,
4886                    "${src2}"#_Src.BroadcastStr#", $src1",
4887                     "$src1, ${src2}"#_Src.BroadcastStr,
4888                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4889                                 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
4890                    EVEX, VVVV, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4891                    Sched<[sched.Folded, sched.ReadAfterFold]>;
4892}
4893
// Register-register (rr) and register-memory (rm) forms of an operation that
// reads two _Src vectors and produces a _Dst vector.  Both forms are tagged
// EVEX_CD8<_Src.EltSize, CD8VF>.  IsCommutable is forwarded (twice) to the rr
// form only.
4894multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4895                            SDNode OpNode,X86VectorVTInfo _Src,
4896                            X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
4897                            bit IsCommutable = 0> {
4898  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4899                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4900                            "$src2, $src1","$src1, $src2",
4901                            (_Dst.VT (OpNode
4902                                         (_Src.VT _Src.RC:$src1),
4903                                         (_Src.VT _Src.RC:$src2))),
4904                            IsCommutable, IsCommutable>,
4905                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX, VVVV, Sched<[sched]>;
  // Memory form: the second source is a full-vector _Src load.
4906  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4907                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4908                        "$src2, $src1", "$src1, $src2",
4909                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4910                                      (_Src.LdFrag addr:$src2)))>,
4911                         EVEX, VVVV, EVEX_CD8<_Src.EltSize, CD8VF>,
4912                         Sched<[sched.Folded, sched.ReadAfterFold]>;
4913}
4914
// i32 -> i16 pack for all three vector lengths.  Each width combines the
// rr/rm forms (avx512_packs_rm) with the broadcast form (avx512_packs_rmb)
// and is scheduled as SchedWriteShuffle.
4915multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4916                                    SDNode OpNode> {
  // 512-bit form: HasBWI only.
4917  let Predicates = [HasBWI] in {
4918    defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode,
4919                                  v16i32_info, v32i16_info, SchedWriteShuffle.ZMM>,
4920                  avx512_packs_rmb<opc, OpcodeStr, OpNode,
4921                                   v16i32_info, v32i16_info, SchedWriteShuffle.ZMM>,
                  EVEX_V512;
  }
  // 256-bit and 128-bit forms: HasBWI plus HasVLX.
4922  let Predicates = [HasBWI, HasVLX] in {
4923    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode,
4924                                     v8i32x_info, v16i16x_info, SchedWriteShuffle.YMM>,
4925                     avx512_packs_rmb<opc, OpcodeStr, OpNode,
4926                                      v8i32x_info, v16i16x_info, SchedWriteShuffle.YMM>,
4927                     EVEX_V256;
4928    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode,
4929                                     v4i32x_info, v8i16x_info, SchedWriteShuffle.XMM>,
4930                     avx512_packs_rmb<opc, OpcodeStr, OpNode,
4931                                      v4i32x_info, v8i16x_info, SchedWriteShuffle.XMM>,
4932                     EVEX_V128;
4933  }
4934}
// i16 -> i8 pack for all three vector lengths.  Only the rr/rm forms are
// produced (no broadcast form); every width is tagged WIG and scheduled as
// SchedWriteShuffle.
4935multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4936                            SDNode OpNode> {
  // 512-bit form: HasBWI only.
4937  let Predicates = [HasBWI] in {
4938    defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode,
4939                                  v32i16_info, v64i8_info, SchedWriteShuffle.ZMM>,
                  EVEX_V512, WIG;
  }
  // 256-bit and 128-bit forms: HasBWI plus HasVLX.
4940  let Predicates = [HasBWI, HasVLX] in {
4941    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode,
4942                                     v16i16x_info, v32i8x_info, SchedWriteShuffle.YMM>,
4943                     EVEX_V256, WIG;
4944    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode,
4945                                     v8i16x_info, v16i8x_info, SchedWriteShuffle.XMM>,
4946                     EVEX_V128, WIG;
4947  }
4948}
4949
// Multiply-add style operation built on avx512_packs_rm (rr/rm forms) for
// all three vector lengths of the given source and destination type
// families, scheduled as SchedWriteVecIMul.  IsCommutable is forwarded to
// every width.
4950multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4951                            SDNode OpNode, AVX512VLVectorVTInfo _Src,
4952                            AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  // 512-bit form: HasBWI only.
4953  let Predicates = [HasBWI] in {
4954    defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode,
4955                                  _Src.info512, _Dst.info512,
4956                                  SchedWriteVecIMul.ZMM, IsCommutable>,
                  EVEX_V512;
  }
  // 256-bit and 128-bit forms: HasBWI plus HasVLX.
4957  let Predicates = [HasBWI, HasVLX] in {
4958    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode,
4959                                     _Src.info256, _Dst.info256,
4960                                     SchedWriteVecIMul.YMM, IsCommutable>,
                     EVEX_V256;
4961    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode,
4962                                     _Src.info128, _Dst.info128,
4963                                     SchedWriteVecIMul.XMM, IsCommutable>,
                     EVEX_V128;
4964  }
4965}
4966
// Pack instructions: the dword->word forms are selected from X86Packss /
// X86Packus via avx512_packs_all_i32_i16, the word->byte forms via
// avx512_packs_all_i16_i8.  vpackusdw uses AVX5128IBase, the other three
// use AVX512BIBase.
4967defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4968defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4969defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4970defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4971
// Multiply-add instructions.  vpmaddubsw (i8 -> i16) leaves IsCommutable at
// its default of 0; vpmaddwd (i16 -> i32) passes IsCommutable = 1.
4972defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4973                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8, WIG;
4974defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4975                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, WIG;
4976
// Integer min/max instructions for byte, word, dword and qword elements.
// Byte and word forms are predicated on HasBWI, dword and qword forms on
// HasAVX512.  Every instantiation passes IsCommutable = 1.
// Signed maximum (smax).
4977defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4978                                    SchedWriteVecALU, HasBWI, 1>, T8;
4979defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4980                                    SchedWriteVecALU, HasBWI, 1>;
4981defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
4982                                    SchedWriteVecALU, HasAVX512, 1>, T8;
4983defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
4984                                    SchedWriteVecALU, HasAVX512, 1>, T8;
4985
// Unsigned maximum (umax).
4986defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4987                                    SchedWriteVecALU, HasBWI, 1>;
4988defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4989                                    SchedWriteVecALU, HasBWI, 1>, T8;
4990defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
4991                                    SchedWriteVecALU, HasAVX512, 1>, T8;
4992defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
4993                                    SchedWriteVecALU, HasAVX512, 1>, T8;
4994
// Signed minimum (smin).
4995defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4996                                    SchedWriteVecALU, HasBWI, 1>, T8;
4997defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4998                                    SchedWriteVecALU, HasBWI, 1>;
4999defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
5000                                    SchedWriteVecALU, HasAVX512, 1>, T8;
5001defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
5002                                    SchedWriteVecALU, HasAVX512, 1>, T8;
5003
// Unsigned minimum (umin).
5004defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
5005                                    SchedWriteVecALU, HasBWI, 1>;
5006defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
5007                                    SchedWriteVecALU, HasBWI, 1>, T8;
5008defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
5009                                    SchedWriteVecALU, HasAVX512, 1>, T8;
5010defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
5011                                    SchedWriteVecALU, HasAVX512, 1>, T8;
5012
5013// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX, HasEVEX512.
// Each narrow operand is inserted into an undefined v8i64 (INSERT_SUBREG on
// IMPLICIT_DEF), the 512-bit VPMULLQZ instruction is applied, and the low
// ymm/xmm subregister of the result is extracted.
5014let Predicates = [HasDQI, NoVLX, HasEVEX512] in {
  // 256-bit multiply, register-register.
5015  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5016            (EXTRACT_SUBREG
5017                (VPMULLQZrr
5018                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5019                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5020             sub_ymm)>;
  // 256-bit multiply by a 64-bit broadcast load: uses the rmb form directly.
5021  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5022            (EXTRACT_SUBREG
5023                (VPMULLQZrmb
5024                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5025                    addr:$src2),
5026             sub_ymm)>;
5027
  // 128-bit multiply, register-register.
5028  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5029            (EXTRACT_SUBREG
5030                (VPMULLQZrr
5031                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5032                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5033             sub_xmm)>;
  // 128-bit multiply by a 64-bit broadcast load.
5034  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5035            (EXTRACT_SUBREG
5036                (VPMULLQZrmb
5037                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5038                    addr:$src2),
5039             sub_xmm)>;
5040}
5041
// Patterns that implement a 256-bit or 128-bit i64 OpNode with a 512-bit
// instruction.  `Instr` is the instruction name prefix; "rr" or "rmb" is
// appended and the result is cast to an Instruction.  Operands are inserted
// into an undefined v8i64 and the low ymm/xmm subregister of the result is
// extracted.  No predicates are set here; the instantiation site supplies
// them.
5042multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  // 256-bit, register-register.
5043  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
5044            (EXTRACT_SUBREG
5045                (!cast<Instruction>(Instr#"rr")
5046                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5047                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5048             sub_ymm)>;
  // 256-bit, second operand is a 64-bit broadcast load.
5049  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5050            (EXTRACT_SUBREG
5051                (!cast<Instruction>(Instr#"rmb")
5052                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5053                    addr:$src2),
5054             sub_ymm)>;
5055
  // 128-bit, register-register.
5056  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
5057            (EXTRACT_SUBREG
5058                (!cast<Instruction>(Instr#"rr")
5059                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5060                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5061             sub_xmm)>;
  // 128-bit, second operand is a 64-bit broadcast load.
5062  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5063            (EXTRACT_SUBREG
5064                (!cast<Instruction>(Instr#"rmb")
5065                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5066                    addr:$src2),
5067             sub_xmm)>;
5068}
5069
// Without VLX (but with AVX512 and 512-bit EVEX), the 128/256-bit i64
// umax/umin/smax/smin nodes are implemented with the 512-bit
// VPMAXUQZ/VPMINUQZ/VPMAXSQZ/VPMINSQZ instructions.
5070let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
5071  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5072  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5073  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5074  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5075}
5076
5077//===----------------------------------------------------------------------===//
5078// AVX-512  Logical Instructions
5079//===----------------------------------------------------------------------===//
5080
// Packed integer and/or/xor/andn, instantiated through avx512_binop_rm_vl_dq
// (defined outside this section) with the SchedWriteVecLogic scheduling class
// and the HasAVX512 predicate.  Both opcode arguments are the same byte for
// each instruction.
// NOTE(review): the trailing `1` passed for and/or/xor but omitted for vpandn
// is presumably the IsCommutable flag -- confirm against
// avx512_binop_rm_vl_dq.
5081defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5082                                   SchedWriteVecLogic, HasAVX512, 1>;
5083defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5084                                  SchedWriteVecLogic, HasAVX512, 1>;
5085defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5086                                   SchedWriteVecLogic, HasAVX512, 1>;
5087defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5088                                    SchedWriteVecLogic, HasAVX512>;
5089
// 128/256-bit and/or/xor/andnp on byte and word element vectors (v16i8, v8i16,
// v32i8, v16i16) are selected to the Q-suffixed Z128/Z256 logic instructions.
// All patterns in this block require HasVLX.
5090let Predicates = [HasVLX] in {
  // 128-bit, register-register.
5091  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5092            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5093  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5094            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5095
5096  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5097            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5098  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5099            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5100
5101  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5102            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5103  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5104            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5105
5106  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5107            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5108  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5109            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5110
  // 128-bit, register-memory.  The element type is carried by the load
  // fragment (loadv16i8 / loadv8i16) rather than by an outer type cast.
5111  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5112            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5113  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5114            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5115
5116  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5117            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5118  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5119            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5120
5121  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5122            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5123  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5124            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5125
5126  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5127            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5128  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5129            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5130
  // 256-bit, register-register.
5131  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5132            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5133  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5134            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5135
5136  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5137            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5138  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5139            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5140
5141  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5142            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5143  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5144            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5145
5146  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5147            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5148  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5149            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5150
  // 256-bit, register-memory (loadv32i8 / loadv16i16).
5151  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5152            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5153  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5154            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5155
5156  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5157            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5158  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5159            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5160
5161  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5162            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5163  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5164            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5165
5166  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5167            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5168  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5169            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5170}
5171
// 512-bit and/or/xor/andnp on byte and word element vectors (v64i8, v32i16)
// are selected to the Q-suffixed 512-bit logic instructions.  All patterns in
// this block require HasAVX512.
5172let Predicates = [HasAVX512] in {
  // Register-register.
5173  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5174            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5175  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5176            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5177
5178  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5179            (VPORQZrr VR512:$src1, VR512:$src2)>;
5180  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5181            (VPORQZrr VR512:$src1, VR512:$src2)>;
5182
5183  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5184            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5185  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5186            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5187
5188  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5189            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5190  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5191            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5192
  // Register-memory.  The element type is carried by the load fragment
  // (loadv64i8 / loadv32i16).
5193  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5194            (VPANDQZrm VR512:$src1, addr:$src2)>;
5195  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5196            (VPANDQZrm VR512:$src1, addr:$src2)>;
5197
5198  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5199            (VPORQZrm VR512:$src1, addr:$src2)>;
5200  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5201            (VPORQZrm VR512:$src1, addr:$src2)>;
5202
5203  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5204            (VPXORQZrm VR512:$src1, addr:$src2)>;
5205  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5206            (VPXORQZrm VR512:$src1, addr:$src2)>;
5207
5208  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5209            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5210  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5211            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5212}
5213
5214// Patterns to catch vselect with different type than logic op.
// The logic op is computed in IntInfo.VT and bitconverted to _.VT before the
// vselect_mask.  Selecting against $src0 maps to the merge-masked form
// (InstrStr#rrk / #rmk, which takes $src0 as its first operand); selecting
// against _.ImmAllZerosV maps to the zero-masked form (InstrStr#rrkz / #rmkz).
//   InstrStr - instruction name prefix the rrk/rrkz/rmk/rmkz suffix is added to.
//   OpNode   - the logic SDNode being matched.
//   _        - type info of the vselect (mask class, register class, VT).
//   IntInfo  - type info of the logic operation itself.
5215multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5216                                    X86VectorVTInfo _,
5217                                    X86VectorVTInfo IntInfo> {
5218  // Masked register-register logical operations.
5219  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5220                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5221                   _.RC:$src0)),
5222            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5223             _.RC:$src1, _.RC:$src2)>;
5224
5225  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5226                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5227                   _.ImmAllZerosV)),
5228            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5229             _.RC:$src2)>;
5230
5231  // Masked register-memory logical operations.
5232  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5233                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5234                                            (load addr:$src2)))),
5235                   _.RC:$src0)),
5236            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5237             _.RC:$src1, addr:$src2)>;
5238  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5239                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5240                                            (load addr:$src2)))),
5241                   _.ImmAllZerosV)),
5242            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5243             addr:$src2)>;
5244}
5245
// Broadcast-memory counterpart of avx512_logical_lowering: the second logic
// operand is IntInfo.BroadcastLdFrag of addr:$src2, and the masked select is
// mapped to InstrStr#rmbk (merge with $src0) or InstrStr#rmbkz (select against
// _.ImmAllZerosV).
//   _       - type info of the vselect.
//   IntInfo - type info of the logic operation and of the broadcast load.
5246multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5247                                         X86VectorVTInfo _,
5248                                         X86VectorVTInfo IntInfo> {
5249  // Register-broadcast logical operations.
5250  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5251                   (bitconvert
5252                    (IntInfo.VT (OpNode _.RC:$src1,
5253                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5254                   _.RC:$src0)),
5255            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5256             _.RC:$src1, addr:$src2)>;
5257  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5258                   (bitconvert
5259                    (IntInfo.VT (OpNode _.RC:$src1,
5260                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5261                   _.ImmAllZerosV)),
5262            (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
5263             _.RC:$src1, addr:$src2)>;
5264}
5265
// Instantiates avx512_logical_lowering for the three vector widths of a
// (SelectInfo, IntInfo) pair: Z128 and Z256 under HasVLX, Z (512-bit) under
// HasAVX512.  The width suffix is appended to InstrStr.
5266multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5267                                         AVX512VLVectorVTInfo SelectInfo,
5268                                         AVX512VLVectorVTInfo IntInfo> {
5269let Predicates = [HasVLX] in {
5270  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5271                                 IntInfo.info128>;
5272  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5273                                 IntInfo.info256>;
5274}
5275let Predicates = [HasAVX512] in {
5276  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5277                                 IntInfo.info512>;
5278}
5279}
5280
// Instantiates avx512_logical_lowering_bcast for the three vector widths of a
// (SelectInfo, IntInfo) pair: Z128 and Z256 under HasVLX, Z (512-bit) under
// HasAVX512.  The width suffix is appended to InstrStr.
5281multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5282                                               AVX512VLVectorVTInfo SelectInfo,
5283                                               AVX512VLVectorVTInfo IntInfo> {
5284let Predicates = [HasVLX] in {
5285  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5286                                       SelectInfo.info128, IntInfo.info128>;
5287  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5288                                       SelectInfo.info256, IntInfo.info256>;
5289}
5290let Predicates = [HasAVX512] in {
5291  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5292                                       SelectInfo.info512, IntInfo.info512>;
5293}
5294}
5295
// Instantiates the masked-logic lowering patterns for every listed pairing of
// vselect element type and logic-op element type.  The instruction suffix
// follows the vselect type: "Q" for i64/f64 selects, "D" for i32/f32 selects.
// For the integer selects the same-type pairing (i64/i64, i32/i32) is not
// instantiated here.
5296multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5297  // i64 vselect with i32/i16/i8 logic op
5298  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5299                                       avx512vl_i32_info>;
5300  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5301                                       avx512vl_i16_info>;
5302  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5303                                       avx512vl_i8_info>;
5304
5305  // i32 vselect with i64/i16/i8 logic op
5306  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5307                                       avx512vl_i64_info>;
5308  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5309                                       avx512vl_i16_info>;
5310  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5311                                       avx512vl_i8_info>;
5312
5313  // f32 vselect with i64/i32/i16/i8 logic op
5314  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5315                                       avx512vl_i64_info>;
5316  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5317                                       avx512vl_i32_info>;
5318  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5319                                       avx512vl_i16_info>;
5320  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5321                                       avx512vl_i8_info>;
5322
5323  // f64 vselect with i64/i32/i16/i8 logic op
5324  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5325                                       avx512vl_i64_info>;
5326  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5327                                       avx512vl_i32_info>;
5328  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5329                                       avx512vl_i16_info>;
5330  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5331                                       avx512vl_i8_info>;
5332
  // Broadcast-load forms: f32 vselect with i32 logic op ("D"), and f64 vselect
  // with i64 logic op ("Q").
5333  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5334                                             avx512vl_f32_info,
5335                                             avx512vl_i32_info>;
5336  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5337                                             avx512vl_f64_info,
5338                                             avx512vl_i64_info>;
5339}
5340
// Instantiate the mixed-type masked-logic patterns for each of the four
// logic instruction families.
5341defm : avx512_logical_lowering_types<"VPAND", and>;
5342defm : avx512_logical_lowering_types<"VPOR",  or>;
5343defm : avx512_logical_lowering_types<"VPXOR", xor>;
5344defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5345
5346//===----------------------------------------------------------------------===//
5347// AVX-512  FP arithmetic
5348//===----------------------------------------------------------------------===//
5349
// Scalar FP binary operation, four forms sharing opcode `opc`:
//   rr_Int / rm_Int - operate on the vector register class _.RC and match
//                     VecNode; built through AVX512_maskable_scalar (defined
//                     outside this section).
//   rr / rm         - isCodeGenOnly forms on the scalar register class _.FRC
//                     that match OpNode; predicated on HasAVX512.
// Everything is marked as using MXCSR and possibly raising an FP exception.
//   sched        - scheduling class; memory forms use sched.Folded and
//                  sched.ReadAfterFold.
//   IsCommutable - applied to the `rr` form only.
5350multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5351                            SDPatternOperator OpNode, SDNode VecNode,
5352                            X86FoldableSchedWrite sched, bit IsCommutable> {
5353  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5354  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5355                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5356                           "$src2, $src1", "$src1, $src2",
5357                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5358                           Sched<[sched]>;
5359
5360  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5361                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5362                         "$src2, $src1", "$src1, $src2",
5363                         (_.VT (VecNode _.RC:$src1,
5364                                        (_.ScalarIntMemFrags addr:$src2)))>,
5365                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Codegen-only forms on the scalar FP register class.
5366  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5367  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5368                         (ins _.FRC:$src1, _.FRC:$src2),
5369                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5370                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5371                          Sched<[sched]> {
5372    let isCommutable = IsCommutable;
5373  }
5374  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5375                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5376                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5377                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5378                         (_.ScalarLdFrag addr:$src2)))]>,
5379                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5380  }
5381  }
5382}
5383
// Scalar FP binary operation with an explicit rounding-control operand.
// Defines rrb_Int: a register-register form taking an extra AVX512RC:$rc
// operand, matching VecNode with (i32 timm:$rc), and tagged EVEX_B and
// EVEX_RC.  It is marked as using MXCSR; unlike avx512_fp_scalar it does not
// set mayRaiseFPException here.
5384multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5385                                  SDNode VecNode, X86FoldableSchedWrite sched> {
5386  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5387  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5388                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5389                          "$rc, $src2, $src1", "$src1, $src2, $rc",
5390                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5391                          (i32 timm:$rc))>,
5392                          EVEX_B, EVEX_RC, Sched<[sched]>;
5393}
// Scalar FP binary operation with a {sae} variant.  Five forms share `opc`:
//   rr_Int / rm_Int - vector register class forms matching VecNode, tagged
//                     SIMD_EXC.
//   rr / rm         - isCodeGenOnly forms on _.FRC matching OpNode, predicated
//                     on HasAVX512, with Uses = [MXCSR] and
//                     mayRaiseFPException = 1.
//   rrb_Int         - register form printed with "{sae}", matching SaeNode and
//                     tagged EVEX_B; marked as using MXCSR.
//   IsCommutable    - applied to the `rr` form only.
5394multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5395                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5396                                X86FoldableSchedWrite sched, bit IsCommutable> {
5397  let ExeDomain = _.ExeDomain in {
5398  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5399                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5400                           "$src2, $src1", "$src1, $src2",
5401                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5402                           Sched<[sched]>, SIMD_EXC;
5403
5404  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5405                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5406                         "$src2, $src1", "$src1, $src2",
5407                         (_.VT (VecNode _.RC:$src1,
5408                                        (_.ScalarIntMemFrags addr:$src2)))>,
5409                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
5410
  // Codegen-only forms on the scalar FP register class.
5411  let isCodeGenOnly = 1, Predicates = [HasAVX512],
5412      Uses = [MXCSR], mayRaiseFPException = 1 in {
5413  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5414                         (ins _.FRC:$src1, _.FRC:$src2),
5415                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5416                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5417                          Sched<[sched]> {
5418    let isCommutable = IsCommutable;
5419  }
5420  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5421                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5422                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5423                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5424                         (_.ScalarLdFrag addr:$src2)))]>,
5425                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5426  }
5427
  // {sae} form: no rounding-control operand, only the EVEX_B tag.
5428  let Uses = [MXCSR] in
5429  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5430                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5431                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5432                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5433                            EVEX_B, Sched<[sched]>;
5434  }
5435}
5436
// Scalar FP binary operation with rounding-control variants, for three
// element types.  Each defm combines avx512_fp_scalar (OpNode/VecNode forms)
// with avx512_fp_scalar_round (RndNode form):
//   SSZ - "ss" suffix, f32x_info, sched.PS.Scl, TB/XS, EVEX_CD8<32, CD8VT1>.
//   SDZ - "sd" suffix, f64x_info, sched.PD.Scl, TB/XD/REX_W,
//         EVEX_CD8<64, CD8VT1>.
//   SHZ - "sh" suffix, f16x_info, sched.PH.Scl, T_MAP5/XS,
//         EVEX_CD8<16, CD8VT1>; defined under Predicates = [HasFP16].
5437multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5438                                SDNode VecNode, SDNode RndNode,
5439                                X86SchedWriteSizes sched, bit IsCommutable> {
5440  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5441                              sched.PS.Scl, IsCommutable>,
5442             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5443                              sched.PS.Scl>,
5444                              TB, XS, EVEX, VVVV, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5445  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5446                              sched.PD.Scl, IsCommutable>,
5447             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5448                              sched.PD.Scl>,
5449                              TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5450  let Predicates = [HasFP16] in
5451    defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
5452                                VecNode, sched.PH.Scl, IsCommutable>,
5453               avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
5454                                sched.PH.Scl>,
5455                                T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
5456}
5457
// Scalar FP binary operation with {sae} variants, for three element types.
// Each defm instantiates avx512_fp_scalar_sae:
//   SSZ - "ss" suffix, f32x_info, sched.PS.Scl, TB/XS, EVEX_CD8<32, CD8VT1>.
//   SDZ - "sd" suffix, f64x_info, sched.PD.Scl, TB/XD/REX_W,
//         EVEX_CD8<64, CD8VT1>.
//   SHZ - "sh" suffix, f16x_info, sched.PH.Scl, T_MAP5/XS,
//         EVEX_CD8<16, CD8VT1>; defined under Predicates = [HasFP16].
5458multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5459                              SDNode VecNode, SDNode SaeNode,
5460                              X86SchedWriteSizes sched, bit IsCommutable> {
5461  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5462                              VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
5463                              TB, XS, EVEX, VVVV, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5464  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5465                              VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
5466                              TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5467  let Predicates = [HasFP16] in {
5468    defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
5469                                VecNode, SaeNode, sched.PH.Scl, IsCommutable>,
5470                                T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
5471  }
5472}
// Scalar FP arithmetic instantiations.  add/mul/sub/div use the
// rounding-control multiclass; min/max use the {sae} multiclass.  The final
// argument is IsCommutable: 1 for add and mul, 0 for sub, div, min and max.
5473defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5474                                 SchedWriteFAddSizes, 1>;
5475defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5476                                 SchedWriteFMulSizes, 1>;
5477defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5478                                 SchedWriteFAddSizes, 0>;
5479defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5480                                 SchedWriteFDivSizes, 0>;
5481defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5482                               SchedWriteFCmpSizes, 0>;
5483defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5484                               SchedWriteFCmpSizes, 0>;
5485
5486// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5487// X86fminc and X86fmaxc instead of X86fmin and X86fmax
//
// Defines two isCodeGenOnly scalar forms on _.FRC, predicated on HasAVX512:
//   rr - register-register, marked isCommutable = 1.
//   rm - register-memory, loading through _.ScalarLdFrag; scheduled with
//        sched.Folded and sched.ReadAfterFold.
5488multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5489                                    X86VectorVTInfo _, SDNode OpNode,
5490                                    X86FoldableSchedWrite sched> {
5491  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5492  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5493                         (ins _.FRC:$src1, _.FRC:$src2),
5494                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5495                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5496                          Sched<[sched]> {
5497    let isCommutable = 1;
5498  }
5499  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5500                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5501                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5502                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5503                         (_.ScalarLdFrag addr:$src2)))]>,
5504                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5505  }
5506}
// Commutable scalar min/max instantiations (X86fminc / X86fmaxc) for f32
// ("ss"), f64 ("sd") and f16 ("sh").  They reuse the 0x5D (min) and 0x5F (max)
// opcodes and the same mnemonics as VMIN/VMAX, and are all tagged SIMD_EXC.
// NOTE(review): the f16 variants get only the HasAVX512 predicate set inside
// avx512_comutable_binop_s; no HasFP16 predicate is applied here -- confirm
// this is intended.
5507defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5508                                         SchedWriteFCmp.Scl>, TB, XS,
5509                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5510
5511defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5512                                         SchedWriteFCmp.Scl>, TB, XD,
5513                                         REX_W, EVEX, VVVV, VEX_LIG,
5514                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5515
5516defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5517                                         SchedWriteFCmp.Scl>, TB, XS,
5518                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5519
5520defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5521                                         SchedWriteFCmp.Scl>, TB, XD,
5522                                         REX_W, EVEX, VVVV, VEX_LIG,
5523                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5524
5525defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
5526                                         SchedWriteFCmp.Scl>, T_MAP5, XS,
5527                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;
5528
5529defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
5530                                         SchedWriteFCmp.Scl>, T_MAP5, XS,
5531                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;
5532
// Packed FP binary operation for one vector type, built through
// AVX512_maskable_split (defined outside this section).  Three forms:
//   rr  - register-register.
//   rm  - register-memory, full-vector load via _.LdFrag.
//   rmb - register-memory, broadcast load via _.BroadcastLdFrag, tagged EVEX_B
//         and printed with _.BroadcastStr.
// OpNode and MaskOpNode are passed as two separate patterns to
// AVX512_maskable_split for each form.
//   IsCommutable / IsKCommutable - forwarded for the `rr` form only;
//                                  IsKCommutable is passed twice there.
//   suffix              - appended to OpcodeStr (defaults to _.Suffix).
//   ClobberConstraint   - forwarded to every form.
//   MayRaiseFPException - value assigned to mayRaiseFPException (default 1).
5533multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5534                            SDPatternOperator MaskOpNode,
5535                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
5536                            bit IsCommutable,
5537                            bit IsKCommutable = IsCommutable,
5538                            string suffix = _.Suffix,
5539                            string ClobberConstraint = "",
5540                            bit MayRaiseFPException = 1> {
5541  let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5542      Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
5543  defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
5544                                 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
5545                                 "$src2, $src1", "$src1, $src2",
5546                                 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
5547                                 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
5548                                 IsCommutable, IsKCommutable, IsKCommutable>, EVEX, VVVV, Sched<[sched]>;
  // Memory forms; both are marked mayLoad.
5549  let mayLoad = 1 in {
5550    defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5551                                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
5552                                   "$src2, $src1", "$src1, $src2",
5553                                   (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5554                                   (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5555                                   ClobberConstraint>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
5556    defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5557                                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
5558                                    "${src2}"#_.BroadcastStr#", $src1",
5559                                    "$src1, ${src2}"#_.BroadcastStr,
5560                                    (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5561                                    (MaskOpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5562                                    ClobberConstraint>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5563    }
5564  }
5565}
5566
// Packed FP binary operation with an explicit rounding-control operand.
// Defines rrb: a register-register form taking an extra AVX512RC:$rc operand,
// matching OpNodeRnd with (i32 timm:$rc), and tagged EVEX_B and EVEX_RC.
// The three `0` arguments, vselect_mask and ClobberConstraint are passed
// positionally to AVX512_maskable (defined outside this section).
// NOTE(review): the three zeros are presumably the commutability flags --
// confirm against AVX512_maskable.
5567multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5568                                  SDPatternOperator OpNodeRnd,
5569                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
5570                                  string suffix = _.Suffix,
5571                                  string ClobberConstraint = ""> {
5572  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5573  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5574                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
5575                  "$rc, $src2, $src1", "$src1, $src2, $rc",
5576                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
5577                  0, 0, 0, vselect_mask, ClobberConstraint>,
5578                  EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
5579}
5580
// Packed FP binary operation with a {sae} variant.  Defines rrb: a
// register-register form printed with "{sae}", matching OpNodeSAE and tagged
// EVEX_B.  It takes no rounding-control operand and always uses _.Suffix.
5581multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5582                                SDPatternOperator OpNodeSAE,
5583                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5584  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5585  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5586                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5587                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5588                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5589                  EVEX, VVVV, EVEX_B, Sched<[sched]>;
5590}
5591
// Packed single/double FP binary operation at all three vector widths.
//   PSZ / PDZ       - 512-bit forms, defined under Predicates = [prd].
//   PSZ128 / PSZ256 - 128/256-bit single forms, under [prd, HasVLX].
//   PDZ128 / PDZ256 - 128/256-bit double forms, under [prd, HasVLX].
// Double-precision forms add PD and REX_W and use EVEX_CD8<64, CD8VF>;
// single-precision forms use EVEX_CD8<32, CD8VF>.
//   IsPD128Commutable - commutability of the unmasked PDZ128 form only
//                       (defaults to IsCommutable).
5592multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5593                             SDPatternOperator MaskOpNode,
5594                             Predicate prd, X86SchedWriteSizes sched,
5595                             bit IsCommutable = 0,
5596                             bit IsPD128Commutable = IsCommutable> {
5597  let Predicates = [prd] in {
5598  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
5599                              sched.PS.ZMM, IsCommutable>, EVEX_V512, TB,
5600                              EVEX_CD8<32, CD8VF>;
5601  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
5602                              sched.PD.ZMM, IsCommutable>, EVEX_V512, TB, PD, REX_W,
5603                              EVEX_CD8<64, CD8VF>;
5604  }
5605
5606    // Define only if AVX512VL feature is present.
5607  let Predicates = [prd, HasVLX] in {
5608    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
5609                                   sched.PS.XMM, IsCommutable>, EVEX_V128, TB,
5610                                   EVEX_CD8<32, CD8VF>;
5611    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
5612                                   sched.PS.YMM, IsCommutable>, EVEX_V256, TB,
5613                                   EVEX_CD8<32, CD8VF>;
    // PDZ128 is the only form that passes both commutability arguments:
    // IsPD128Commutable fills avx512_fp_packed's IsCommutable slot and
    // IsCommutable fills its IsKCommutable slot.
5614    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
5615                                   sched.PD.XMM, IsPD128Commutable,
5616                                   IsCommutable>, EVEX_V128, TB, PD, REX_W,
5617                                   EVEX_CD8<64, CD8VF>;
5618    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
5619                                   sched.PD.YMM, IsCommutable>, EVEX_V256, TB, PD, REX_W,
5620                                   EVEX_CD8<64, CD8VF>;
5621  }
5622}
5623
// Packed half-precision FP binary operation at all three vector widths.
//   PHZ             - 512-bit form (v32f16_info), under [HasFP16].
//   PHZ128 / PHZ256 - 128/256-bit forms, under [HasVLX, HasFP16].
// All forms use T_MAP5 and EVEX_CD8<16, CD8VF>.
5624multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5625                              SDPatternOperator MaskOpNode,
5626                              X86SchedWriteSizes sched, bit IsCommutable = 0> {
5627  let Predicates = [HasFP16] in {
5628    defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
5629                                sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5,
5630                                EVEX_CD8<16, CD8VF>;
5631  }
5632  let Predicates = [HasVLX, HasFP16] in {
5633    defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
5634                                   sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5,
5635                                   EVEX_CD8<16, CD8VF>;
5636    defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
5637                                   sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5,
5638                                   EVEX_CD8<16, CD8VF>;
5639  }
5640}
5641
// Rounding variants of a packed floating-point binary operation (512-bit
// vector types only). Each defm instantiates avx512_fp_round_packed:
//   PHZ - v32f16, T_MAP5, guarded by HasFP16;
//   PSZ - v16f32, TB;
//   PDZ - v8f64,  TB, PD, REX_W.
// PSZ and PDZ set no Predicates of their own inside this multiclass.
// The enclosing "let" marks every record produced here as using MXCSR.
// Parameters:
//   OpNodeRnd - SDNode forwarded to avx512_fp_round_packed.
//   sched     - only the PH.ZMM / PS.ZMM / PD.ZMM entries are read.
5642let Uses = [MXCSR] in
5643multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5644                                   X86SchedWriteSizes sched> {
5645  let Predicates = [HasFP16] in {
5646    defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5647                                      v32f16_info>,
5648                                      EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
5649  }
5650  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5651                                    v16f32_info>,
5652                                    EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
5653  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5654                                    v8f64_info>,
5655                                    EVEX_V512, TB, PD, REX_W,EVEX_CD8<64, CD8VF>;
5656}
5657
// SAE variants of a packed floating-point binary operation (512-bit vector
// types only). Same layout as avx512_fp_binop_p_round, but every defm
// instantiates avx512_fp_sae_packed instead of avx512_fp_round_packed:
//   PHZ - v32f16, T_MAP5, guarded by HasFP16;
//   PSZ - v16f32, TB;
//   PDZ - v8f64,  TB, PD, REX_W.
// The enclosing "let" marks every record produced here as using MXCSR.
// Parameters:
//   OpNodeRnd - SDNode forwarded to avx512_fp_sae_packed.
//   sched     - only the PH.ZMM / PS.ZMM / PD.ZMM entries are read.
5658let Uses = [MXCSR] in
5659multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5660                                 X86SchedWriteSizes sched> {
5661  let Predicates = [HasFP16] in {
5662    defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5663                                    v32f16_info>,
5664                                    EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
5665  }
5666  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5667                                  v16f32_info>,
5668                                  EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
5669  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5670                                  v8f64_info>,
5671                                  EVEX_V512, TB, PD, REX_W,EVEX_CD8<64, CD8VF>;
5672}
5673
// Packed FP arithmetic instantiations. Every defm combines three multiclasses
// that share one opcode and mnemonic:
//   avx512_fp_binop_p       - PS/PD forms, predicated on HasAVX512;
//   avx512_fp_binop_ph      - PH (FP16) forms;
//   avx512_fp_binop_p_round - rounding forms (vadd/vmul/vsub/vdiv), or
//   avx512_fp_binop_p_sae   - SAE forms (vmin/vmax).
// vadd/vmul/vsub/vdiv select from any_f* for the unmasked operator and the
// plain f* node for the masked operator; vmin/vmax use X86fmin/X86fmax for both.
// For avx512_fp_binop_ph the trailing 1/0 is IsCommutable (1 for vadd/vmul,
// 0 for vmin/vmax, defaulted to 0 for vsub/vdiv).
// NOTE(review): the trailing 1/0 passed to avx512_fp_binop_p is presumably the
// same IsCommutable flag - confirm against that multiclass's parameter list.
5674defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
5675                              SchedWriteFAddSizes, 1>,
5676            avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
5677            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5678defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
5679                              SchedWriteFMulSizes, 1>,
5680            avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
5681            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5682defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
5683                              SchedWriteFAddSizes>,
5684            avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
5685            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5686defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
5687                              SchedWriteFDivSizes>,
5688            avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
5689            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5690defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
5691                              SchedWriteFCmpSizes, 0>,
5692            avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
5693            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5694defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
5695                              SchedWriteFCmpSizes, 0>,
5696            avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
5697            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
// Codegen-only twins of VMIN/VMAX: they reuse opcodes 0x5D/0x5F and the
// mnemonics "vmin"/"vmax", but select from X86fminc/X86fmaxc and pass a
// trailing 1 where VMIN/VMAX pass 0 (IsCommutable for the PH forms).
// No SAE forms are instantiated for these records.
5698let isCodeGenOnly = 1 in {
5699  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
5700                                 SchedWriteFCmpSizes, 1>,
5701               avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
5702                                 SchedWriteFCmpSizes, 1>;
5703  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
5704                                 SchedWriteFCmpSizes, 1>,
5705               avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
5706                                 SchedWriteFCmpSizes, 1>;
5707}
// FP-domain bitwise logic (vand/vandn/vor/vxor), predicated on HasDQI.
// Both operator slots are null_frag, so these defms attach no selection
// patterns. The enclosing "let" clears Uses and mayRaiseFPException for every
// record produced here. vandn passes a trailing 0; the other three pass 1.
// No PH, rounding or SAE forms are instantiated.
5708let Uses = []<Register>, mayRaiseFPException = 0 in {
5709defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
5710                               SchedWriteFLogicSizes, 1>;
5711defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
5712                               SchedWriteFLogicSizes, 0>;
5713defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
5714                               SchedWriteFLogicSizes, 1>;
5715defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
5716                               SchedWriteFLogicSizes, 1>;
5717}
5718
// Packed two-operand FP operation for a single vector type "_" (used below
// for SCALEF). Emits three AVX512_maskable forms, all tagged EVEX, VVVV:
//   rr  - both sources in registers (_.RC);
//   rm  - second source is a _.MemOp memory operand loaded with _.LdFrag;
//   rmb - second source is a _.ScalarMemOp loaded with _.BroadcastLdFrag,
//         printed with _.BroadcastStr and tagged EVEX_B.
// The mnemonic is OpcodeStr#_.Suffix. All forms are marked as using MXCSR and
// as possibly raising FP exceptions, and take their ExeDomain from "_".
// Parameters:
//   OpNode - SDNode matched by all three patterns.
//   sched  - sched for rr; sched.Folded + sched.ReadAfterFold for rm/rmb.
5719multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5720                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5721  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5722  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5723                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5724                  "$src2, $src1", "$src1, $src2",
5725                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5726                  EVEX, VVVV, Sched<[sched]>;
5727  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5728                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5729                  "$src2, $src1", "$src1, $src2",
5730                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5731                  EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
5732  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5733                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5734                   "${src2}"#_.BroadcastStr#", $src1",
5735                   "$src1, ${src2}"#_.BroadcastStr,
5736                   (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5737                   EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5738  }
5739}
5740
// Scalar counterpart of avx512_fp_scalef_p for a single type "_".
// Emits two AVX512_maskable_scalar forms:
//   rr - both sources in registers (_.RC);
//   rm - second source is a _.IntScalarMemOp matched via _.ScalarIntMemFrags.
// The mnemonic is OpcodeStr#_.Suffix. Unlike the packed multiclass, no
// EVEX/VVVV classes are attached here; the defms in avx512_fp_scalef_all that
// instantiate this multiclass append them.
// All forms are marked as using MXCSR and as possibly raising FP exceptions.
5741multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5742                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5743  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5744  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5745                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5746                  "$src2, $src1", "$src1, $src2",
5747                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5748                  Sched<[sched]>;
5749  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5750                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
5751                  "$src2, $src1", "$src1, $src2",
5752                  (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
5753                  Sched<[sched.Folded, sched.ReadAfterFold]>;
5754  }
5755}
5756
// Instantiates every SCALEF form for one mnemonic.
// Parameters:
//   opc       - opcode used by the packed forms (PH/PS/PD).
//   opcScaler - opcode used by the scalar forms (SH/SS/SD).
//   OpcodeStr - mnemonic stem; the type suffix is appended further down.
//   sched     - .ZMM/.YMM/.XMM are used for packed forms, .Scl for scalar ones.
// Layout:
//   * 512-bit packed forms and scalar forms each pair the plain multiclass
//     with a rounding multiclass (avx512_fp_round_packed /
//     avx512_fp_scalar_round).
//   * 128/256-bit packed forms have no rounding companion.
//   * FP16 forms use T_MAP6 and are guarded by HasFP16 (plus HasVLX for
//     128/256-bit); the PS/PD/SS/SD forms use T8.
//   * PD/SD forms add REX_W; SS/SD forms add VEX_LIG.
5757multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5758                                X86SchedWriteWidths sched> {
5759  let Predicates = [HasFP16] in {
5760    defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
5761               avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
5762                                EVEX_V512, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
    // avx512_fp_scalef_scalar appends _.Suffix itself, while
    // avx512_fp_scalar_round is handed the already-suffixed mnemonic.
5763    defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
5764               avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
5765                             EVEX, VVVV, T_MAP6, PD, EVEX_CD8<16, CD8VT1>;
5766  }
5767  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5768             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5769                              EVEX_V512, EVEX_CD8<32, CD8VF>, T8, PD;
5770  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5771             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5772                              EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5773  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5774             avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
5775                                    X86scalefsRnd, sched.Scl>,
5776                                    EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8, PD;
5777  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5778             avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
5779                                    X86scalefsRnd, sched.Scl>,
5780                                    EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>, REX_W, T8, PD;
5781
5782  // Define only if AVX512VL feature is present.
5783  let Predicates = [HasVLX] in {
5784    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5785                                   EVEX_V128, EVEX_CD8<32, CD8VF>, T8, PD;
5786    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5787                                   EVEX_V256, EVEX_CD8<32, CD8VF>, T8, PD;
5788    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5789                                   EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5790    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
5791                                   EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5792  }
5793
5794  let Predicates = [HasFP16, HasVLX] in {
5795    defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
5796                                   EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
5797    defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
5798                                   EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
5799  }
5800}
// VSCALEF: packed forms use opcode 0x2C, scalar forms use 0x2D; all forms are
// scheduled with SchedWriteFAdd.
5801defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", SchedWriteFAdd>;
5802
5803//===----------------------------------------------------------------------===//
5804// AVX-512  VPTESTM instructions
5805//===----------------------------------------------------------------------===//
5806
// Register (rr) and memory (rm) forms of a test-into-mask instruction for one
// vector type "_". The result is written to a mask register (_.KRC).
// Both forms are built with AVX512_maskable_cmp and tagged EVEX, VVVV; the rm
// form adds EVEX_CD8<_.EltSize, CD8VF>.
// Because the patterns are null_frag, hasSideEffects (and mayLoad for rm) are
// stated explicitly rather than derived from a pattern.
// NOTE(review): the trailing 1 on rr is presumably the IsCommutable argument
// of AVX512_maskable_cmp - confirm against its parameter list.
5807multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
5808                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5809  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
5810  // There are just too many permutations due to commutability and bitcasts.
5811  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5812  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5813                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5814                      "$src2, $src1", "$src1, $src2",
5815                   (null_frag), (null_frag), 1>,
5816                   EVEX, VVVV, Sched<[sched]>;
5817  let mayLoad = 1 in
5818  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5819                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5820                       "$src2, $src1", "$src1, $src2",
5821                   (null_frag), (null_frag)>,
5822                   EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
5823                   Sched<[sched.Folded, sched.ReadAfterFold]>;
5824  }
5825}
5826
// Broadcast-memory (rmb) form of a test-into-mask instruction for one vector
// type "_". The second source is a _.ScalarMemOp printed with
// _.BroadcastStr, and the record is tagged EVEX_B. As in avx512_vptest, the
// patterns are null_frag, so mayLoad and hasSideEffects are set explicitly.
5827multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
5828                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5829  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
5830  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5831                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5832                    "${src2}"#_.BroadcastStr#", $src1",
5833                    "$src1, ${src2}"#_.BroadcastStr,
5834                    (null_frag), (null_frag)>,
5835                    EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
5836                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5837}
5838
// Instantiates the rr/rm forms (avx512_vptest) and the rmb form
// (avx512_vptest_mb) at all three vector widths of "_":
//   Z    - info512, EVEX_V512, requires HasAVX512;
//   Z256 - info256, EVEX_V256, requires HasAVX512 and HasVLX;
//   Z128 - info128, EVEX_V128, requires HasAVX512 and HasVLX.
// "sched" supplies the per-width scheduling class (.ZMM/.YMM/.XMM).
5839multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
5840                                  X86SchedWriteWidths sched,
5841                                  AVX512VLVectorVTInfo _> {
5842  let Predicates  = [HasAVX512] in
5843  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
5844           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
5845
5846  let Predicates = [HasAVX512, HasVLX] in {
5847  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
5848              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
5849  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
5850              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
5851  }
5852}
5853
// Dword ("d") and qword ("q") element variants of a test-into-mask
// instruction. Both share the opcode "opc"; the Q variant adds REX_W.
5854multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
5855                            X86SchedWriteWidths sched> {
5856  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
5857                                 avx512vl_i32_info>;
5858  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
5859                                 avx512vl_i64_info>, REX_W;
5860}
5861
// Word ("w") and byte ("b") element variants of a test-into-mask instruction.
// Only avx512_vptest is instantiated, so these have rr and rm forms but no
// broadcast (rmb) form. Both share the opcode "opc"; word variants add REX_W.
// 512-bit records require HasBWI; 128/256-bit records require HasVLX and
// HasBWI.
5862multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5863                            X86SchedWriteWidths sched> {
5864  let Predicates = [HasBWI] in {
5865  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
5866                            v32i16_info>, EVEX_V512, REX_W;
5867  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
5868                            v64i8_info>, EVEX_V512;
5869  }
5870
5871  let Predicates = [HasVLX, HasBWI] in {
5872  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
5873                            v16i16x_info>, EVEX_V256, REX_W;
5874  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
5875                            v8i16x_info>, EVEX_V128, REX_W;
5876  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
5877                            v32i8x_info>, EVEX_V256;
5878  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
5879                            v16i8x_info>, EVEX_V128;
5880  }
5881}
5882
// Convenience multiclass that inherits the byte/word forms (opcode opc_wb)
// and the dword/qword forms (opcode opc_dq) of a test-into-mask instruction.
// It adds no records of its own.
5883multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5884                                   X86SchedWriteWidths sched> :
5885  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
5886  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
5887
// VPTESTM and VPTESTNM share opcodes (0x26 for b/w, 0x27 for d/q) and the T8
// map; they differ only in the prefix class: PD for VPTESTM, XS for VPTESTNM.
5888defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
5889                                         SchedWriteVecLogic>, T8, PD;
5890defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
5891                                         SchedWriteVecLogic>, T8, XS;
5892
5893//===----------------------------------------------------------------------===//
5894// AVX-512  Shift instructions
5895//===----------------------------------------------------------------------===//
5896
// Shift/rotate-by-immediate forms for one vector type "_":
//   ri - source vector in a register, count in a u8imm;
//   mi - source vector loaded from memory (_.MemOp via _.LdFrag), count in a
//        u8imm.
// The immediate is matched as (i8 timm). ImmFormR / ImmFormM are the
// instruction Formats used for the register and memory forms respectively.
// The mi form is scheduled with sched.Folded only (no ReadAfterFold entry),
// since its only register-free source is the loaded vector.
5897multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5898                            string OpcodeStr, SDNode OpNode,
5899                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5900  let ExeDomain = _.ExeDomain in {
5901  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5902                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5903                      "$src2, $src1", "$src1, $src2",
5904                   (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
5905                   Sched<[sched]>;
5906  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5907                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5908                       "$src2, $src1", "$src1, $src2",
5909                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
5910                          (i8 timm:$src2)))>,
5911                   Sched<[sched.Folded]>;
5912  }
5913}
5914
// Broadcast-memory companion of avx512_shift_rmi: the "mbi" form takes the
// source vector from a _.ScalarMemOp loaded with _.BroadcastLdFrag, prints it
// with _.BroadcastStr, and is tagged EVEX_B. The count is a u8imm matched as
// (i8 timm). Only a memory Format (ImmFormM) is needed.
5915multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5916                             string OpcodeStr, SDNode OpNode,
5917                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5918  let ExeDomain = _.ExeDomain in
5919  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5920                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5921      "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
5922     (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
5923     EVEX_B, Sched<[sched.Folded]>;
5924}
5925
// Shift forms whose count comes from a vector operand, for one vector type
// "_":
//   rr - count in a VR128X register;
//   rm - count loaded from an i128mem operand with a plain "load".
// SrcVT is the value type given to the count operand in the patterns.
// Both forms are tagged AVX512BIBase, EVEX, VVVV.
5926multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5927                            X86FoldableSchedWrite sched, ValueType SrcVT,
5928                            X86VectorVTInfo _> {
5929   // src2 is always 128-bit
5930  let ExeDomain = _.ExeDomain in {
5931  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5932                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5933                      "$src2, $src1", "$src1, $src2",
5934                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5935                   AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
5936  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5937                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5938                       "$src2, $src1", "$src1, $src2",
5939                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
5940                   AVX512BIBase,
5941                   EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
5942  }
5943}
5944
// Instantiates avx512_shift_rrm at all three vector widths of VTInfo:
//   Z    - info512, EVEX_V512, CD8VQ, requires prd;
//   Z256 - info256, EVEX_V256, CD8VH, requires prd and HasVLX;
//   Z128 - info128, EVEX_V128, CD8VF, requires prd and HasVLX.
// NOTE(review): the CD8 tuple type shrinks (VQ/VH/VF) as the vector width
// shrinks, presumably because the memory operand is always 128 bits (see
// avx512_shift_rrm) - confirm against the EVEX_CD8 definitions.
5945multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5946                              X86SchedWriteWidths sched, ValueType SrcVT,
5947                              AVX512VLVectorVTInfo VTInfo,
5948                              Predicate prd> {
5949  let Predicates = [prd] in
5950  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5951                               VTInfo.info512>, EVEX_V512,
5952                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5953  let Predicates = [prd, HasVLX] in {
5954  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5955                               VTInfo.info256>, EVEX_V256,
5956                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5957  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5958                               VTInfo.info128>, EVEX_V128,
5959                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
5960  }
5961}
5962
// Dword, qword and word variants of a shift whose count is a 128-bit vector
// operand. Each element size has its own opcode (opcd / opcq / opcw) and its
// own count value type (v4i32 / v2i64 / v8i16). D and Q are predicated on
// HasAVX512, W on HasBWI; Q additionally adds REX_W.
5963multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5964                              string OpcodeStr, SDNode OpNode,
5965                              X86SchedWriteWidths sched> {
5966  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5967                              avx512vl_i32_info, HasAVX512>;
5968  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5969                              avx512vl_i64_info, HasAVX512>, REX_W;
5970  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5971                              avx512vl_i16_info, HasBWI>;
5972}
5973
// Instantiates the immediate-count forms (ri/mi from avx512_shift_rmi and
// mbi from avx512_shift_rmbi) at all three vector widths of VTInfo:
//   Z    - info512, EVEX_V512, requires HasAVX512;
//   Z256 - info256, EVEX_V256, requires HasAVX512 and HasVLX;
//   Z128 - info128, EVEX_V128, requires HasAVX512 and HasVLX.
5974multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5975                                  string OpcodeStr, SDNode OpNode,
5976                                  X86SchedWriteWidths sched,
5977                                  AVX512VLVectorVTInfo VTInfo> {
5978  let Predicates = [HasAVX512] in
5979  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5980                              sched.ZMM, VTInfo.info512>,
5981             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5982                               VTInfo.info512>, EVEX_V512;
5983  let Predicates = [HasAVX512, HasVLX] in {
5984  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5985                              sched.YMM, VTInfo.info256>,
5986             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5987                               VTInfo.info256>, EVEX_V256;
5988  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5989                              sched.XMM, VTInfo.info128>,
5990             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5991                               VTInfo.info128>, EVEX_V128;
5992  }
5993}
5994
// Word-element immediate-count shifts. Only avx512_shift_rmi is
// instantiated, so these have ri and mi forms but no broadcast (mbi) form.
// All records are tagged WIG. The 512-bit record requires HasBWI; the
// 128/256-bit records require HasVLX and HasBWI. OpcodeStr is used as given
// (no element suffix is appended here).
5995multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5996                              string OpcodeStr, SDNode OpNode,
5997                              X86SchedWriteWidths sched> {
5998  let Predicates = [HasBWI] in
5999  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6000                               sched.ZMM, v32i16_info>, EVEX_V512, WIG;
6001  let Predicates = [HasVLX, HasBWI] in {
6002  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6003                               sched.YMM, v16i16x_info>, EVEX_V256, WIG;
6004  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6005                               sched.XMM, v8i16x_info>, EVEX_V128, WIG;
6006  }
6007}
6008
// Dword and qword immediate-count shifts/rotates. Appends "d"/"q" to
// OpcodeStr, uses opcd/opcq as the respective opcodes, and sets
// EVEX_CD8<32, CD8VF> for D and EVEX_CD8<64, CD8VF> plus REX_W for Q.
6009multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
6010                               Format ImmFormR, Format ImmFormM,
6011                               string OpcodeStr, SDNode OpNode,
6012                               X86SchedWriteWidths sched> {
6013  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
6014                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
6015  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
6016                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
6017}
6018
// Immediate-count shifts and rotates. These share a small set of opcodes
// (0x71 for word, 0x72/0x73 for dword/qword) and are told apart by the MRMnr /
// MRMnm Format passed in: MRM2 = vpsrl, MRM6 = vpsll, MRM4 = vpsra,
// MRM0 = vpror, MRM1 = vprol. VPSRA, VPROR and VPROL pass 0x72 for both the
// dword and qword forms; the qword form still differs through the REX_W added
// by avx512_shift_rmi_dq. The rotates have no word form.
6019defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
6020                                 SchedWriteVecShiftImm>,
6021             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
6022                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6023
6024defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
6025                                 SchedWriteVecShiftImm>,
6026             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
6027                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6028
6029defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
6030                                 SchedWriteVecShiftImm>,
6031             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
6032                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6033
6034defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
6035                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6036defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
6037                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6038
// Shifts whose count is a 128-bit vector operand. The defm names VPSLL,
// VPSRA and VPSRL are reused from above; the generated records differ in
// their form suffix (e.g. VPSRAQZrr here versus VPSRAQZri above).
6039defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
6040                                SchedWriteVecShift>;
6041defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
6042                                SchedWriteVecShift>;
6043defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
6044                                SchedWriteVecShift>;
6045
6046// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Each pattern places the narrow source into an otherwise undefined 512-bit
// register (INSERT_SUBREG into IMPLICIT_DEF), runs the 512-bit instruction,
// and extracts the low sub_xmm/sub_ymm part of the result. The shift count
// (a 128-bit register or an immediate) is passed through unchanged.
6047let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
6048  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
6049            (EXTRACT_SUBREG (v8i64
6050              (VPSRAQZrr
6051                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6052                 VR128X:$src2)), sub_ymm)>;
6053
6054  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6055            (EXTRACT_SUBREG (v8i64
6056              (VPSRAQZrr
6057                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6058                 VR128X:$src2)), sub_xmm)>;
6059
6060  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
6061            (EXTRACT_SUBREG (v8i64
6062              (VPSRAQZri
6063                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6064                 timm:$src2)), sub_ymm)>;
6065
6066  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
6067            (EXTRACT_SUBREG (v8i64
6068              (VPSRAQZri
6069                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6070                 timm:$src2)), sub_xmm)>;
6071}
6072
6073//===-------------------------------------------------------------------===//
6074// Variable Bit Shifts
6075//===-------------------------------------------------------------------===//
6076
// Per-element variable shift/rotate for one vector type "_": the count
// operand has the same vector type as the data operand.
//   rr - count in a register (_.RC);
//   rm - count loaded from a _.MemOp with _.LdFrag; adds
//        EVEX_CD8<_.EltSize, CD8VF>.
// Both forms are tagged AVX5128IBase, EVEX, VVVV.
6077multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
6078                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6079  let ExeDomain = _.ExeDomain in {
6080  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6081                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6082                      "$src2, $src1", "$src1, $src2",
6083                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
6084                   AVX5128IBase, EVEX, VVVV, Sched<[sched]>;
6085  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6086                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6087                       "$src2, $src1", "$src1, $src2",
6088                   (_.VT (OpNode _.RC:$src1,
6089                   (_.VT (_.LdFrag addr:$src2))))>,
6090                   AVX5128IBase, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
6091                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6092  }
6093}
6094
// Broadcast-memory (rmb) companion of avx512_var_shift: the count vector is
// a _.ScalarMemOp loaded with _.BroadcastLdFrag, printed with
// _.BroadcastStr, and the record is tagged EVEX_B.
6095multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
6096                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6097  let ExeDomain = _.ExeDomain in
6098  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6099                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6100                    "${src2}"#_.BroadcastStr#", $src1",
6101                    "$src1, ${src2}"#_.BroadcastStr,
6102                    (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
6103                    AVX5128IBase, EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
6104                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6105}
6106
// Instantiates the rr/rm forms (avx512_var_shift) and the rmb form
// (avx512_var_shift_mb) at all three vector widths of "_":
//   Z    - info512, EVEX_V512, requires HasAVX512;
//   Z256 - info256, EVEX_V256, requires HasAVX512 and HasVLX;
//   Z128 - info128, EVEX_V128, requires HasAVX512 and HasVLX.
6107multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6108                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6109  let Predicates  = [HasAVX512] in
6110  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6111           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
6112
6113  let Predicates = [HasAVX512, HasVLX] in {
6114  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6115              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
6116  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6117              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
6118  }
6119}
6120
// Dword ("d") and qword ("q") variants of a variable shift/rotate. Both
// share the opcode "opc"; the Q variant adds REX_W.
6121multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
6122                                  SDNode OpNode, X86SchedWriteWidths sched> {
6123  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6124                                 avx512vl_i32_info>;
6125  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6126                                 avx512vl_i64_info>, REX_W;
6127}
6128
6129// Use 512bit version to implement 128/256 bit in case NoVLX.
// Parameters:
//   _         - vector type family; info256/info128 give the matched types and
//               info512 gives the type of the widened operands.
//   OpcodeStr - record-name prefix of the instruction (e.g. "VPSRAVQ"); the
//               512-bit register form is looked up as OpcodeStr#"Zrr".
//   OpNode    - SDNode to match.
//   p         - predicate list applied to both patterns.
// Both the data and the count operand are inserted into otherwise undefined
// 512-bit registers; the low sub_ymm/sub_xmm part of the result is extracted.
6130multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6131                                     SDNode OpNode, list<Predicate> p> {
6132  let Predicates = p in {
6133  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6134                                  (_.info256.VT _.info256.RC:$src2))),
6135            (EXTRACT_SUBREG
6136                (!cast<Instruction>(OpcodeStr#"Zrr")
6137                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6138                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6139             sub_ymm)>;
6140
6141  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6142                                  (_.info128.VT _.info128.RC:$src2))),
6143            (EXTRACT_SUBREG
6144                (!cast<Instruction>(OpcodeStr#"Zrr")
6145                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6146                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6147             sub_xmm)>;
6148  }
6149}
// Word-element variable shifts. Only avx512_var_shift is instantiated, so
// these have rr and rm forms but no broadcast (rmb) form. All records add
// REX_W. The 512-bit record requires HasBWI; the 128/256-bit records require
// HasVLX and HasBWI. OpcodeStr is used as given (no element suffix is
// appended here).
6150multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6151                              SDNode OpNode, X86SchedWriteWidths sched> {
6152  let Predicates = [HasBWI] in
6153  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6154              EVEX_V512, REX_W;
6155  let Predicates = [HasVLX, HasBWI] in {
6156
6157  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6158              EVEX_V256, REX_W;
6159  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6160              EVEX_V128, REX_W;
6161  }
6162}
6163
// Variable shifts: dword/qword forms share one opcode (0x47/0x46/0x45) and
// the word forms use a separate one (0x12/0x11/0x10).
6164defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6165              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6166
6167defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6168              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6169
6170defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6171              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
6172
// Variable rotates select directly from the generic rotr/rotl nodes and have
// dword/qword forms only.
6173defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6174defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6175
// NoVLX fallbacks: widen 128/256-bit operations to the 512-bit register form.
// Instantiated for qword arithmetic right shift and for all three word shifts.
6176defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX, HasEVEX512]>;
6177defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX, HasEVEX512]>;
6178defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX, HasEVEX512]>;
6179defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX, HasEVEX512]>;
6180
6181
6182// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// The first four patterns handle a variable rotate (rotl) and widen both the
// data and the count vector; the last four handle a rotate by immediate
// (X86vrotli) and widen only the data vector. In every case the narrow
// operands are inserted into otherwise undefined 512-bit registers and the
// low sub_xmm/sub_ymm part of the result is extracted.
6183let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
6184  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6185            (EXTRACT_SUBREG (v8i64
6186              (VPROLVQZrr
6187                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6188                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6189                       sub_xmm)>;
6190  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6191            (EXTRACT_SUBREG (v8i64
6192              (VPROLVQZrr
6193                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6194                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6195                       sub_ymm)>;
6196
6197  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6198            (EXTRACT_SUBREG (v16i32
6199              (VPROLVDZrr
6200                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6201                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6202                        sub_xmm)>;
6203  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6204            (EXTRACT_SUBREG (v16i32
6205              (VPROLVDZrr
6206                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6207                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6208                        sub_ymm)>;
6209
6210  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6211            (EXTRACT_SUBREG (v8i64
6212              (VPROLQZri
6213                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6214                        timm:$src2)), sub_xmm)>;
6215  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6216            (EXTRACT_SUBREG (v8i64
6217              (VPROLQZri
6218                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6219                       timm:$src2)), sub_ymm)>;
6220
6221  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6222            (EXTRACT_SUBREG (v16i32
6223              (VPROLDZri
6224                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6225                        timm:$src2)), sub_xmm)>;
6226  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6227            (EXTRACT_SUBREG (v16i32
6228              (VPROLDZri
6229                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6230                        timm:$src2)), sub_ymm)>;
6231}
6232
6233// Without VLX, use the 512-bit VPRORV/VPROR instructions to implement
// rotate-right on v2i64/v4i64 and v4i32/v8i32.
// Each pattern inserts the narrow operand(s) into the low subregister of an
// IMPLICIT_DEF 512-bit value, runs the Z-form instruction, and extracts the
// same subregister (sub_xmm or sub_ymm) from the result.  The remaining lanes
// of the wide value are undefined and are discarded by the extract.
6234let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
  // Variable rotate-right, 64-bit elements.
6235  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6236            (EXTRACT_SUBREG (v8i64
6237              (VPRORVQZrr
6238                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6239                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6240                       sub_xmm)>;
6241  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6242            (EXTRACT_SUBREG (v8i64
6243              (VPRORVQZrr
6244                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6245                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6246                       sub_ymm)>;
6247
  // Variable rotate-right, 32-bit elements.
6248  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6249            (EXTRACT_SUBREG (v16i32
6250              (VPRORVDZrr
6251                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6252                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6253                        sub_xmm)>;
6254  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6255            (EXTRACT_SUBREG (v16i32
6256              (VPRORVDZrr
6257                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6258                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6259                        sub_ymm)>;
6260
  // Immediate rotate-right (X86vrotri), 64-bit elements; the i8 immediate is
  // passed through unchanged.
6261  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6262            (EXTRACT_SUBREG (v8i64
6263              (VPRORQZri
6264                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6265                        timm:$src2)), sub_xmm)>;
6266  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6267            (EXTRACT_SUBREG (v8i64
6268              (VPRORQZri
6269                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6270                       timm:$src2)), sub_ymm)>;
6271
  // Immediate rotate-right (X86vrotri), 32-bit elements.
6272  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6273            (EXTRACT_SUBREG (v16i32
6274              (VPRORDZri
6275                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6276                        timm:$src2)), sub_xmm)>;
6277  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6278            (EXTRACT_SUBREG (v16i32
6279              (VPRORDZri
6280                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6281                        timm:$src2)), sub_ymm)>;
6282}
6283
6284//===-------------------------------------------------------------------===//
6285// 1-src variable permutation VPERMW/D/Q
6286//===-------------------------------------------------------------------===//
6287
// Size wrapper for the variable 1-source permutes on 32/64-bit elements.
// Reuses the avx512_var_shift and avx512_var_shift_mb multiclasses with the
// permute node passed as OpNode.  Only the 512-bit (Z, HasAVX512) and 256-bit
// (Z256, HasAVX512 + HasVLX) forms are instantiated; no 128-bit form is
// defined here.
6288multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6289                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6290  let Predicates  = [HasAVX512] in
6291  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6292           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6293
6294  let Predicates = [HasAVX512, HasVLX] in
6295  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6296              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6297}
6298
// Size wrapper for the immediate-controlled 1-source permutes.
// Reuses avx512_shift_rmi (taking both ImmFormR and ImmFormM) and
// avx512_shift_rmbi (taking ImmFormM only).  As with the variable form, only
// the 512-bit (Z) and 256-bit (Z256, requires HasVLX) sizes are instantiated.
6299multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6300                                 string OpcodeStr, SDNode OpNode,
6301                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6302  let Predicates = [HasAVX512] in
6303  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6304                              sched, VTInfo.info512>,
6305             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6306                               sched, VTInfo.info512>, EVEX_V512;
6307  let Predicates = [HasAVX512, HasVLX] in
6308  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6309                              sched, VTInfo.info256>,
6310             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6311                               sched, VTInfo.info256>, EVEX_V256;
6312}
6313
// Size wrapper for the variable permutes gated by a caller-supplied feature
// predicate `prd`.  Unlike avx512_vperm_dq_sizes this instantiates all three
// widths (Z, Z256, Z128) and uses only avx512_var_shift, i.e. it does not pull
// in avx512_var_shift_mb.  The 256/128-bit forms additionally require HasVLX.
6314multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6315                              Predicate prd, SDNode OpNode,
6316                              X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6317  let Predicates = [prd] in
6318  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6319              EVEX_V512 ;
6320  let Predicates = [HasVLX, prd] in {
6321  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6322              EVEX_V256 ;
6323  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6324              EVEX_V128 ;
6325  }
6326}
6327
// Variable permutes.  VPERMW and VPERMB share opcode 0x8D and differ by the
// REX_W mix-in and by feature predicate (HasBWI vs. HasVBMI).
6328defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6329                               WriteVarShuffle256, avx512vl_i16_info>, REX_W;
6330defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6331                               WriteVarShuffle256, avx512vl_i8_info>;
6332
// Integer (0x36) and floating-point (0x16) variable permutes; the 64-bit
// element variants add REX_W.
6333defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6334                                    WriteVarShuffle256, avx512vl_i32_info>;
6335defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6336                                    WriteVarShuffle256, avx512vl_i64_info>, REX_W;
6337defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6338                                     WriteFVarShuffle256, avx512vl_f32_info>;
6339defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6340                                     WriteFVarShuffle256, avx512vl_f64_info>, REX_W;
6341
// Immediate-controlled forms.  These reuse the VPERMQ / VPERMPD prefixes from
// the variable forms above, so the two multiclasses must generate distinct
// record suffixes.
6342defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6343                             X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6344                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
6345defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6346                             X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6347                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
6348
6349//===----------------------------------------------------------------------===//
6350// AVX-512 - VPERMIL
6351//===----------------------------------------------------------------------===//
6352
// Variable-control VPERMIL for one vector width.
//   OpcVar - opcode of the variable-control form.
//   _      - type info of the data operand and of the result.
//   Ctrl   - type info of the control operand ($src2).
// Emits three maskable forms: rr (register control), rm (full-vector memory
// control) and rmb (broadcast memory control).
6353multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6354                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
6355                             X86VectorVTInfo Ctrl> {
6356  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6357                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6358                  "$src2, $src1", "$src1, $src2",
6359                  (_.VT (OpNode _.RC:$src1,
6360                               (Ctrl.VT Ctrl.RC:$src2)))>,
6361                  T8, PD, EVEX, VVVV, Sched<[sched]>;
6362  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6363                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6364                  "$src2, $src1", "$src1, $src2",
6365                  (_.VT (OpNode
6366                           _.RC:$src1,
6367                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6368                  T8, PD, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
6369                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  // NOTE(review): the broadcast form takes its memory operand class and asm
  // broadcast string from `_` but its load fragment from Ctrl.  The
  // instantiations in this file pair f32 with i32 and f64 with i64, so the
  // element sizes agree.
6370  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6371                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6372                   "${src2}"#_.BroadcastStr#", $src1",
6373                   "$src1, ${src2}"#_.BroadcastStr,
6374                   (_.VT (OpNode
6375                            _.RC:$src1,
6376                            (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6377                   T8, PD, EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6378                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6379}
6380
// Instantiates avx512_permil_vec with the X86VPermilpv node for all three
// vector widths, selecting the matching per-width scheduling class from
// `sched`.  The 512-bit form needs HasAVX512; the 128/256-bit forms also need
// HasVLX.
6381multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6382                                    X86SchedWriteWidths sched,
6383                                    AVX512VLVectorVTInfo _,
6384                                    AVX512VLVectorVTInfo Ctrl> {
6385  let Predicates = [HasAVX512] in {
6386    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6387                                  _.info512, Ctrl.info512>, EVEX_V512;
6388  }
6389  let Predicates = [HasAVX512, HasVLX] in {
6390    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6391                                  _.info128, Ctrl.info128>, EVEX_V128;
6392    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6393                                  _.info256, Ctrl.info256>, EVEX_V256;
6394  }
6395}
6396
// Top-level VPERMIL multiclass: emits both the variable-control form (opcode
// OpcVar, node X86VPermilpv via avx512_permil_vec_common) and the
// immediate-control form (opcode OpcImm, node X86VPermilpi via
// avx512_shift_rmi_sizes) under the same NAME prefix.
6397multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6398                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6399  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6400                                      _, Ctrl>;
6401  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6402                                    X86VPermilpi, SchedWriteFShuffle, _>,
6403                    EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6404}
6405
// VPERMILPS / VPERMILPD.  Arguments are (mnemonic, immediate-form opcode,
// variable-form opcode, data type info, control type info); the control
// vector is the same-width integer type.  The double-precision variant adds
// REX_W.
6406let ExeDomain = SSEPackedSingle in
6407defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6408                               avx512vl_i32_info>;
6409let ExeDomain = SSEPackedDouble in
6410defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6411                               avx512vl_i64_info>, REX_W;
6412
6413//===----------------------------------------------------------------------===//
6414// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6415//===----------------------------------------------------------------------===//
6416
// All three immediate shuffles share opcode 0x70 and are distinguished by the
// prefix base class mixed in (AVX512BIi8Base, AVX512XSIi8Base,
// AVX512XDIi8Base).  Note the record prefixes are VPSHUFH / VPSHUFL while the
// mnemonics are "vpshufhw" / "vpshuflw".
6417defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6418                             X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6419                             EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6420defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6421                                  X86PShufhw, SchedWriteShuffle>,
6422                                  EVEX, AVX512XSIi8Base;
6423defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6424                                  X86PShuflw, SchedWriteShuffle>,
6425                                  EVEX, AVX512XDIi8Base;
6426
6427//===----------------------------------------------------------------------===//
6428// AVX-512 - VPSHUFB
6429//===----------------------------------------------------------------------===//
6430
// Size wrapper for the byte shuffle: instantiates avx512_var_shift on the
// fixed i8 vector infos for 512/256/128 bits with the per-width scheduling
// class from `sched`.  Z requires HasBWI; Z256/Z128 also require HasVLX.
6431multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6432                               X86SchedWriteWidths sched> {
6433  let Predicates = [HasBWI] in
6434  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6435                              EVEX_V512;
6436
6437  let Predicates = [HasVLX, HasBWI] in {
6438  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6439                              EVEX_V256;
6440  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6441                              EVEX_V128;
6442  }
6443}
6444
// VPSHUFB: opcode 0x00, selected from the X86pshufb node, marked WIG.
6445defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6446                                  SchedWriteVarShuffle>, WIG;
6447
6448//===----------------------------------------------------------------------===//
6449// Move Low to High and High to Low packed FP Instructions
6450//===----------------------------------------------------------------------===//
6451
// Register-register VMOVLHPS (0x16) and VMOVHLPS (0x12) on VR128X, selected
// from the X86Movlhps / X86Movhlps nodes on v4f32.
6452def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6453          (ins VR128X:$src1, VR128X:$src2),
6454          "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6455          [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6456          Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV;
// Only VMOVHLPS is flagged commutable.
// NOTE(review): swapping its operands is not a plain operand exchange;
// presumably the target's commute hook rewrites it to another instruction --
// confirm in X86InstrInfo.
6457let isCommutable = 1 in
6458def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6459          (ins VR128X:$src1, VR128X:$src2),
6460          "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6461          [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6462          Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV;
6463
6464//===----------------------------------------------------------------------===//
6465// VMOVHPS/PD VMOVLPS Instructions
6466// All patterns were taken from the SSE implementation.
6467//===----------------------------------------------------------------------===//
6468
// Load form (rm) shared by VMOVHPS/VMOVHPD/VMOVLPS/VMOVLPD.
// The instruction takes a register $src1 and a 64-bit memory operand $src2.
// The selection pattern applies OpNode to $src1 and the loaded f64 placed in a
// v2f64 via scalar_to_vector and bitconverted to _.VT.  Callers pass null_frag
// as OpNode when no pattern is wanted; mayLoad = 1 is therefore set explicitly
// rather than inferred from the pattern.
6469multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6470                                  SDPatternOperator OpNode,
6471                                  X86VectorVTInfo _> {
6472  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6473  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6474                  (ins _.RC:$src1, f64mem:$src2),
6475                  !strconcat(OpcodeStr,
6476                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6477                  [(set _.RC:$dst,
6478                     (OpNode _.RC:$src1,
6479                       (_.VT (bitconvert
6480                         (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6481                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX, VVVV;
6482}
6483
6484// No patterns for MOVLPS/MOVHPS, as the Movlhps node should only be created in
6485// SSE1, and the MOVLPS pattern is even more complex.  The PS forms therefore
// pass null_frag; the PD forms select from X86Unpckl (high) and X86Movsd (low).
6486defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6487                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB;
6488defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6489                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W;
6490defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6491                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB;
6492defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6493                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W;
6494
// Extra selection patterns that fold an X86vzload64 memory operand into the
// VMOVHPD / VMOVLPD load forms, in addition to the scalar_to_vector(loadf64)
// shape matched by the instruction definitions themselves.
6495let Predicates = [HasAVX512] in {
6496  // VMOVHPD patterns
6497  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6498            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6499
6500  // VMOVLPD patterns
6501  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6502            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6503}
6504
// Store forms of VMOVHPS/VMOVHPD (0x17) and VMOVLPS/VMOVLPD (0x13); all write
// a 64-bit memory operand and use the WriteFStore scheduling class.
6505let SchedRW = [WriteFStore] in {
// The brace-less `let ... in` applies only to the single def that follows it.
// The PS forms have an empty pattern list, so mayStore/hasSideEffects are
// stated explicitly for them; the PD forms carry a store pattern.
6506let mayStore = 1, hasSideEffects = 0 in
6507def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6508                       (ins f64mem:$dst, VR128X:$src),
6509                       "vmovhps\t{$src, $dst|$dst, $src}",
6510                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
// Selected from a store of element 0 of (X86Unpckh $src, $src).
6511def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6512                       (ins f64mem:$dst, VR128X:$src),
6513                       "vmovhpd\t{$src, $dst|$dst, $src}",
6514                       [(store (f64 (extractelt
6515                                     (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6516                                     (iPTR 0))), addr:$dst)]>,
6517                       EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
6518let mayStore = 1, hasSideEffects = 0 in
6519def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6520                       (ins f64mem:$dst, VR128X:$src),
6521                       "vmovlps\t{$src, $dst|$dst, $src}",
6522                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
// Selected from a store of element 0 of $src.
6523def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6524                       (ins f64mem:$dst, VR128X:$src),
6525                       "vmovlpd\t{$src, $dst|$dst, $src}",
6526                       [(store (f64 (extractelt (v2f64 VR128X:$src),
6527                                     (iPTR 0))), addr:$dst)]>,
6528                       EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
6529} // SchedRW
6530
// Alternative DAG shape for the VMOVHPD store: element 0 of
// (X86VPermilpi $src, 1) being stored is selected as VMOVHPDZ128mr on the
// unpermuted $src.
6531let Predicates = [HasAVX512] in {
6532  // VMOVHPD patterns
6533  def : Pat<(store (f64 (extractelt
6534                           (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6535                           (iPTR 0))), addr:$dst),
6536           (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6537}
6538//===----------------------------------------------------------------------===//
6539// FMA - Fused Multiply Operations
6540//
6541
// Packed FMA, "213" operand form, for one vector width.
//   OpNode     - node used for the unmasked pattern.
//   MaskOpNode - node used for the masked patterns.
// $src1 is tied to $dst, so it is not listed in `ins`.  Every pattern feeds
// the node the operands in the order ($src2, $src1, $src3).
// Emits r (register), m (full-vector load of $src3) and mb (broadcast load of
// $src3) forms.
// NOTE(review): the trailing `1, 1` / `1, 0` arguments are positional flags of
// AVX512_maskable_fma (defined elsewhere); presumably commutability flags, with
// the memory forms clearing the second one -- confirm against that definition.
6542multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6543                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6544                               X86VectorVTInfo _> {
6545  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6546      Uses = [MXCSR], mayRaiseFPException = 1 in {
6547  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6548          (ins _.RC:$src2, _.RC:$src3),
6549          OpcodeStr, "$src3, $src2", "$src2, $src3",
6550          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
6551          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6552          EVEX, VVVV, Sched<[sched]>;
6553
6554  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6555          (ins _.RC:$src2, _.MemOp:$src3),
6556          OpcodeStr, "$src3, $src2", "$src2, $src3",
6557          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
6558          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6559          EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
6560                          sched.ReadAfterFold]>;
6561
6562  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6563            (ins _.RC:$src2, _.ScalarMemOp:$src3),
6564            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6565            !strconcat("$src2, ${src3}", _.BroadcastStr ),
6566            (OpNode _.RC:$src2,
6567             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
6568            (MaskOpNode _.RC:$src2,
6569             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6570            EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
6571                                    sched.ReadAfterFold]>;
6572  }
6573}
6574
// Register form of the 213 FMA with an explicit rounding-control operand
// (AVX512RC:$rc), encoded with EVEX_B + EVEX_RC.  The same OpNode, taking the
// rounding immediate as a fourth operand, is used for both the unmasked and
// the masked pattern.  Unlike the _rm multiclass, this `let` sets Uses =
// [MXCSR] but does not set mayRaiseFPException.
6575multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6576                                 X86FoldableSchedWrite sched,
6577                                 X86VectorVTInfo _> {
6578  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6579      Uses = [MXCSR] in
6580  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6581          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6582          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6583          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
6584          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6585          EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
6586}
6587
// Instantiates the 213 FMA for all vector widths of one element type.
// The 512-bit form (Z) combines the reg/mem/broadcast forms with the
// rounding-control form; the 256/128-bit forms (HasVLX + prd) have no
// rounding-control variant.  `prd` defaults to HasAVX512.
6588multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6589                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6590                                   X86SchedWriteWidths sched,
6591                                   AVX512VLVectorVTInfo _,
6592                                   Predicate prd = HasAVX512> {
6593  let Predicates = [prd] in {
6594    defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6595                                      sched.ZMM, _.info512>,
6596                  avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6597                                        _.info512>,
6598                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6599  }
6600  let Predicates = [HasVLX, prd] in {
6601    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6602                                    sched.YMM, _.info256>,
6603                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6604    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6605                                    sched.XMM, _.info128>,
6606                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6607  }
6608}
6609
// Element-type fan-out for the 213 FMA: PH (f16, gated by HasFP16, T_MAP6),
// PS (f32, T8) and PD (f64, T8 + REX_W).  The mnemonic suffix is appended to
// OpcodeStr.
6610multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6611                              SDNode MaskOpNode, SDNode OpNodeRnd> {
6612    defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6613                                      OpNodeRnd, SchedWriteFMA,
6614                                      avx512vl_f16_info, HasFP16>, T_MAP6, PD;
6615    defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6616                                      OpNodeRnd, SchedWriteFMA,
6617                                      avx512vl_f32_info>, T8, PD;
6618    defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6619                                      OpNodeRnd, SchedWriteFMA,
6620                                      avx512vl_f64_info>, T8, PD, REX_W;
6621}
6622
// 213-form packed FMA instructions.  Arguments after the mnemonic are the
// unmasked node, the masked node and the rounding-control node.  The
// FMADDSUB/FMSUBADD families pass the same node for the unmasked and masked
// patterns; the others pass an "any_" node for the unmasked pattern.
6623defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
6624                                       fma, X86FmaddRnd>;
6625defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
6626                                       X86Fmsub, X86FmsubRnd>;
6627defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
6628                                       X86Fmaddsub, X86FmaddsubRnd>;
6629defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
6630                                       X86Fmsubadd, X86FmsubaddRnd>;
6631defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
6632                                       X86Fnmadd, X86FnmaddRnd>;
6633defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
6634                                       X86Fnmsub, X86FnmsubRnd>;
6635
6636
// Packed FMA, "231" operand form, for one vector width.
// $src1 is tied to $dst.  Patterns feed the node the operands in the order
// ($src2, $src3, $src1).  The register form passes null_frag as its unmasked
// pattern, so only its masked pattern is matched here; the memory (m) and
// broadcast (mb) forms have both unmasked and masked patterns.
6637multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6638                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6639                               X86VectorVTInfo _> {
6640  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6641      Uses = [MXCSR], mayRaiseFPException = 1 in {
6642  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6643          (ins _.RC:$src2, _.RC:$src3),
6644          OpcodeStr, "$src3, $src2", "$src2, $src3",
6645          (null_frag),
6646          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6647          EVEX, VVVV, Sched<[sched]>;
6648
6649  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6650          (ins _.RC:$src2, _.MemOp:$src3),
6651          OpcodeStr, "$src3, $src2", "$src2, $src3",
6652          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6653          (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6654          EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
6655                          sched.ReadAfterFold]>;
6656
6657  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6658         (ins _.RC:$src2, _.ScalarMemOp:$src3),
6659         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6660         "$src2, ${src3}"#_.BroadcastStr,
6661         (_.VT (OpNode _.RC:$src2,
6662                      (_.VT (_.BroadcastLdFrag addr:$src3)),
6663                      _.RC:$src1)),
6664         (_.VT (MaskOpNode _.RC:$src2,
6665                           (_.VT (_.BroadcastLdFrag addr:$src3)),
6666                           _.RC:$src1)), 1, 0>, EVEX, VVVV, EVEX_B,
6667         Sched<[sched.Folded, sched.ReadAfterFold,
6668                sched.ReadAfterFold]>;
6669  }
6670}
6671
// Register form of the 231 FMA with an explicit rounding-control operand
// (AVX512RC:$rc), encoded with EVEX_B + EVEX_RC.  The unmasked pattern is
// null_frag; the masked pattern uses OpNode with operands
// ($src2, $src3, $src1, $rc).  This `let` sets Uses = [MXCSR] but does not set
// mayRaiseFPException.
6672multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6673                                 X86FoldableSchedWrite sched,
6674                                 X86VectorVTInfo _> {
6675  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6676      Uses = [MXCSR] in
6677  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6678          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6679          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6680          (null_frag),
6681          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6682          1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
6683}
6684
// Instantiates the 231 FMA for all vector widths of one element type.
// The 512-bit form (Z) combines the reg/mem/broadcast forms with the
// rounding-control form; the 256/128-bit forms (HasVLX + prd) have no
// rounding-control variant.  `prd` defaults to HasAVX512.
6685multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6686                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6687                                   X86SchedWriteWidths sched,
6688                                   AVX512VLVectorVTInfo _,
6689                                   Predicate prd = HasAVX512> {
6690  let Predicates = [prd] in {
6691    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6692                                      sched.ZMM, _.info512>,
6693                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6694                                        _.info512>,
6695                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6696  }
6697  let Predicates = [HasVLX, prd] in {
6698    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6699                                    sched.YMM, _.info256>,
6700                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6701    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6702                                    sched.XMM, _.info128>,
6703                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6704  }
6705}
6706
// Element-type fan-out for the 231 FMA: PH (f16, gated by HasFP16, T_MAP6),
// PS (f32, T8) and PD (f64, T8 + REX_W).  The mnemonic suffix is appended to
// OpcodeStr.
6707multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6708                              SDNode MaskOpNode, SDNode OpNodeRnd > {
6709    defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6710                                      OpNodeRnd, SchedWriteFMA,
6711                                      avx512vl_f16_info, HasFP16>, T_MAP6, PD;
6712    defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6713                                      OpNodeRnd, SchedWriteFMA,
6714                                      avx512vl_f32_info>, T8, PD;
6715    defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6716                                      OpNodeRnd, SchedWriteFMA,
6717                                      avx512vl_f64_info>, T8, PD, REX_W;
6718}
6719
// 231-form packed FMA instructions.  Each opcode is the corresponding 213
// opcode plus 0x10 (e.g. 0xA8 -> 0xB8); the node arguments match the 213
// instantiations.
6720defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
6721                                       fma, X86FmaddRnd>;
6722defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
6723                                       X86Fmsub, X86FmsubRnd>;
6724defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
6725                                       X86Fmaddsub, X86FmaddsubRnd>;
6726defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
6727                                       X86Fmsubadd, X86FmsubaddRnd>;
6728defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
6729                                       X86Fnmadd, X86FnmaddRnd>;
6730defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
6731                                       X86Fnmsub, X86FnmsubRnd>;
6732
// Packed FMA, "132" operand form, for one vector width.
// $src1 is tied to $dst.  The register form passes null_frag as its unmasked
// pattern and matches the masked node with operands ($src1, $src3, $src2).
// The memory and broadcast forms write their patterns in 312 order, i.e.
// ($src3, $src1, $src2), for the reason given in the comments below.
6733multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6734                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6735                               X86VectorVTInfo _> {
6736  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6737      Uses = [MXCSR], mayRaiseFPException = 1 in {
6738  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6739          (ins _.RC:$src2, _.RC:$src3),
6740          OpcodeStr, "$src3, $src2", "$src2, $src3",
6741          (null_frag),
6742          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
6743          EVEX, VVVV, Sched<[sched]>;
6744
6745  // The pattern is in 312 order so that the load is in a different place from
6746  // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6747  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6748          (ins _.RC:$src2, _.MemOp:$src3),
6749          OpcodeStr, "$src3, $src2", "$src2, $src3",
6750          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
6751          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6752          EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
6753                          sched.ReadAfterFold]>;
6754
6755  // The pattern is in 312 order so that the load is in a different place from
6756  // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6757  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6758         (ins _.RC:$src2, _.ScalarMemOp:$src3),
6759         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6760         "$src2, ${src3}"#_.BroadcastStr,
6761         (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6762                       _.RC:$src1, _.RC:$src2)),
6763         (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6764                           _.RC:$src1, _.RC:$src2)), 1, 0>,
6765         EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
6766                                 sched.ReadAfterFold]>;
6767  }
6768}
6769
// Packed 132-form FMA with an explicit rounding-control operand. Defines the
// single register form "rb", which takes AVX512RC:$rc and sets EVEX_B and
// EVEX_RC. There is no unmasked pattern (null_frag); the masked pattern lists
// the operands in 1-3-2 order followed by the (i32 timm:$rc) rounding mode.
6770multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6771                                 X86FoldableSchedWrite sched,
6772                                 X86VectorVTInfo _> {
6773  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6774      Uses = [MXCSR] in
6775  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6776          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6777          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6778          (null_frag),
6779          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
6780          1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
6781}
6782
// Instantiates the packed 132-form FMA for all three vector widths of one
// element type.
//   Z    - 512-bit: register/load/broadcast forms plus the rounding form,
//          guarded by [prd].
//   Z256 - 256-bit: register/load/broadcast forms only, guarded by [HasVLX, prd].
//   Z128 - 128-bit: register/load/broadcast forms only, guarded by [HasVLX, prd].
// OpNodeRnd is only used by the 512-bit instantiation. prd defaults to
// HasAVX512.
6783multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6784                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6785                                   X86SchedWriteWidths sched,
6786                                   AVX512VLVectorVTInfo _,
6787                                   Predicate prd = HasAVX512> {
6788  let Predicates = [prd] in {
6789    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6790                                      sched.ZMM, _.info512>,
6791                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6792                                        _.info512>,
6793                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6794  }
6795  let Predicates = [HasVLX, prd] in {
6796    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6797                                    sched.YMM, _.info256>,
6798                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6799    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6800                                    sched.XMM, _.info128>,
6801                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6802  }
6803}
6804
// Instantiates the packed 132-form FMA for the three floating-point element
// types, appending the type suffix to the mnemonic:
//   PH - "ph", avx512vl_f16_info, predicate HasFP16, encoded with T_MAP6, PD.
//   PS - "ps", avx512vl_f32_info, default predicate, encoded with T8, PD.
//   PD - "pd", avx512vl_f64_info, default predicate, encoded with T8, PD, REX_W.
// All three use the SchedWriteFMA scheduling classes.
6805multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6806                              SDNode MaskOpNode, SDNode OpNodeRnd > {
6807    defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6808                                      OpNodeRnd, SchedWriteFMA,
6809                                      avx512vl_f16_info, HasFP16>, T_MAP6, PD;
6810    defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6811                                      OpNodeRnd, SchedWriteFMA,
6812                                      avx512vl_f32_info>, T8, PD;
6813    defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6814                                      OpNodeRnd, SchedWriteFMA,
6815                                      avx512vl_f64_info>, T8, PD, REX_W;
6816}
6817
// Packed 132-form FMA instruction families. Arguments, in order: opcode byte,
// mnemonic stem, node for unmasked patterns, node for masked patterns, and
// node for the rounding-control form.
6818defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
6819                                       fma, X86FmaddRnd>;
6820defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
6821                                       X86Fmsub, X86FmsubRnd>;
6822defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
6823                                       X86Fmaddsub, X86FmaddsubRnd>;
6824defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
6825                                       X86Fmsubadd, X86FmsubaddRnd>;
6826defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
6827                                       X86Fnmadd, X86FnmaddRnd>;
6828defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
6829                                       X86Fnmsub, X86FnmsubRnd>;
6830
6831// Scalar FMA
// Scalar FMA definitions shared by the 213/231/132 forms. Two families are
// produced, all with $src1 tied to $dst:
//   r_Int / m_Int / rb_Int - maskable forms on the vector register class
//                            (_.RC), defined without patterns (null_frag).
//   r / m / rb             - isCodeGenOnly forms on the scalar register class
//                            (_.FRC) that carry the selection patterns.
// Parameters:
//   RHS_r, RHS_m, RHS_b - patterns for the r, m and rb definitions.
//   MaskOnlyReg         - when set, r and rb get an empty pattern list; m
//                         always uses RHS_m.
6832multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6833                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
6834let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6835  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6836          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6837          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6838          EVEX, VVVV, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
6839
6840  let mayLoad = 1 in
6841  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6842          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6843          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6844          EVEX, VVVV, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
6845                          SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
6846
  // Rounding-control form: takes AVX512RC:$rc and sets EVEX_B / EVEX_RC.
6847  let Uses = [MXCSR] in
6848  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6849         (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6850         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
6851         EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
6852
6853  let isCodeGenOnly = 1, isCommutable = 1 in {
6854    def r     : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
6855                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6856                     !strconcat(OpcodeStr,
6857                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6858                     !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV, SIMD_EXC;
6859    def m     : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
6860                    (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6861                    !strconcat(OpcodeStr,
6862                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6863                    [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
6864                                     SchedWriteFMA.Scl.ReadAfterFold]>, EVEX, VVVV, SIMD_EXC;
6865
6866    let Uses = [MXCSR] in
6867    def rb    : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
6868                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
6869                     !strconcat(OpcodeStr,
6870                              "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
6871                     !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
6872                     Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV;
6873  }// isCodeGenOnly = 1
6874}// Constraints = "$src1 = $dst"
6875}
6876
// Instantiates the 213, 231 and 132 scalar FMA forms for one element type.
// Each form passes avx512_fma3s_common a register, load and rounding pattern
// on the scalar register class (_.FRC), with the node operands ordered as:
//   213: (src2, src1, src3)   - register patterns kept (MaskOnlyReg = 0)
//   231: (src2, src3, src1)   - register patterns dropped (MaskOnlyReg = 1)
//   132: (src1, src3, src2)   - register patterns dropped (MaskOnlyReg = 1)
// Record names are NAME#<form>#SUFF#Z and mnemonics are
// OpcodeStr#<form>#_.Suffix.
6877multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6878                            string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
6879                            X86VectorVTInfo _, string SUFF> {
6880  let ExeDomain = _.ExeDomain in {
6881  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6882                // Operands for intrinsic are in 123 order to preserve passthru
6883                // semantics.
6884                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6885                         _.FRC:$src3))),
6886                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6887                         (_.ScalarLdFrag addr:$src3)))),
6888                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
6889                         _.FRC:$src3, (i32 timm:$rc)))), 0>;
6890
6891  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6892                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6893                                          _.FRC:$src1))),
6894                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6895                            (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
6896                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
6897                         _.FRC:$src1, (i32 timm:$rc)))), 1>;
6898
6899  // One pattern is in 312 order so that the load is in a different place from
6900  // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6901  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6902                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6903                         _.FRC:$src2))),
6904                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6905                                 _.FRC:$src1, _.FRC:$src2))),
6906                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
6907                         _.FRC:$src2, (i32 timm:$rc)))), 1>;
6908  }
6909}
6910
// Instantiates all three scalar FMA forms for each scalar element type:
//   SS - f32x_info, EVEX_CD8<32, CD8VT1>, T8, PD          (HasAVX512)
//   SD - f64x_info, EVEX_CD8<64, CD8VT1>, REX_W, T8, PD   (HasAVX512)
//   SH - f16x_info, EVEX_CD8<16, CD8VT1>, T_MAP6, PD      (HasFP16)
// All three are marked VEX_LIG.
6911multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6912                        string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
6913  let Predicates = [HasAVX512] in {
6914    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6915                                 OpNodeRnd, f32x_info, "SS">,
6916                                 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD;
6917    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6918                                 OpNodeRnd, f64x_info, "SD">,
6919                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD;
6920  }
6921  let Predicates = [HasFP16] in {
6922    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6923                                 OpNodeRnd, f16x_info, "SH">,
6924                                 EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6, PD;
6925  }
6926}
6927
// Scalar FMA instruction families. Arguments, in order: the 213, 231 and 132
// opcode bytes, the mnemonic stem, the node for the non-rounding patterns and
// the node for the rounding-control patterns.
6928defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
6929defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
6930defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
6931defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
6932
// Selection patterns that fold "scalar FMA on element 0 of $src1, then insert
// the result back into $src1 with Move" into the *_Int scalar FMA
// instructions.
//   Op       - node matched by the unmasked patterns.
//   MaskedOp - node matched under X86selects_mask.
//   RndOp    - node matched by the rounding patterns; takes (i32 timm:$rc).
//   Prefix / Suffix - build the instruction record name, e.g.
//              Prefix#"213"#Suffix#"Zr_Int".
//   Move     - outer node that merges the scalar result into $src1.
//   _        - vector type info; ZeroFP - zero leaf used for zero-masking.
//   prd      - predicate guarding every pattern (default HasAVX512).
// Which form is selected depends on where element 0 of $src1 and the load
// appear among the three node operands:
//   213: (src2, src1[0], src3)
//   231: (src2, src3, src1[0])
//   132: (src1[0], load src3, src2)   - load forms only
6933multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
6934                                      SDNode RndOp, string Prefix,
6935                                      string Suffix, SDNode Move,
6936                                      X86VectorVTInfo _, PatLeaf ZeroFP,
6937                                      Predicate prd = HasAVX512> {
6938  let Predicates = [prd] in {
    // Unmasked register and load forms.
6939    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6940                (Op _.FRC:$src2,
6941                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6942                    _.FRC:$src3))))),
6943              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6944               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6945               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6946
6947    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6948                (Op _.FRC:$src2, _.FRC:$src3,
6949                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6950              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6951               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6952               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6953
6954    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6955                (Op _.FRC:$src2,
6956                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6957                    (_.ScalarLdFrag addr:$src3)))))),
6958              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6959               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6960               addr:$src3)>;
6961
6962    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6963                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6964                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6965              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6966               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6967               addr:$src3)>;
6968
6969    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6970                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6971                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6972              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6973               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6974               addr:$src3)>;
6975
    // Merge-masked forms: the select falls back to element 0 of $src1.
6976    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6977               (X86selects_mask VK1WM:$mask,
6978                (MaskedOp _.FRC:$src2,
6979                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6980                    _.FRC:$src3),
6981                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6982              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6983               VR128X:$src1, VK1WM:$mask,
6984               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6985               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6986
6987    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6988               (X86selects_mask VK1WM:$mask,
6989                (MaskedOp _.FRC:$src2,
6990                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6991                    (_.ScalarLdFrag addr:$src3)),
6992                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6993              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6994               VR128X:$src1, VK1WM:$mask,
6995               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6996
6997    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6998               (X86selects_mask VK1WM:$mask,
6999                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7000                          (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7001                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7002              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
7003               VR128X:$src1, VK1WM:$mask,
7004               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7005
7006    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7007               (X86selects_mask VK1WM:$mask,
7008                (MaskedOp _.FRC:$src2, _.FRC:$src3,
7009                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7010                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7011              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
7012               VR128X:$src1, VK1WM:$mask,
7013               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7014               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7015
7016    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7017               (X86selects_mask VK1WM:$mask,
7018                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7019                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7020                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7021              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
7022               VR128X:$src1, VK1WM:$mask,
7023               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7024
    // Zero-masked forms: the select falls back to ZeroFP.
7025    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7026               (X86selects_mask VK1WM:$mask,
7027                (MaskedOp _.FRC:$src2,
7028                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7029                          _.FRC:$src3),
7030                (_.EltVT ZeroFP)))))),
7031              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
7032               VR128X:$src1, VK1WM:$mask,
7033               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7034               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7035
7036    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7037               (X86selects_mask VK1WM:$mask,
7038                (MaskedOp _.FRC:$src2, _.FRC:$src3,
7039                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7040                (_.EltVT ZeroFP)))))),
7041              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
7042               VR128X:$src1, VK1WM:$mask,
7043               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7044               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7045
7046    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7047               (X86selects_mask VK1WM:$mask,
7048                (MaskedOp _.FRC:$src2,
7049                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7050                          (_.ScalarLdFrag addr:$src3)),
7051                (_.EltVT ZeroFP)))))),
7052              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
7053               VR128X:$src1, VK1WM:$mask,
7054               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7055
    // 132 load form: the load must be the second node operand and $src2 the
    // third, matching the Zm_Int and Zm_Intk patterns above. With the load in
    // the third position the DAG is the commuted 213 shape, for which the 132
    // instruction would compute the wrong result.
7056    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7057               (X86selects_mask VK1WM:$mask,
7058                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7059                          (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7060                (_.EltVT ZeroFP)))))),
7061              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
7062               VR128X:$src1, VK1WM:$mask,
7063               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7064
7065    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7066               (X86selects_mask VK1WM:$mask,
7067                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7068                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7069                (_.EltVT ZeroFP)))))),
7070              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
7071               VR128X:$src1, VK1WM:$mask,
7072               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7073
7074    // Patterns with rounding mode.
7075    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7076                (RndOp _.FRC:$src2,
7077                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7078                       _.FRC:$src3, (i32 timm:$rc)))))),
7079              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
7080               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7081               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7082
7083    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7084                (RndOp _.FRC:$src2, _.FRC:$src3,
7085                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7086                       (i32 timm:$rc)))))),
7087              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
7088               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7089               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7090
7091    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7092               (X86selects_mask VK1WM:$mask,
7093                (RndOp _.FRC:$src2,
7094                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7095                       _.FRC:$src3, (i32 timm:$rc)),
7096                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7097              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
7098               VR128X:$src1, VK1WM:$mask,
7099               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7100               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7101
7102    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7103               (X86selects_mask VK1WM:$mask,
7104                (RndOp _.FRC:$src2, _.FRC:$src3,
7105                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7106                       (i32 timm:$rc)),
7107                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7108              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
7109               VR128X:$src1, VK1WM:$mask,
7110               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7111               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7112
7113    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7114               (X86selects_mask VK1WM:$mask,
7115                (RndOp _.FRC:$src2,
7116                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7117                       _.FRC:$src3, (i32 timm:$rc)),
7118                (_.EltVT ZeroFP)))))),
7119              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
7120               VR128X:$src1, VK1WM:$mask,
7121               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7122               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7123
7124    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7125               (X86selects_mask VK1WM:$mask,
7126                (RndOp _.FRC:$src2, _.FRC:$src3,
7127                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7128                       (i32 timm:$rc)),
7129                (_.EltVT ZeroFP)))))),
7130              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
7131               VR128X:$src1, VK1WM:$mask,
7132               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7133               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7134  }
7135}
// Instantiate the scalar FMA folding patterns for each operation
// (VFMADD / VFMSUB / VFNMADD / VFNMSUB) and each element type.
// The SH (f16) set uses X86Movsh, v8f16x_info and fp16imm0 and is guarded by
// HasFP16.
7136defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
7137                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7138defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
7139                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7140defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
7141                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7142defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
7143                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7144
// The SS (f32) set uses X86Movss, v4f32x_info and fp32imm0, with the default
// predicate.
7145defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7146                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7147defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7148                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7149defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7150                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7151defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7152                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7153
// The SD (f64) set uses X86Movsd, v2f64x_info and fp64imm0, with the default
// predicate.
7154defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7155                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7156defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7157                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7158defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7159                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7160defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7161                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7162
7163//===----------------------------------------------------------------------===//
7164// AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
7165//===----------------------------------------------------------------------===//
// Register (r), full-vector load (m) and broadcast-load (mb) forms of a
// 52-bit multiply-add instruction for one vector width, built through
// AVX512_maskable_3src with $src1 tied to $dst. Every pattern passes
// ($src2, <src3 operand>, $src1) to OpNode.
// NOTE(review): only the r form passes the trailing "1, 1" arguments; they
// are presumably commutability flags of AVX512_maskable_3src - confirm.
7166let Constraints = "$src1 = $dst" in {
7167multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7168                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7169  // NOTE: The SDNode have the multiply operands first with the add last.
7170  // This enables commuted load patterns to be autogenerated by tablegen.
7171  let ExeDomain = _.ExeDomain in {
7172  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7173          (ins _.RC:$src2, _.RC:$src3),
7174          OpcodeStr, "$src3, $src2", "$src2, $src3",
7175          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7176          T8, PD, EVEX, VVVV, Sched<[sched]>;
7177
7178  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7179          (ins _.RC:$src2, _.MemOp:$src3),
7180          OpcodeStr, "$src3, $src2", "$src2, $src3",
7181          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7182          T8, PD, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
7183                                sched.ReadAfterFold]>;
7184
  // Broadcast form: $src3 is a scalar memory operand and EVEX_B is set.
7185  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7186            (ins _.RC:$src2, _.ScalarMemOp:$src3),
7187            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
7188            !strconcat("$src2, ${src3}", _.BroadcastStr ),
7189            (OpNode _.RC:$src2,
7190                    (_.VT (_.BroadcastLdFrag addr:$src3)),
7191                    _.RC:$src1)>,
7192            T8, PD, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
7193                                          sched.ReadAfterFold]>;
7194  }
7195}
7196} // Constraints = "$src1 = $dst"
7197
// Instantiates avx512_pmadd52_rm for the 512-, 256- and 128-bit vector widths.
// The 512-bit form (Z) requires HasIFMA; the 256-bit (Z256) and 128-bit (Z128)
// forms additionally require HasVLX.
7198multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7199                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7200  let Predicates = [HasIFMA] in {
7201    defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7202                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7203  }
7204  let Predicates = [HasVLX, HasIFMA] in {
7205    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7206                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7207    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7208                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7209  }
7210}
7211
// The two IFMA instruction families, both on 64-bit integer elements
// (avx512vl_i64_info) with REX_W set and scheduled as SchedWriteVecIMul.
7212defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7213                                         SchedWriteVecIMul, avx512vl_i64_info>,
7214                                         REX_W;
7215defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7216                                         SchedWriteVecIMul, avx512vl_i64_info>,
7217                                         REX_W;
7218
7219//===----------------------------------------------------------------------===//
7220// AVX-512  Scalar convert from signed integer to float/double
7221//===----------------------------------------------------------------------===//
7222
// Scalar integer-to-FP conversion (no rounding-control operand).
// Defines four instructions plus one assembler alias:
//   rr / rm         - isCodeGenOnly forms on the scalar register class
//                     (DstVT.FRC) with empty pattern lists.
//   rr_Int / rm_Int - forms on the vector register class (DstVT.RC) whose
//                     patterns apply OpNode to $src1 and the integer source
//                     (register, or ld_frag load).
//   InstAlias       - AT&T-only alias mapping "v"#asm#mem to rr_Int.
// Parameters:
//   mem                  - suffix string; the rm / rm_Int assembly strings
//                          embed it as asm#"{"#mem#"}".
//   _Uses                - value for Uses on all four defs (default [MXCSR]).
//   _mayRaiseFPException - value for mayRaiseFPException (default 1).
7223multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7224                    RegisterClass SrcRC, X86VectorVTInfo DstVT,
7225                    X86MemOperand x86memop, PatFrag ld_frag, string asm,
7226                    string mem, list<Register> _Uses = [MXCSR],
7227                    bit _mayRaiseFPException = 1> {
7228let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7229    mayRaiseFPException = _mayRaiseFPException in {
7230  let hasSideEffects = 0, isCodeGenOnly = 1 in {
7231    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7232              (ins DstVT.FRC:$src1, SrcRC:$src),
7233              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7234              EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7235    let mayLoad = 1 in
7236      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7237              (ins DstVT.FRC:$src1, x86memop:$src),
7238              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7239              EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
7240  } // hasSideEffects = 0
7241  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7242                (ins DstVT.RC:$src1, SrcRC:$src2),
7243                !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7244                [(set DstVT.RC:$dst,
7245                      (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7246               EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7247
7248  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7249                (ins DstVT.RC:$src1, x86memop:$src2),
7250                asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7251                [(set DstVT.RC:$dst,
7252                      (OpNode (DstVT.VT DstVT.RC:$src1),
7253                               (ld_frag addr:$src2)))]>,
7254                EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
7255}
  // Accept the explicitly suffixed mnemonic for the register form (AT&T only).
7256  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7257                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7258                  DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
7259}
7260
// Scalar integer-to-FP conversion with an explicit rounding-control operand.
// Defines rrb_Int on the vector register class, taking AVX512RC:$rc and
// setting EVEX_B / EVEX_RC. Its pattern applies OpNode to $src1, $src2 and
// (i32 timm:$rc). Also adds an AT&T-only alias mapping "v"#asm#mem to
// rrb_Int.
7261multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7262                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
7263                               X86VectorVTInfo DstVT, string asm,
7264                               string mem> {
7265  let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7266  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7267              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7268              !strconcat(asm,
7269                  "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7270              [(set DstVT.RC:$dst,
7271                    (OpNode (DstVT.VT DstVT.RC:$src1),
7272                             SrcRC:$src2,
7273                             (i32 timm:$rc)))]>,
7274              EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7275  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7276                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7277                  DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
7278}
7279
// Combines the rounding form (avx512_vcvtsi_round, using OpNodeRnd) and the
// non-rounding forms (avx512_vcvtsi, using OpNode and its default _Uses /
// _mayRaiseFPException) under one NAME, and marks them all VEX_LIG.
7280multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7281                                X86FoldableSchedWrite sched,
7282                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
7283                                X86MemOperand x86memop, PatFrag ld_frag,
7284                                string asm, string mem> {
7285  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7286              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7287                            ld_frag, asm, mem>, VEX_LIG;
7288}
7289
// EVEX scalar integer -> float/double conversions, all gated on HasAVX512:
// signed forms first (opcode 0x2A), then unsigned forms (opcode 0x7B), each
// followed by its assembler aliases and its selection patterns.
7290let Predicates = [HasAVX512] in {
// Signed conversions. The "ss" forms carry XS and the "sd" forms XD; forms
// with a GR64 source add REX_W and use EVEX_CD8<64, CD8VT1>.
7291defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7292                                 WriteCvtI2SS, GR32,
7293                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7294                                 TB, XS, EVEX_CD8<32, CD8VT1>;
7295defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7296                                 WriteCvtI2SS, GR64,
7297                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7298                                 TB, XS, REX_W, EVEX_CD8<64, CD8VT1>;
// Unlike its siblings, the GR32 -> f64 form is built directly from
// avx512_vcvtsi with null_frag, so it gets no avx512_vcvtsi_round records and
// must spell out VEX_LIG itself. The trailing `[], 0` arguments override
// parameters of avx512_vcvtsi that are declared outside this part of the file.
// NOTE(review): presumably there is no rounding form because every i32 is
// exactly representable as f64 - confirm.
7299defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7300                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7301                                 TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7302defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7303                                 WriteCvtI2SD, GR64,
7304                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7305                                 TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7306
// Suffix-less spellings of the memory forms resolve to the i32mem (32-bit
// source) instructions. The aliases are registered for the "att" assembler
// variant with InstAlias's Emit argument set to 0.
7307def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7308              (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7309def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7310              (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7311
// Selection patterns for any_sint_to_fp producing a plain f32/f64. Memory
// sources come first, then register sources. The instruction's first operand
// is fed IMPLICIT_DEF.
// NOTE(review): that operand presumably supplies the destination's upper
// elements, which are don't-care for a scalar result - confirm against
// avx512_vcvtsi.
7312def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7313          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7314def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7315          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7316def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7317          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7318def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7319          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7320
7321def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7322          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7323def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7324          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7325def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7326          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7327def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7328          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7329
// Unsigned conversions: same layout as the signed group above, using opcode
// 0x7B and the X86UintToFp / X86UintToFpRnd nodes. The GR32 -> f64 form is
// again instantiated directly from avx512_vcvtsi with null_frag.
7330defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7331                                  WriteCvtI2SS, GR32,
7332                                  v4f32x_info, i32mem, loadi32,
7333                                  "cvtusi2ss", "l">, TB, XS, EVEX_CD8<32, CD8VT1>;
7334defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7335                                  WriteCvtI2SS, GR64,
7336                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7337                                  TB, XS, REX_W, EVEX_CD8<64, CD8VT1>;
7338defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7339                                  i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7340                                  TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7341defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7342                                  WriteCvtI2SD, GR64,
7343                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7344                                  TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7345
// Suffix-less memory-form aliases for the unsigned conversions; as for the
// signed ones they resolve to the i32mem instructions.
7346def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7347              (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7348def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7349              (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7350
// Selection patterns for any_uint_to_fp: memory sources, then register
// sources, with IMPLICIT_DEF as the first instruction operand.
7351def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7352          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7353def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7354          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7355def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7356          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7357def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7358          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7359
7360def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7361          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7362def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7363          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7364def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7365          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7366def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7367          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7368}
7369
7370//===----------------------------------------------------------------------===//
7371// AVX-512  Scalar convert from float/double to integer
7372//===----------------------------------------------------------------------===//
7373
// Scalar FP -> integer conversion taking its source in a vector register
// class (the "_Int" forms). Under Predicates = [prd] it emits:
//   rr_Int  - register source, matched by OpNode; tagged SIMD_EXC.
//   rrb_Int - register source plus an explicit AVX512RC:$rc rounding operand,
//             matched by OpNodeRnd; encoded with EVEX_B + EVEX_RC and marked
//             as reading MXCSR.
//   rm_Int  - memory source (SrcVT.IntScalarMemOp), matched by OpNode over
//             SrcVT.ScalarIntMemFrags; tagged SIMD_EXC.
// followed by one "att"-variant alias per form, spelled "v" # asm # aliasStr.
// NOTE(review): the instruction strings use `asm` without the "v" that the
// aliases prepend, so the SI class presumably adds that prefix itself -
// confirm in X86InstrFormats.td.
7374multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7375                                  X86VectorVTInfo DstVT, SDNode OpNode,
7376                                  SDNode OpNodeRnd,
7377                                  X86FoldableSchedWrite sched, string asm,
7378                                  string aliasStr, Predicate prd = HasAVX512> {
7379  let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
7380    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7381                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7382                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7383                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7384    let Uses = [MXCSR] in
7385    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7386                 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7387                 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7388                 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7389                 Sched<[sched]>;
7390    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7391                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7392                [(set DstVT.RC:$dst, (OpNode
7393                      (SrcVT.ScalarIntMemFrags addr:$src)))]>,
7394                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7395  } // Predicates = [prd]
7396
  // Size-suffixed spellings; these sit outside the `let`, so they do not
  // inherit Predicates/ExeDomain from it.
7397  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7398          (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7399  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7400          (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7401  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7402          (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7403                                          SrcVT.IntScalarMemOp:$src), 0, "att">;
7404}
7405
7406// Convert float/double to signed/unsigned int 32/64
// Opcode 0x2D is used for the signed conversions (X86cvts2si) and 0x79 for
// the unsigned ones (X86cvts2usi). XS/XD and the EVEX_CD8 element size
// (32/64) follow the f32/f64 source; REX_W is added when the destination is
// i64x_info. aliasStr supplies the "{l}"/"{q}" size suffix for the aliases.
7407defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7408                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7409                                   TB, XS, EVEX_CD8<32, CD8VT1>;
7410defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7411                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7412                                   TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
7413defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7414                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7415                                   TB, XS, EVEX_CD8<32, CD8VT1>;
7416defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7417                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7418                                   TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
7419defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7420                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7421                                   TB, XD, EVEX_CD8<64, CD8VT1>;
7422defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7423                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7424                                   TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7425defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7426                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7427                                   TB, XD, EVEX_CD8<64, CD8VT1>;
7428defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7429                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7430                                   TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7431
// Scalar FP -> integer conversion on the scalar FP register class
// (SrcVT.FRC) rather than a vector register. Emits two isCodeGenOnly records
// under HasAVX512:
//   rr - matches (OpNode SrcVT.FRC:$src).
//   rm - matches OpNode applied to a SrcVT.ScalarLdFrag load.
// Here `asm` is used verbatim as the mnemonic, so callers pass the full
// "v"-prefixed name.
7432multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
7433                        X86VectorVTInfo DstVT, SDNode OpNode,
7434                        X86FoldableSchedWrite sched> {
7435  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7436    let isCodeGenOnly = 1 in {
7437    def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
7438                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7439                [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
7440                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7441    def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
7442                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7443                [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
7444                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7445    }
7446  } // Predicates = [HasAVX512]
7447}
7448
// lrint/llrint selection. These reuse the VCVTS{S,D}2SI{,64}Z names already
// instantiated with avx512_cvt_s_int_round, adding the codegen-only rr/rm
// records next to the existing *_Int ones. The i32 destinations are matched
// by lrint and the i64 destinations by llrint.
7449defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
7450                       lrint, WriteCvtSS2I>, TB, XS, EVEX_CD8<32, CD8VT1>;
7451defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
7452                       llrint, WriteCvtSS2I>, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
7453defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
7454                       lrint, WriteCvtSD2I>, TB, XD, EVEX_CD8<64, CD8VT1>;
7455defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
7456                       llrint, WriteCvtSD2I>, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;
7457
// lrint may also be asked for an i64 result; select that case to the same
// 64-bit rr/rm instructions that were instantiated for llrint.
7458let Predicates = [HasAVX512] in {
7459  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
7460  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
7461
7462  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
7463  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
7464}
7465
7466// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7467// which produce unnecessary vmovs{s,d} instructions
// Each pattern matches X86Movss/X86Movsd(dst, scalar_to_vector(int_to_fp x))
// and replaces it with the *_Int form of the conversion, passing $dst as the
// instruction's first (vector) operand so no separate move is emitted.
// Order: signed f32, signed f64, unsigned f32, unsigned f64; within each
// group GR64, i64 load, GR32, i32 load.
7468let Predicates = [HasAVX512] in {
7469def : Pat<(v4f32 (X86Movss
7470                   (v4f32 VR128X:$dst),
7471                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7472          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7473
7474def : Pat<(v4f32 (X86Movss
7475                   (v4f32 VR128X:$dst),
7476                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7477          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7478
7479def : Pat<(v4f32 (X86Movss
7480                   (v4f32 VR128X:$dst),
7481                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7482          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7483
7484def : Pat<(v4f32 (X86Movss
7485                   (v4f32 VR128X:$dst),
7486                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7487          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7488
7489def : Pat<(v2f64 (X86Movsd
7490                   (v2f64 VR128X:$dst),
7491                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7492          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7493
7494def : Pat<(v2f64 (X86Movsd
7495                   (v2f64 VR128X:$dst),
7496                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7497          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7498
7499def : Pat<(v2f64 (X86Movsd
7500                   (v2f64 VR128X:$dst),
7501                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7502          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7503
7504def : Pat<(v2f64 (X86Movsd
7505                   (v2f64 VR128X:$dst),
7506                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7507          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7508
7509def : Pat<(v4f32 (X86Movss
7510                   (v4f32 VR128X:$dst),
7511                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7512          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7513
7514def : Pat<(v4f32 (X86Movss
7515                   (v4f32 VR128X:$dst),
7516                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7517          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7518
7519def : Pat<(v4f32 (X86Movss
7520                   (v4f32 VR128X:$dst),
7521                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7522          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7523
7524def : Pat<(v4f32 (X86Movss
7525                   (v4f32 VR128X:$dst),
7526                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7527          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7528
7529def : Pat<(v2f64 (X86Movsd
7530                   (v2f64 VR128X:$dst),
7531                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7532          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7533
7534def : Pat<(v2f64 (X86Movsd
7535                   (v2f64 VR128X:$dst),
7536                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7537          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7538
7539def : Pat<(v2f64 (X86Movsd
7540                   (v2f64 VR128X:$dst),
7541                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7542          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7543
7544def : Pat<(v2f64 (X86Movsd
7545                   (v2f64 VR128X:$dst),
7546                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7547          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7548} // Predicates = [HasAVX512]
7549
7550// Convert float/double to signed/unsigned int 32/64 with truncation
// Under Predicates = [prd] this emits:
//   rr / rm         - isCodeGenOnly forms on the scalar class (_SrcRC.FRC /
//                     _SrcRC.ScalarMemOp), matched by OpNode.
//   rr_Int / rm_Int - vector-register / IntScalarMemOp forms, matched by
//                     OpNodeInt.
//   rrb_Int         - "{sae}" register form matched by OpNodeSAE; encoded
//                     with EVEX_B, marked as reading MXCSR and, unlike the
//                     other records, not tagged SIMD_EXC.
// followed by "att"-variant aliases (asm # aliasStr) for the three *_Int
// forms. Despite the "RC" suffix, _SrcRC and _DstRC are X86VectorVTInfo
// records, not register classes.
7551multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7552                            X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
7553                            SDNode OpNodeInt, SDNode OpNodeSAE,
7554                            X86FoldableSchedWrite sched, string aliasStr,
7555                            Predicate prd = HasAVX512> {
7556let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
7557  let isCodeGenOnly = 1 in {
7558  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7559              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7560              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7561              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7562  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7563              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7564              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7565              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7566  }
7567
7568  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7569            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7570           [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7571           EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7572  let Uses = [MXCSR] in
7573  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7574            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7575            [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7576                                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7577  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7578              (ins _SrcRC.IntScalarMemOp:$src),
7579              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7580              [(set _DstRC.RC:$dst,
7581                (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
7582              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7583} // Predicates = [prd]
7584
  // Size-suffixed spellings of the *_Int forms; `asm` already contains the
  // full mnemonic here, so nothing is prepended.
7585  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7586          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7587  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7588          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7589  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7590          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7591                                          _SrcRC.IntScalarMemOp:$src), 0, "att">;
7592}
7593
// Truncating FP -> signed integer conversions: opcode 0x2C, generic node
// any_fp_to_sint, intrinsic nodes X86cvtts2Int / X86cvtts2IntSAE.
// XS/XD and the EVEX_CD8 element size follow the f32/f64 source; REX_W marks
// the i64 destinations.
7594defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7595                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7596                        "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
7597defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7598                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7599                        "{q}">, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
7600defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7601                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7602                        "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
7603defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7604                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7605                        "{q}">, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;
7606
// Truncating FP -> unsigned integer conversions: opcode 0x78, generic node
// any_fp_to_uint, intrinsic nodes X86cvtts2UInt / X86cvtts2UIntSAE.
7607defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7608                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7609                        "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
7610defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7611                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7612                        "{q}">, TB, XS,REX_W, EVEX_CD8<32, CD8VT1>;
7613defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7614                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7615                        "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
7616defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7617                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7618                        "{q}">, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7619
7620//===----------------------------------------------------------------------===//
7621// AVX-512  Convert from float to double and back
7622//===----------------------------------------------------------------------===//
7623
// Scalar FP -> FP conversion from element type _Src to element type _.
// The enclosing `let` marks every record as reading MXCSR and as possibly
// raising an FP exception. Emits:
//   rr_Int / rm_Int - AVX512_maskable_scalar forms on vector registers;
//                     OpNode takes $src1 (a vector of the result type) and
//                     the _Src value from a register or from
//                     _Src.ScalarIntMemFrags.
//   rr / rm         - pattern-less isCodeGenOnly forms on the scalar FRC
//                     classes (hasSideEffects = 0; rm additionally mayLoad).
//                     Having no patterns of their own, they are only
//                     reachable through standalone Pat records.
7624let Uses = [MXCSR], mayRaiseFPException = 1 in
7625multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7626                                X86VectorVTInfo _Src, SDNode OpNode,
7627                                X86FoldableSchedWrite sched> {
7628  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7629                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7630                         "$src2, $src1", "$src1, $src2",
7631                         (_.VT (OpNode (_.VT _.RC:$src1),
7632                                       (_Src.VT _Src.RC:$src2)))>,
7633                         EVEX, VVVV, VEX_LIG, Sched<[sched]>;
7634  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7635                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7636                         "$src2, $src1", "$src1, $src2",
7637                         (_.VT (OpNode (_.VT _.RC:$src1),
7638                                  (_Src.ScalarIntMemFrags addr:$src2)))>,
7639                         EVEX, VVVV, VEX_LIG,
7640                         Sched<[sched.Folded, sched.ReadAfterFold]>;
7641
7642  let isCodeGenOnly = 1, hasSideEffects = 0 in {
7643    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7644               (ins _.FRC:$src1, _Src.FRC:$src2),
7645               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7646               EVEX, VVVV, VEX_LIG, Sched<[sched]>;
7647    let mayLoad = 1 in
7648    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7649               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7650               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7651               EVEX, VVVV, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7652  }
7653}
7654
7655// Scalar Conversion with SAE - suppress all exceptions
// Adds a single maskable register form, rrb_Int, printed with "{sae}" and
// matched by OpNodeSAE. It is encoded with EVEX_B and marked as reading
// MXCSR; no mayRaiseFPException is set here.
7656multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7657                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
7658                                    X86FoldableSchedWrite sched> {
7659  let Uses = [MXCSR] in
7660  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7661                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7662                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7663                        (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7664                                         (_Src.VT _Src.RC:$src2)))>,
7665                        EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
7666}
7667
7668// Scalar Conversion with rounding control (RC)
// Adds a single maskable register form, rrb_Int, that takes an explicit
// AVX512RC:$rc operand and is matched by OpNodeRnd with that immediate. It is
// encoded with EVEX_B + EVEX_RC and marked as reading MXCSR.
7669multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7670                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
7671                                   X86FoldableSchedWrite sched> {
7672  let Uses = [MXCSR] in
7673  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7674                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7675                        "$rc, $src2, $src1", "$src1, $src2, $rc",
7676                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7677                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7678                        EVEX, VVVV, VEX_LIG, Sched<[sched]>,
7679                        EVEX_B, EVEX_RC;
7680}
// Narrowing scalar FP conversion: combines the regular forms
// (avx512_cvt_fp_scalar) with the rounding-control form
// (avx512_cvt_fp_rc_scalar) under the "Z" suffix, gated on `prd`.
// Note the argument order flips: this multiclass takes (_src, _dst) but the
// helpers take (result, source), so they are passed (_dst, _src).
// The EVEX_CD8 element size is taken from the source type; ExeDomain is
// fixed to SSEPackedSingle regardless of the types involved.
7681multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
7682                                      SDNode OpNode, SDNode OpNodeRnd,
7683                                      X86FoldableSchedWrite sched,
7684                                      X86VectorVTInfo _src, X86VectorVTInfo _dst,
7685                                      Predicate prd = HasAVX512> {
7686  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7687    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7688             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7689                               OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
7690  }
7691}
7692
// Widening scalar FP conversion: combines the regular forms
// (avx512_cvt_fp_scalar) with the "{sae}" form (avx512_cvt_fp_sae_scalar)
// under the "Z" suffix, gated on `prd`. A widening conversion gets an SAE
// variant rather than a rounding-control one.
// As in avx512_cvt_fp_scalar_trunc, parameters arrive as (_src, _dst) and are
// forwarded as (_dst, _src); EVEX_CD8 uses the source element size and
// ExeDomain is fixed to SSEPackedSingle.
7693multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
7694                                       SDNode OpNode, SDNode OpNodeSAE,
7695                                       X86FoldableSchedWrite sched,
7696                                       X86VectorVTInfo _src, X86VectorVTInfo _dst,
7697                                       Predicate prd = HasAVX512> {
7698  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7699    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7700             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7701             EVEX_CD8<_src.EltSize, CD8VT1>;
7702  }
7703}
// Scalar FP <-> FP conversion instantiations. Narrowing conversions use
// X86frounds / X86froundsRnd via avx512_cvt_fp_scalar_trunc; widening ones
// use X86fpexts / X86fpextsSAE via avx512_cvt_fp_scalar_extend. Forms with an
// f64 source add REX_W. The four half-precision forms require HasFP16 and
// use the T_MAP5 / T_MAP6 opcode maps instead of TB.
7704defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
7705                                         X86froundsRnd, WriteCvtSD2SS, f64x_info,
7706                                         f32x_info>, TB, XD, REX_W;
7707defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
7708                                          X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7709                                          f64x_info>, TB, XS;
7710defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
7711                                          X86froundsRnd, WriteCvtSD2SS, f64x_info,
7712                                          f16x_info, HasFP16>, T_MAP5, XD, REX_W;
7713defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
7714                                          X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7715                                          f64x_info, HasFP16>, T_MAP5, XS;
7716defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
7717                                          X86froundsRnd, WriteCvtSD2SS, f32x_info,
7718                                          f16x_info, HasFP16>, T_MAP5;
7719defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
7720                                          X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7721                                          f32x_info, HasFP16>, T_MAP6;
7722
// Selection patterns for generic any_fpextend / any_fpround on scalar
// registers. They target the pattern-less rr/rm records and pass
// IMPLICIT_DEF as the first ($src1) operand. The load-folding extend
// patterns additionally require OptForSize.
// NOTE(review): presumably the OptForSize gate avoids a dependency on the
// undefined pass-through register in the folded form - confirm.
7723def : Pat<(f64 (any_fpextend FR32X:$src)),
7724          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7725          Requires<[HasAVX512]>;
7726def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7727          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7728          Requires<[HasAVX512, OptForSize]>;
7729
7730def : Pat<(f32 (any_fpround FR64X:$src)),
7731          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7732           Requires<[HasAVX512]>;
7733
// Half-precision variants of the same patterns, gated on HasFP16.
7734def : Pat<(f32 (any_fpextend FR16X:$src)),
7735          (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
7736          Requires<[HasFP16]>;
7737def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
7738          (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
7739          Requires<[HasFP16, OptForSize]>;
7740
7741def : Pat<(f64 (any_fpextend FR16X:$src)),
7742          (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
7743          Requires<[HasFP16]>;
7744def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
7745          (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7746          Requires<[HasFP16, OptForSize]>;
7747
7748def : Pat<(f16 (any_fpround FR32X:$src)),
7749          (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
7750           Requires<[HasFP16]>;
7751def : Pat<(f16 (any_fpround FR64X:$src)),
7752          (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
7753           Requires<[HasFP16]>;
7754
// Fold "extract element 0, convert, insert into $dst via X86Movss/X86Movsd"
// into the vector-register *_Int form, which takes $dst as its pass-through
// operand.
7755def : Pat<(v4f32 (X86Movss
7756                   (v4f32 VR128X:$dst),
7757                   (v4f32 (scalar_to_vector
7758                     (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7759          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7760          Requires<[HasAVX512]>;
7761
7762def : Pat<(v2f64 (X86Movsd
7763                   (v2f64 VR128X:$dst),
7764                   (v2f64 (scalar_to_vector
7765                     (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7766          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7767          Requires<[HasAVX512]>;
7768
7769//===----------------------------------------------------------------------===//
7770// AVX-512  Vector convert from signed/unsigned integer to float/double
7771//          and from float/double to signed/unsigned integer
7772//===----------------------------------------------------------------------===//
7773
// Packed conversion from vector type _Src to vector type _, with masking.
// All records are marked as reading MXCSR and possibly raising FP exceptions.
// Emits three AVX512_maskable_cvt families:
//   rr  - register source.
//   rm  - full-vector memory source (MemOp); the mnemonic gets `Alias`
//         appended; patterns come from LdDAG / MaskLdDAG.
//   rmb - broadcast memory source (_Src.ScalarMemOp with
//         _Src.BroadcastLdFrag), printed with `Broadcast` appended to the
//         operand and encoded with EVEX_B.
// OpNode is used for the unmasked pattern and MaskOpNode inside the
// vselect_mask merge-masking (_.RC:$src0) and zero-masking (_.ImmAllZerosV)
// patterns.
// Optional parameters:
//   Broadcast - broadcast suffix string (default _.BroadcastStr).
//   Alias     - extra mnemonic suffix for the rm form (default "").
//   MemOp     - memory operand of the rm form (default _Src.MemOp).
//   MaskRC    - mask register class (default _.KRCWM).
//   LdDAG / MaskLdDAG - load patterns for rm; default to OpNode / MaskOpNode
//               applied to _Src.LdFrag.
7774multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7775                          X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
7776                          X86FoldableSchedWrite sched,
7777                          string Broadcast = _.BroadcastStr,
7778                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7779                          RegisterClass MaskRC = _.KRCWM,
7780                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
7781                          dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7782let Uses = [MXCSR], mayRaiseFPException = 1 in {
7783  defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
7784                         (ins _Src.RC:$src),
7785                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7786                         (ins MaskRC:$mask, _Src.RC:$src),
7787                          OpcodeStr, "$src", "$src",
7788                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7789                         (vselect_mask MaskRC:$mask,
7790                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7791                                       _.RC:$src0),
7792                         (vselect_mask MaskRC:$mask,
7793                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7794                                       _.ImmAllZerosV)>,
7795                         EVEX, Sched<[sched]>;
7796
7797  defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7798                         (ins MemOp:$src),
7799                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
7800                         (ins MaskRC:$mask, MemOp:$src),
7801                         OpcodeStr#Alias, "$src", "$src",
7802                         LdDAG,
7803                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
7804                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
7805                         EVEX, Sched<[sched.Folded]>;
7806
7807  defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7808                         (ins _Src.ScalarMemOp:$src),
7809                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
7810                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
7811                         OpcodeStr,
7812                         "${src}"#Broadcast, "${src}"#Broadcast,
7813                         (_.VT (OpNode (_Src.VT
7814                                  (_Src.BroadcastLdFrag addr:$src))
7815                            )),
7816                         (vselect_mask MaskRC:$mask,
7817                                       (_.VT
7818                                        (MaskOpNode
7819                                         (_Src.VT
7820                                          (_Src.BroadcastLdFrag addr:$src)))),
7821                                       _.RC:$src0),
7822                         (vselect_mask MaskRC:$mask,
7823                                       (_.VT
7824                                        (MaskOpNode
7825                                         (_Src.VT
7826                                          (_Src.BroadcastLdFrag addr:$src)))),
7827                                       _.ImmAllZerosV)>,
7828                         EVEX, EVEX_B, Sched<[sched.Folded]>;
7829  }
7830}
7831// Conversion with SAE - suppress all exceptions
// Adds one maskable register form, rrb, printed with "{sae}" and matched by
// OpNodeSAE. It is encoded with EVEX_B and marked as reading MXCSR.
7832multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7833                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
7834                              X86FoldableSchedWrite sched> {
7835  let Uses = [MXCSR] in
7836  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7837                        (ins _Src.RC:$src), OpcodeStr,
7838                        "{sae}, $src", "$src, {sae}",
7839                        (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
7840                        EVEX, EVEX_B, Sched<[sched]>;
7841}
7842
7843// Conversion with rounding control (RC)
// Adds one maskable register form, rrb, that takes an explicit AVX512RC:$rc
// operand and is matched by OpNodeRnd with that immediate. It is encoded with
// EVEX_B + EVEX_RC and marked as reading MXCSR.
7844multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7845                         X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
7846                         X86FoldableSchedWrite sched> {
7847  let Uses = [MXCSR] in
7848  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7849                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7850                        "$rc, $src", "$src, $rc",
7851                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
7852                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
7853}
7854
7855// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
// This multiclass has no body of its own: it forwards every argument to
// avx512_vcvt_fp and appends two identical dags built from the PatFrag named
// "extload" # _Src.VTName applied to addr:$src.
// NOTE(review): the two trailing dags are presumably the unmasked and masked
// load operands of avx512_vcvt_fp; its parameter list is outside this view,
// so confirm against that definition.
//   Broadcast - broadcast suffix text; defaults to the destination's
//               BroadcastStr.
//   Alias     - mnemonic suffix forwarded to avx512_vcvt_fp; empty by default.
//   MemOp     - memory operand type; defaults to the source's MemOp.
//   MaskRC    - mask register class; defaults to the destination's KRCWM.
7856multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7857                                X86VectorVTInfo _Src, SDPatternOperator OpNode,
7858                                SDNode MaskOpNode,
7859                                X86FoldableSchedWrite sched,
7860                                string Broadcast = _.BroadcastStr,
7861                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7862                                RegisterClass MaskRC = _.KRCWM>
7863  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
7864                   Alias, MemOp, MaskRC,
7865                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
7866                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7867
7868// Extend [Float to Double, Half to Float]
// Instantiates the widening conversion for three vector widths. In every
// variant the source vector type is half the width of (or, for Z128, the same
// register width as) the destination type.
//   _dst, _src - per-width type info for destination and source elements.
//   sched      - per-width scheduling info (.XMM / .YMM / .ZMM are used).
//   prd        - feature predicate required by all variants; HasAVX512 by
//                default. HasVLX is added for Z128 and Z256.
7869multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
7870                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
7871                             X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
  // 512-bit destination from a 256-bit source, plus the {sae} register form.
7872  let Predicates = [prd] in {
7873    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr,  _dst.info512, _src.info256,
7874                            any_fpextend, fpextend, sched.ZMM>,
7875             avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
7876                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
7877  }
7878  let Predicates = [prd, HasVLX] in {
    // 128-bit destination from a 128-bit source type: uses the X86-specific
    // X86any_vfpext / X86vfpext nodes instead of the generic fpextend nodes,
    // and an explicit f64mem memory operand.
7879    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
7880                               X86any_vfpext, X86vfpext, sched.XMM,
7881                               _dst.info128.BroadcastStr,
7882                               "", f64mem>, EVEX_V128;
    // 256-bit destination from a 128-bit source.
7883    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
7884                               any_fpextend, fpextend, sched.YMM>, EVEX_V256;
7885  }
7886}
7887
7888// Truncate [Double to Float, Float to Half]
// Instantiates the narrowing conversion for 512-bit (Z), 256-bit (Z256) and
// 128-bit (Z128) sources, the explicit selection patterns for Z128, and
// AT&T-only "x"/"y" suffixed aliases.
//   _dst, _src - per-width type info for destination and source elements.
//   sched      - per-width scheduling info (.XMM / .YMM / .ZMM are used).
//   prd        - feature predicate required by all variants; HasAVX512 by
//                default. HasVLX is added for Z128 and Z256.
//   bcast128   - broadcast-load fragment used by the explicit Z128 patterns.
//   loadVT128  - full-vector load fragment used by the explicit Z128 patterns.
//   maskRC128  - mask register class of the Z128 instructions and patterns.
7889multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
7890                            AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
7891                            X86SchedWriteWidths sched, Predicate prd = HasAVX512,
7892                            PatFrag bcast128 = _src.info128.BroadcastLdFrag,
7893                            PatFrag loadVT128 = _src.info128.LdFrag,
7894                            RegisterClass maskRC128 = _src.info128.KRCWM> {
  // 512-bit source into a 256-bit destination, plus the rounding-control form.
7895  let Predicates = [prd] in {
7896    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
7897                            X86any_vfpround, X86vfpround, sched.ZMM>,
7898             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
7899                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
7900  }
7901  let Predicates = [prd, HasVLX] in {
    // Both VLX variants write a 128-bit destination type; the "{x}" / "{y}"
    // Alias strings are passed so avx512_vcvt_fp can tell them apart.
    // Z128 passes null_frag for both nodes; its patterns follow below.
7902    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
7903                               null_frag, null_frag, sched.XMM,
7904                               _src.info128.BroadcastStr, "{x}",
7905                               f128mem, maskRC128>, EVEX_V128;
7906    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
7907                               X86any_vfpround, X86vfpround,
7908                               sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;
7909
7910    // Special patterns to allow use of X86vmfpround for masking. Instruction
7911    // patterns have been disabled with null_frag.
    // Register source: unmasked, merge-masked (k), zero-masked (kz).
7912    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
7913              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
7914    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
7915                            maskRC128:$mask),
7916              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
7917    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
7918                            maskRC128:$mask),
7919              (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
7920
    // Full-vector load source (loadVT128): unmasked, merge-masked, zero-masked.
7921    def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
7922              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
7923    def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
7924                            maskRC128:$mask),
7925              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
7926    def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
7927                            maskRC128:$mask),
7928              (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
7929
    // Broadcast load source (bcast128): unmasked, merge-masked, zero-masked.
7930    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
7931              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
7932    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
7933                            (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
7934              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
7935    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
7936                            _dst.info128.ImmAllZerosV, maskRC128:$mask),
7937              (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
7938  }
7939
  // Aliases in the "att" variant that accept an explicit "x" suffix on the
  // register and broadcast forms of the Z128 instructions.
  // NOTE(review): the mask class (VK2WM), memory operand (f64mem) and the
  // broadcast text ({1to2}) are hard-coded in these aliases instead of being
  // derived from _src / maskRC128, although this multiclass is also
  // instantiated with avx512vl_f32_info as _src (see VCVTPS2PHX). Confirm
  // that the 64-bit-element values are intended for that instantiation.
7940  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
7941                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7942  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7943                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7944                  VK2WM:$mask, VR128X:$src), 0, "att">;
7945  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
7946                  "$dst {${mask}} {z}, $src}",
7947                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7948                  VK2WM:$mask, VR128X:$src), 0, "att">;
7949  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7950                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7951  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7952                  "$dst {${mask}}, ${src}{1to2}}",
7953                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7954                  VK2WM:$mask, f64mem:$src), 0, "att">;
7955  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7956                  "$dst {${mask}} {z}, ${src}{1to2}}",
7957                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7958                  VK2WM:$mask, f64mem:$src), 0, "att">;
7959
  // Same set of aliases with a "y" suffix for the Z256 instructions
  // (hard-coded VK4WM / f64mem / {1to4}; the note above applies here too).
7960  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
7961                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7962  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7963                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7964                  VK4WM:$mask, VR256X:$src), 0, "att">;
7965  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
7966                  "$dst {${mask}} {z}, $src}",
7967                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7968                  VK4WM:$mask, VR256X:$src), 0, "att">;
7969  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7970                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7971  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
7972                  "$dst {${mask}}, ${src}{1to4}}",
7973                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7974                  VK4WM:$mask, f64mem:$src), 0, "att">;
7975  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7976                  "$dst {${mask}} {z}, ${src}{1to4}}",
7977                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7978                  VK4WM:$mask, f64mem:$src), 0, "att">;
7979}
7980
// Packed double <-> single conversions. Both use opcode 0x5A; the narrowing
// form adds REX_W and the PD prefix class and is described with 64-bit
// elements (EVEX_CD8<64, CD8VF>), the widening form with 32-bit elements
// (EVEX_CD8<32, CD8VH>).
7981defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
7982                                  avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
7983                                  REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
7984defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
7985                                   avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
7986                                   TB, EVEX_CD8<32, CD8VH>;
7987
7987// Extend Half to Double
// All three widths read their source through v8f16x_info (a 128-bit half
// vector type) and differ only in the destination type. Requires HasFP16;
// the 128/256-bit variants additionally require HasVLX.
//   sched - per-width scheduling info (.XMM / .YMM / .ZMM are used).
7988multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
7989                            X86SchedWriteWidths sched> {
7990  let Predicates = [HasFP16] in {
    // v8f64 destination, plus the {sae} register form.
7991    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
7992                                  any_fpextend, fpextend, sched.ZMM>,
7993             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
7994                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
    // Extra pattern selecting the Zrm form for an extloadv8f16 producing v8f64.
7995    def : Pat<(v8f64 (extloadv8f16 addr:$src)),
7996                (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
7997  }
7998  let Predicates = [HasFP16, HasVLX] in {
    // The narrower variants spell out the broadcast text and memory operand
    // explicitly: {1to2} with f32mem for Z128, {1to4} with f64mem for Z256.
7999    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
8000                                     X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
8001                                     f32mem>, EVEX_V128;
8002    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
8003                                     X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
8004                                     f64mem>, EVEX_V256;
8005  }
8006}
8008
8008// Truncate Double to Half
// All three widths write their result through v8f16x_info, so each variant is
// given a distinct Alias suffix ("{z}", "{y}", "{x}") and an explicit
// broadcast string. Requires HasFP16; the 128/256-bit variants additionally
// require HasVLX.
//   sched - per-width scheduling info (.XMM / .YMM / .ZMM are used).
8009multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
8010  let Predicates = [HasFP16] in {
    // v8f64 source, plus the rounding-control register form.
8011    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
8012                            X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
8013             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
8014                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
8015  }
8016  let Predicates = [HasFP16, HasVLX] in {
    // Both VLX variants pass null_frag for the operation nodes, so no
    // selection patterns are attached to the instructions here.
8017    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
8018                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8019                               VK2WM>, EVEX_V128;
8020    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
8021                               null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
8022                               VK4WM>, EVEX_V256;
8023  }
  // "att"-variant aliases accepting an explicit "x" suffix on the register and
  // {1to2} broadcast forms of the Z128 instructions.
8024  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8025                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8026                  VR128X:$src), 0, "att">;
8027  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8028                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8029                  VK2WM:$mask, VR128X:$src), 0, "att">;
8030  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8031                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8032                  VK2WM:$mask, VR128X:$src), 0, "att">;
8033  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8034                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8035                  i64mem:$src), 0, "att">;
8036  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8037                  "$dst {${mask}}, ${src}{1to2}}",
8038                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8039                  VK2WM:$mask, i64mem:$src), 0, "att">;
8040  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8041                  "$dst {${mask}} {z}, ${src}{1to2}}",
8042                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8043                  VK2WM:$mask, i64mem:$src), 0, "att">;
8044
  // Same set with a "y" suffix for the Z256 instructions ({1to4}, VK4WM).
8045  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8046                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8047                  VR256X:$src), 0, "att">;
8048  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8049                  "$dst {${mask}}, $src}",
8050                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8051                  VK4WM:$mask, VR256X:$src), 0, "att">;
8052  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8053                  "$dst {${mask}} {z}, $src}",
8054                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8055                  VK4WM:$mask, VR256X:$src), 0, "att">;
8056  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8057                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8058                  i64mem:$src), 0, "att">;
8059  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8060                  "$dst {${mask}}, ${src}{1to4}}",
8061                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8062                  VK4WM:$mask, i64mem:$src), 0, "att">;
8063  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8064                  "$dst {${mask}} {z}, ${src}{1to4}}",
8065                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8066                  VK4WM:$mask, i64mem:$src), 0, "att">;
8067
  // Same set with a "z" suffix for the 512-bit-source instructions
  // ({1to8}, VK8WM, VR512 source).
8068  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
8069                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
8070                  VR512:$src), 0, "att">;
8071  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
8072                  "$dst {${mask}}, $src}",
8073                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
8074                  VK8WM:$mask, VR512:$src), 0, "att">;
8075  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
8076                  "$dst {${mask}} {z}, $src}",
8077                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
8078                  VK8WM:$mask, VR512:$src), 0, "att">;
8079  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
8080                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
8081                  i64mem:$src), 0, "att">;
8082  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
8083                  "$dst {${mask}}, ${src}{1to8}}",
8084                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
8085                  VK8WM:$mask, i64mem:$src), 0, "att">;
8086  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
8087                  "$dst {${mask}} {z}, ${src}{1to8}}",
8088                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
8089                  VK8WM:$mask, i64mem:$src), 0, "att">;
8090}
8092
// Half-precision conversions. The single<->half pair reuses the generic
// truncate/extend multiclasses with HasFP16 as the predicate; the
// double<->half pair uses the dedicated multiclasses defined above.
//   VCVTPS2PHX - float  -> half,   opcode 0x1D, T_MAP5, PD.
//   VCVTPH2PSX - half   -> float,  opcode 0x13, T_MAP6, PD.
//   VCVTPD2PH  - double -> half,   opcode 0x5A, T_MAP5, PD, REX_W.
//   VCVTPH2PD  - half   -> double, opcode 0x5A, T_MAP5.
8092defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
8093                                   avx512vl_f32_info, SchedWriteCvtPD2PS,
8094                                   HasFP16>, T_MAP5, PD, EVEX_CD8<32, CD8VF>;
8095defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
8096                                    avx512vl_f16_info, SchedWriteCvtPS2PD,
8097                                    HasFP16>, T_MAP6, PD, EVEX_CD8<16, CD8VH>;
8098defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
8099                                 REX_W, T_MAP5, PD, EVEX_CD8<64, CD8VF>;
8100defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
8101                                 T_MAP5, EVEX_CD8<16, CD8VQ>;
8103
// Selection patterns for the 128-bit and 256-bit VCVTPD2PH instructions.
// Each source kind (register, full-vector load, 64-bit broadcast load) gets
// three patterns: unmasked via X86any_vfpround, merge-masked via X86vmfpround
// with a pass-through $src0, and zero-masked via X86vmfpround with
// v8f16x_info.ImmAllZerosV.
8103let Predicates = [HasFP16, HasVLX] in {
8104  // Special patterns to allow use of X86vmfpround for masking. Instruction
8105  // patterns have been disabled with null_frag.
  // --- v4f64 source (Z256), register operand ---
8106  def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
8107            (VCVTPD2PHZ256rr VR256X:$src)>;
  // NOTE(review): this is the only masked pattern in the block whose source
  // dag is wrapped in an explicit (v8f16 ...) result type.
8108  def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
8109                          VK4WM:$mask)),
8110            (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
8111  def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
8112                          VK4WM:$mask),
8113            (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
8114
  // --- v4f64 source (Z256), full-vector load ---
8115  def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
8116            (VCVTPD2PHZ256rm addr:$src)>;
8117  def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
8118                          VK4WM:$mask),
8119            (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8120  def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
8121                          VK4WM:$mask),
8122            (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
8123
  // --- v4f64 source (Z256), 64-bit broadcast load ---
8124  def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
8125            (VCVTPD2PHZ256rmb addr:$src)>;
8126  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8127                          (v8f16 VR128X:$src0), VK4WM:$mask),
8128            (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8129  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8130                          v8f16x_info.ImmAllZerosV, VK4WM:$mask),
8131            (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
8132
  // --- v2f64 source (Z128), register operand ---
8133  def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
8134            (VCVTPD2PHZ128rr VR128X:$src)>;
8135  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
8136                          VK2WM:$mask),
8137            (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8138  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
8139                          VK2WM:$mask),
8140            (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
8141
  // --- v2f64 source (Z128), full-vector load ---
8142  def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
8143            (VCVTPD2PHZ128rm addr:$src)>;
8144  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
8145                          VK2WM:$mask),
8146            (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8147  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
8148                          VK2WM:$mask),
8149            (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
8150
  // --- v2f64 source (Z128), 64-bit broadcast load ---
8151  def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
8152            (VCVTPD2PHZ128rmb addr:$src)>;
8153  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8154                          (v8f16 VR128X:$src0), VK2WM:$mask),
8155            (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8156  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8157                          v8f16x_info.ImmAllZerosV, VK2WM:$mask),
8158            (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
8159}
8161
8161// Convert Signed/Unsigned Doubleword to Double
// The enclosing 'let' gives every record produced by this multiclass an empty
// implicit-use register list and mayRaiseFPException = 0.
//   OpNode / MaskOpNode       - nodes used for the 512-bit and 256-bit forms.
//   OpNode128 / MaskOpNode128 - nodes used for the 128-bit form only.
//   sched                     - per-width scheduling info.
8162let Uses = []<Register>, mayRaiseFPException = 0 in
8163multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8164                           SDNode MaskOpNode, SDPatternOperator OpNode128,
8165                           SDNode MaskOpNode128,
8166                           X86SchedWriteWidths sched> {
8167  // No rounding in this op
8168  let Predicates = [HasAVX512] in
8169    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
8170                            MaskOpNode, sched.ZMM>, EVEX_V512;
8171
8172  let Predicates = [HasVLX] in {
    // The 128-bit form reads only 64 bits from memory (i64mem). Its load dags
    // are spelled out explicitly: an i64 load placed into a v2i64 with
    // scalar_to_vector and bitcast to v4i32 before the conversion node.
8173    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
8174                               OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
8175                               "", i64mem, VK2WM,
8176                               (v2f64 (OpNode128 (bc_v4i32
8177                                (v2i64
8178                                 (scalar_to_vector (loadi64 addr:$src)))))),
8179                               (v2f64 (MaskOpNode128 (bc_v4i32
8180                                (v2i64
8181                                 (scalar_to_vector (loadi64 addr:$src))))))>,
8182                               EVEX_V128;
8183    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
8184                               MaskOpNode, sched.YMM>, EVEX_V256;
8185  }
8186}
8188
8188// Convert Signed/Unsigned Doubleword to Float
// Source and destination vectors have the same element count and width at
// every size (v16/v8/v4 i32 -> f32).
//   OpNode / MaskOpNode - nodes used for the unmasked and masked patterns.
//   OpNodeRnd           - node used by the 512-bit rounding-control form.
//   sched               - per-width scheduling info.
8189multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8190                           SDNode MaskOpNode, SDNode OpNodeRnd,
8191                           X86SchedWriteWidths sched> {
  // 512-bit form plus its rounding-control register variant.
8192  let Predicates = [HasAVX512] in
8193    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
8194                            MaskOpNode, sched.ZMM>,
8195             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
8196                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8197
  // 128-bit and 256-bit forms; no rounding-control variant is defined here.
8198  let Predicates = [HasVLX] in {
8199    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
8200                               MaskOpNode, sched.XMM>, EVEX_V128;
8201    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
8202                               MaskOpNode, sched.YMM>, EVEX_V256;
8203  }
8204}
8206
8206// Convert Float to Signed/Unsigned Doubleword with truncation
// Source and destination vectors have the same element count and width at
// every size (v16/v8/v4 f32 -> i32).
//   OpNode / MaskOpNode - nodes used for the unmasked and masked patterns.
//   OpNodeSAE           - node used by the 512-bit {sae} form.
//   sched               - per-width scheduling info.
8207multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8208                            SDNode MaskOpNode,
8209                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  // 512-bit form plus its {sae} register variant.
8210  let Predicates = [HasAVX512] in {
8211    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8212                            MaskOpNode, sched.ZMM>,
8213             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
8214                                OpNodeSAE, sched.ZMM>, EVEX_V512;
8215  }
  // 128-bit and 256-bit forms; no {sae} variant is defined here.
8216  let Predicates = [HasVLX] in {
8217    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8218                               MaskOpNode, sched.XMM>, EVEX_V128;
8219    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8220                               MaskOpNode, sched.YMM>, EVEX_V256;
8221  }
8222}
8224
8224// Convert Float to Signed/Unsigned Doubleword
// Same shape as the truncating variant, but the 512-bit form is paired with a
// rounding-control variant instead of an {sae} variant.
//   OpNode / MaskOpNode - nodes used for the unmasked and masked patterns.
//   OpNodeRnd           - node used by the 512-bit rounding-control form.
//   sched               - per-width scheduling info.
8225multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8226                           SDNode MaskOpNode, SDNode OpNodeRnd,
8227                           X86SchedWriteWidths sched> {
  // 512-bit form plus its rounding-control register variant.
8228  let Predicates = [HasAVX512] in {
8229    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8230                            MaskOpNode, sched.ZMM>,
8231             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
8232                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8233  }
  // 128-bit and 256-bit forms.
8234  let Predicates = [HasVLX] in {
8235    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8236                               MaskOpNode, sched.XMM>, EVEX_V128;
8237    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8238                               MaskOpNode, sched.YMM>, EVEX_V256;
8239  }
8240}
8242
8242// Convert Double to Signed/Unsigned Doubleword with truncation
// The destination is half as wide as the source: v8f64 -> v8i32 (Z), and both
// v4f64 (Z256) and v2f64 (Z128) -> v4i32x_info.
//   OpNode / MaskOpNode - nodes used for the Z and Z256 patterns; Z128 passes
//                         null_frag instead.
//   OpNodeSAE           - node used by the 512-bit {sae} form.
//   sched               - per-width scheduling info.
8243multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8244                            SDNode MaskOpNode, SDNode OpNodeSAE,
8245                            X86SchedWriteWidths sched> {
8246  let Predicates = [HasAVX512] in {
8247    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8248                            MaskOpNode, sched.ZMM>,
8249             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
8250                                OpNodeSAE, sched.ZMM>, EVEX_V512;
8251  }
8252  let Predicates = [HasVLX] in {
8253    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8254    // memory forms of these instructions in Asm Parser. They have the same
8255    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8256    // due to the same reason.
8257    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8258                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8259                               VK2WM>, EVEX_V128;
8260    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8261                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8262  }
8263
  // "att"-variant aliases accepting an explicit "x" suffix on the register and
  // {1to2} broadcast forms of the Z128 instructions.
8264  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8265                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8266                  VR128X:$src), 0, "att">;
8267  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8268                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8269                  VK2WM:$mask, VR128X:$src), 0, "att">;
8270  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8271                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8272                  VK2WM:$mask, VR128X:$src), 0, "att">;
8273  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8274                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8275                  f64mem:$src), 0, "att">;
8276  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8277                  "$dst {${mask}}, ${src}{1to2}}",
8278                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8279                  VK2WM:$mask, f64mem:$src), 0, "att">;
8280  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8281                  "$dst {${mask}} {z}, ${src}{1to2}}",
8282                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8283                  VK2WM:$mask, f64mem:$src), 0, "att">;
8284
  // Same set with a "y" suffix for the Z256 instructions ({1to4}, VK4WM).
8285  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8286                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8287                  VR256X:$src), 0, "att">;
8288  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8289                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8290                  VK4WM:$mask, VR256X:$src), 0, "att">;
8291  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8292                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8293                  VK4WM:$mask, VR256X:$src), 0, "att">;
8294  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8295                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8296                  f64mem:$src), 0, "att">;
8297  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8298                  "$dst {${mask}}, ${src}{1to4}}",
8299                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8300                  VK4WM:$mask, f64mem:$src), 0, "att">;
8301  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8302                  "$dst {${mask}} {z}, ${src}{1to4}}",
8303                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8304                  VK4WM:$mask, f64mem:$src), 0, "att">;
8305}
8307
8307// Convert Double to Signed/Unsigned Doubleword
// The destination is half as wide as the source: v8f64 -> v8i32 (Z), and both
// v4f64 (Z256) and v2f64 (Z128) -> v4i32x_info.
//   OpNode / MaskOpNode - nodes used for the Z and Z256 patterns; Z128 passes
//                         null_frag instead.
//   OpNodeRnd           - node used by the 512-bit rounding-control form.
//   sched               - per-width scheduling info.
8308multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8309                           SDNode MaskOpNode, SDNode OpNodeRnd,
8310                           X86SchedWriteWidths sched> {
8311  let Predicates = [HasAVX512] in {
8312    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8313                            MaskOpNode, sched.ZMM>,
8314             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
8315                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8316  }
8317  let Predicates = [HasVLX] in {
8318    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8319    // memory forms of these instructions in Asm Parser. They have the same
8320    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8321    // due to the same reason.
8322    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8323                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8324                               VK2WM>, EVEX_V128;
8325    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8326                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8327  }
8328
  // "att"-variant aliases accepting an explicit "x" suffix on the register and
  // {1to2} broadcast forms of the Z128 instructions.
8329  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8330                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8331  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8332                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8333                  VK2WM:$mask, VR128X:$src), 0, "att">;
8334  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8335                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8336                  VK2WM:$mask, VR128X:$src), 0, "att">;
8337  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8338                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8339                  f64mem:$src), 0, "att">;
8340  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8341                  "$dst {${mask}}, ${src}{1to2}}",
8342                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8343                  VK2WM:$mask, f64mem:$src), 0, "att">;
8344  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8345                  "$dst {${mask}} {z}, ${src}{1to2}}",
8346                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8347                  VK2WM:$mask, f64mem:$src), 0, "att">;
8348
  // Same set with a "y" suffix for the Z256 instructions ({1to4}, VK4WM).
8349  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8350                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8351  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8352                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8353                  VK4WM:$mask, VR256X:$src), 0, "att">;
8354  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8355                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8356                  VK4WM:$mask, VR256X:$src), 0, "att">;
8357  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8358                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8359                  f64mem:$src), 0, "att">;
8360  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8361                  "$dst {${mask}}, ${src}{1to4}}",
8362                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8363                  VK4WM:$mask, f64mem:$src), 0, "att">;
8364  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8365                  "$dst {${mask}} {z}, ${src}{1to4}}",
8366                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8367                  VK4WM:$mask, f64mem:$src), 0, "att">;
8368}
8370
8370// Convert Double to Signed/Unsigned Quadword
// Source and destination vectors have the same element count and width at
// every size (v8/v4/v2 f64 -> i64). Requires HasDQI; the 128/256-bit forms
// additionally require HasVLX.
//   OpNode / MaskOpNode - nodes used for the unmasked and masked patterns.
//   OpNodeRnd           - node used by the 512-bit rounding-control form.
//   sched               - per-width scheduling info.
8371multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8372                           SDNode MaskOpNode, SDNode OpNodeRnd,
8373                           X86SchedWriteWidths sched> {
  // 512-bit form plus its rounding-control register variant.
8374  let Predicates = [HasDQI] in {
8375    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8376                            MaskOpNode, sched.ZMM>,
8377             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
8378                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8379  }
  // 128-bit and 256-bit forms.
8380  let Predicates = [HasDQI, HasVLX] in {
8381    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8382                               MaskOpNode, sched.XMM>, EVEX_V128;
8383    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8384                               MaskOpNode, sched.YMM>, EVEX_V256;
8385  }
8386}
8388
8389// Convert Double to Signed/Unsigned Quadword with truncation
// Same three-width layout as the non-truncating f64 -> i64 conversion:
//   Z    : v8f64 -> v8i64, gated on HasDQI. Mixes in avx512_vcvt_fp_sae (not
//          the _rc variant), which is the only consumer of OpNodeRnd.
//   Z128 : v2f64 -> v2i64, gated on HasDQI and HasVLX.
//   Z256 : v4f64 -> v4i64, gated on HasDQI and HasVLX.
// OpNode is an SDPatternOperator here, so callers may pass a pattern fragment
// as well as a plain SDNode.
8390multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8391                            SDNode MaskOpNode, SDNode OpNodeRnd,
8392                            X86SchedWriteWidths sched> {
8393  let Predicates = [HasDQI] in {
8394    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8395                            MaskOpNode, sched.ZMM>,
8396             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
8397                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8398  }
8399  let Predicates = [HasDQI, HasVLX] in {
8400    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8401                               MaskOpNode, sched.XMM>, EVEX_V128;
8402    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8403                               MaskOpNode, sched.YMM>, EVEX_V256;
8404  }
8405}
8406
8407// Convert Signed/Unsigned Quadword to Double
// Integer -> floating-point counterpart of the f64 -> i64 multiclasses; the
// type-info arguments are swapped so the result is f64 and the source is i64:
//   Z    : v8i64 -> v8f64, gated on HasDQI. Mixes in avx512_vcvt_fp_rc, which
//          is the only consumer of OpNodeRnd.
//   Z128 : v2i64 -> v2f64, gated on HasDQI and HasVLX.
//   Z256 : v4i64 -> v4f64, gated on HasDQI and HasVLX.
8408multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8409                           SDNode MaskOpNode, SDNode OpNodeRnd,
8410                           X86SchedWriteWidths sched> {
8411  let Predicates = [HasDQI] in {
8412    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8413                            MaskOpNode, sched.ZMM>,
8414             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8415                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8416  }
8417  let Predicates = [HasDQI, HasVLX] in {
8418    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8419                               MaskOpNode, sched.XMM>, EVEX_V128;
8420    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8421                               MaskOpNode, sched.YMM>, EVEX_V256;
8422  }
8423}
8424
8425// Convert Float to Signed/Unsigned Quadword
// The result elements are twice as wide as the source elements, so each
// variant reads a source vector with half the bit width of its result:
//   Z    : v8f32 -> v8i64, gated on HasDQI. Mixes in avx512_vcvt_fp_rc, which
//          is the only consumer of OpNodeRnd.
//   Z128 : v4f32x_info source -> v2i64, gated on HasDQI and HasVLX. Overrides
//          the broadcast string, memory operand, mask class and both load
//          dags (see the in-body comment).
//   Z256 : v4f32 -> v4i64, gated on HasDQI and HasVLX.
8426multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8427                           SDNode MaskOpNode, SDNode OpNodeRnd,
8428                           X86SchedWriteWidths sched> {
8429  let Predicates = [HasDQI] in {
8430    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8431                            MaskOpNode, sched.ZMM>,
8432             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8433                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8434  }
8435  let Predicates = [HasDQI, HasVLX] in {
8436    // Explicitly specified broadcast string, since we take only 2 elements
8437    // from v4f32x_info source
    // The memory form is described as a 64-bit access: the operand is f64mem
    // and the load dags load an f64, place it in a v2f64 with scalar_to_vector
    // and reinterpret that as v4f32 (bc_v4f32). The first dag wraps OpNode,
    // the second wraps MaskOpNode.
8438    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8439                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8440                               (v2i64 (OpNode (bc_v4f32
8441                                (v2f64
8442                                 (scalar_to_vector (loadf64 addr:$src)))))),
8443                               (v2i64 (MaskOpNode (bc_v4f32
8444                                (v2f64
8445                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8446                               EVEX_V128;
8447    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8448                               MaskOpNode, sched.YMM>, EVEX_V256;
8449  }
8450}
8451
8452// Convert Float to Signed/Unsigned Quadword with truncation
// Same layout as the non-truncating f32 -> i64 multiclass, except that the
// 512-bit variant mixes in avx512_vcvt_fp_sae instead of avx512_vcvt_fp_rc and
// OpNode is an SDPatternOperator:
//   Z    : v8f32 -> v8i64, gated on HasDQI; OpNodeRnd is used only here.
//   Z128 : v4f32x_info source -> v2i64, gated on HasDQI and HasVLX, with
//          explicit broadcast string, memory operand, mask class and load dags.
//   Z256 : v4f32 -> v4i64, gated on HasDQI and HasVLX.
8453multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8454                            SDNode MaskOpNode, SDNode OpNodeRnd,
8455                            X86SchedWriteWidths sched> {
8456  let Predicates = [HasDQI] in {
8457    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8458                            MaskOpNode, sched.ZMM>,
8459             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8460                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8461  }
8462  let Predicates = [HasDQI, HasVLX] in {
8463    // Explicitly specified broadcast string, since we take only 2 elements
8464    // from v4f32x_info source
    // The memory form is described as a 64-bit access: the operand is f64mem
    // and the load dags load an f64, place it in a v2f64 with scalar_to_vector
    // and reinterpret that as v4f32 (bc_v4f32). The first dag wraps OpNode,
    // the second wraps MaskOpNode.
8465    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8466                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8467                               (v2i64 (OpNode (bc_v4f32
8468                                (v2f64
8469                                 (scalar_to_vector (loadf64 addr:$src)))))),
8470                               (v2i64 (MaskOpNode (bc_v4f32
8471                                (v2f64
8472                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8473                               EVEX_V128;
8474    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8475                               MaskOpNode, sched.YMM>, EVEX_V256;
8476  }
8477}
8478
8479// Convert Signed/Unsigned Quadword to Float
8480// Also Convert Signed/Unsigned Doubleword to Half
// Narrowing integer -> floating-point conversion, parameterised on the result
// (_dst) and source (_src) type-info families so one body serves both uses.
//   Z    : _src.info512 -> _dst.info256, gated on prd. Mixes in
//          avx512_vcvt_fp_rc, which is the only consumer of OpNodeRnd.
//   Z128 : _src.info128 -> _dst.info128, gated on prd and HasVLX. Instruction
//          patterns are disabled (null_frag); selection is done by the
//          explicit Pat records below using OpNode128 (unmasked) and
//          OpNode128M (masked, taking passthru/zero and mask operands).
//   Z256 : _src.info256 -> _dst.info128, gated on prd and HasVLX.
// prd defaults to HasDQI. Everything that depends on the source element width
// (mask register class, scalar memory operand, broadcast string, broadcast
// load fragment) is taken from _src rather than spelled out for one width.
8481multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8482                                 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
8483                                 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
8484                                 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8485                                 X86SchedWriteWidths sched, Predicate prd = HasDQI> {
8486  let Predicates = [prd] in {
8487    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
8488                            MaskOpNode, sched.ZMM>,
8489             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8490                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8491  }
8492  let Predicates = [prd, HasVLX] in {
8493    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8494    // memory forms of these instructions in Asm Parser. They have the same
8495    // dest type - '_dst.info128'. We also specify the broadcast string explicitly
8496    // due to the same reason.
8497    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
8498                               null_frag, sched.XMM, _src.info128.BroadcastStr,
8499                               "{x}", i128mem, _src.info128.KRCWM>,
8500                               EVEX_V128;
8501    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
8502                               MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
8503                               "{y}">, EVEX_V256;
8504
8505    // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
8506    // patterns have been disabled with null_frag.
8507    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
8508              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8509    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8510                             _src.info128.KRCWM:$mask),
8511              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
8512    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8513                             _src.info128.KRCWM:$mask),
8514              (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
8515
8516    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
8517              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8518    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
8519                             _src.info128.KRCWM:$mask),
8520              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8521    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
8522                             _src.info128.KRCWM:$mask),
8523              (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
8524
    // The broadcast load fragment must match the source element width, so it
    // is taken from _src instead of being fixed to the 64-bit fragment.
8525    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (_src.info128.BroadcastLdFrag addr:$src)))),
8526              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8527    def : Pat<(OpNode128M (_src.info128.VT (_src.info128.BroadcastLdFrag addr:$src)),
8528                             (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
8529              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8530    def : Pat<(OpNode128M (_src.info128.VT (_src.info128.BroadcastLdFrag addr:$src)),
8531                             _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
8532              (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
8533  }
8534
  // AT&T-only "x"/"y" suffixed spellings for the register and broadcast forms
  // (emit priority 0, so they are accepted on input only). Mask class, scalar
  // memory operand and broadcast string come from _src so that they agree
  // with the Z128/Z256 instructions defined above for every instantiation.
8535  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8536                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8537                  VR128X:$src), 0, "att">;
8538  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8539                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8540                  _src.info128.KRCWM:$mask, VR128X:$src), 0, "att">;
8541  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8542                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8543                  _src.info128.KRCWM:$mask, VR128X:$src), 0, "att">;
8544  def : InstAlias<OpcodeStr#"x\t{${src}" # _src.info128.BroadcastStr # ", $dst|$dst, ${src}" # _src.info128.BroadcastStr # "}",
8545                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8546                  _src.info128.ScalarMemOp:$src), 0, "att">;
8547  def : InstAlias<OpcodeStr#"x\t{${src}" # _src.info128.BroadcastStr # ", $dst {${mask}}|" #
8548                  "$dst {${mask}}, ${src}" # _src.info128.BroadcastStr # "}",
8549                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8550                  _src.info128.KRCWM:$mask, _src.info128.ScalarMemOp:$src), 0, "att">;
8551  def : InstAlias<OpcodeStr#"x\t{${src}" # _src.info128.BroadcastStr # ", $dst {${mask}} {z}|" #
8552                  "$dst {${mask}} {z}, ${src}" # _src.info128.BroadcastStr # "}",
8553                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8554                  _src.info128.KRCWM:$mask, _src.info128.ScalarMemOp:$src), 0, "att">;
8555
8556  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8557                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8558                  VR256X:$src), 0, "att">;
8559  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8560                  "$dst {${mask}}, $src}",
8561                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8562                  _src.info256.KRCWM:$mask, VR256X:$src), 0, "att">;
8563  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8564                  "$dst {${mask}} {z}, $src}",
8565                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8566                  _src.info256.KRCWM:$mask, VR256X:$src), 0, "att">;
8567  def : InstAlias<OpcodeStr#"y\t{${src}" # _src.info256.BroadcastStr # ", $dst|$dst, ${src}" # _src.info256.BroadcastStr # "}",
8568                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8569                  _src.info256.ScalarMemOp:$src), 0, "att">;
8570  def : InstAlias<OpcodeStr#"y\t{${src}" # _src.info256.BroadcastStr # ", $dst {${mask}}|" #
8571                  "$dst {${mask}}, ${src}" # _src.info256.BroadcastStr # "}",
8572                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8573                  _src.info256.KRCWM:$mask, _src.info256.ScalarMemOp:$src), 0, "att">;
8574  def : InstAlias<OpcodeStr#"y\t{${src}" # _src.info256.BroadcastStr # ", $dst {${mask}} {z}|" #
8575                  "$dst {${mask}} {z}, ${src}" # _src.info256.BroadcastStr # "}",
8576                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8577                  _src.info256.KRCWM:$mask, _src.info256.ScalarMemOp:$src), 0, "att">;
8578}
8579
// Instantiations of the doubleword <-> floating-point conversion multiclasses.
// Each defm supplies the opcode byte, the mnemonic, the selection nodes and a
// scheduling-class bundle. The classes listed after the closing '>' (TB,
// XS/PD/XD, REX_W, EVEX_CD8<...>) are inherited by every record the defm
// produces; presumably they select opcode map, mandatory prefix, W bit and
// disp8 scaling -- confirm in the X86 instruction format definitions.
// Several mnemonics share an opcode byte and differ only in those trailing
// classes, e.g. 0x5B is used by VCVTDQ2PS (TB), VCVTTPS2DQ (TB, XS) and
// VCVTPS2DQ (TB, PD).
8580defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
8581                                 X86any_VSintToFP, X86VSintToFP,
8582                                 SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>;
8583
8584defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
8585                                X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8586                                TB, EVEX_CD8<32, CD8VF>;
8587
8588defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8589                                 X86cvttp2si, X86cvttp2siSAE,
8590                                 SchedWriteCvtPS2DQ>, TB, XS, EVEX_CD8<32, CD8VF>;
8591
8592defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8593                                 X86cvttp2si, X86cvttp2siSAE,
8594                                 SchedWriteCvtPD2DQ>,
8595                                 TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
8596
8597defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8598                                 X86cvttp2ui, X86cvttp2uiSAE,
8599                                 SchedWriteCvtPS2DQ>, TB, EVEX_CD8<32, CD8VF>;
8600
8601defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8602                                 X86cvttp2ui, X86cvttp2uiSAE,
8603                                 SchedWriteCvtPD2DQ>,
8604                                 TB, REX_W, EVEX_CD8<64, CD8VF>;
8605
8606defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8607                                  uint_to_fp, X86any_VUintToFP, X86VUintToFP,
8608                                  SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>;
8609
8610defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8611                                 uint_to_fp, X86VUintToFpRnd,
8612                                 SchedWriteCvtDQ2PS>, TB, XD, EVEX_CD8<32, CD8VF>;
8613
// The non-truncating FP -> int instantiations pass the same node
// (X86cvtp2Int / X86cvtp2UInt) for both the OpNode and MaskOpNode slots.
8614defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
8615                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8616                                 EVEX_CD8<32, CD8VF>;
8617
8618defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
8619                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, TB, XD,
8620                                 REX_W, EVEX_CD8<64, CD8VF>;
8621
8622defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
8623                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8624                                 TB, EVEX_CD8<32, CD8VF>;
8625
8626defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
8627                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
8628                                 TB, EVEX_CD8<64, CD8VF>;
8629
// Floating-point -> signed/unsigned quadword instantiations. The rounding
// forms (VCVTP[DS]2[U]QQ) pass the same node for OpNode and MaskOpNode plus a
// *Rnd node; the truncating forms (VCVTTP[DS]2[U]QQ) pass an X86any_* node,
// its non-"any" counterpart and a *SAE node. Every double-source entry adds
// REX_W and EVEX_CD8<64, CD8VF>; every float-source entry omits REX_W and
// uses EVEX_CD8<32, CD8VH>.
8630defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
8631                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, REX_W,
8632                                 TB, PD, EVEX_CD8<64, CD8VF>;
8633
8634defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
8635                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8636                                 EVEX_CD8<32, CD8VH>;
8637
8638defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
8639                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
8640                                 TB, PD, EVEX_CD8<64, CD8VF>;
8641
8642defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
8643                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8644                                 EVEX_CD8<32, CD8VH>;
8645
8646defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8647                                 X86cvttp2si, X86cvttp2siSAE,
8648                                 SchedWriteCvtPD2DQ>, REX_W,
8649                                 TB, PD, EVEX_CD8<64, CD8VF>;
8650
8651defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8652                                 X86cvttp2si, X86cvttp2siSAE,
8653                                 SchedWriteCvtPS2DQ>, TB, PD,
8654                                 EVEX_CD8<32, CD8VH>;
8655
8656defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8657                                 X86cvttp2ui, X86cvttp2uiSAE,
8658                                 SchedWriteCvtPD2DQ>, REX_W,
8659                                 TB, PD, EVEX_CD8<64, CD8VF>;
8660
8661defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8662                                 X86cvttp2ui, X86cvttp2uiSAE,
8663                                 SchedWriteCvtPS2DQ>, TB, PD,
8664                                 EVEX_CD8<32, CD8VH>;
8665
// Integer -> floating-point instantiations that take quadword sources, plus
// the doubleword -> half ones that reuse avx512_cvtqq2ps_dq2ph.
// VCVT[U]QQ2PD keep the element width (i64 -> f64) and use avx512_cvtqq2pd.
8666defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8667                            sint_to_fp, X86VSintToFpRnd,
8668                            SchedWriteCvtDQ2PD>, REX_W, TB, XS, EVEX_CD8<64, CD8VF>;
8669
8670defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8671                            uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
8672                            REX_W, TB, XS, EVEX_CD8<64, CD8VF>;
8673
// The four narrowing conversions below share one multiclass. The PH variants
// pass avx512vl_f16_info / avx512vl_i32_info and override the predicate with
// HasFP16, and use T_MAP5 with EVEX_CD8<32, CD8VF>. The PS variants pass
// avx512vl_f32_info / avx512vl_i64_info, keep the default predicate, and use
// REX_W, TB with EVEX_CD8<64, CD8VF>.
8674defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
8675                            X86any_VSintToFP, X86VMSintToFP,
8676                            X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8677                            SchedWriteCvtDQ2PS, HasFP16>,
8678                            T_MAP5, EVEX_CD8<32, CD8VF>;
8679
8680defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
8681                            X86any_VUintToFP, X86VMUintToFP,
8682                            X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8683                            SchedWriteCvtDQ2PS, HasFP16>, T_MAP5, XD,
8684                            EVEX_CD8<32, CD8VF>;
8685
8686defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
8687                            X86any_VSintToFP, X86VMSintToFP,
8688                            X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8689                            SchedWriteCvtDQ2PS>, REX_W, TB,
8690                            EVEX_CD8<64, CD8VF>;
8691
8692defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
8693                            X86any_VUintToFP, X86VMUintToFP,
8694                            X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8695                            SchedWriteCvtDQ2PS>, REX_W, TB, XD,
8696                            EVEX_CD8<64, CD8VF>;
8697
// Hand-written selection patterns for the 128-bit v2f64 -> v4i32 conversions
// (VCVT[T]PD2[U]DQZ128) and for vector lrint with i32 results. Each of the
// four conversion groups follows the same 3x3 layout: source is a register,
// a full v2f64 load, or a 64-bit broadcast load; the form is unmasked (rr/rm/
// rmb), merge-masked with passthru $src0 (..k), or zero-masked (..kz). The
// masked forms use the X86m* nodes, which carry the passthru/zero vector and
// a VK2WM mask as operands.
8698let Predicates = [HasVLX] in {
8699  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8700  // patterns have been disabled with null_frag.
8701  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8702            (VCVTPD2DQZ128rr VR128X:$src)>;
8703  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8704                          VK2WM:$mask),
8705            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8706  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8707                          VK2WM:$mask),
8708            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8709
8710  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8711            (VCVTPD2DQZ128rm addr:$src)>;
8712  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8713                          VK2WM:$mask),
8714            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8715  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8716                          VK2WM:$mask),
8717            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8718
8719  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8720            (VCVTPD2DQZ128rmb addr:$src)>;
8721  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8722                          (v4i32 VR128X:$src0), VK2WM:$mask),
8723            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8724  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8725                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8726            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8727
8728  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8729  // patterns have been disabled with null_frag.
8730  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8731            (VCVTTPD2DQZ128rr VR128X:$src)>;
8732  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8733                          VK2WM:$mask),
8734            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8735  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8736                          VK2WM:$mask),
8737            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8738
8739  def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
8740            (VCVTTPD2DQZ128rm addr:$src)>;
8741  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8742                          VK2WM:$mask),
8743            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8744  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8745                          VK2WM:$mask),
8746            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8747
8748  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
8749            (VCVTTPD2DQZ128rmb addr:$src)>;
8750  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8751                          (v4i32 VR128X:$src0), VK2WM:$mask),
8752            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8753  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8754                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8755            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8756
8757  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8758  // patterns have been disabled with null_frag.
8759  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8760            (VCVTPD2UDQZ128rr VR128X:$src)>;
8761  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8762                           VK2WM:$mask),
8763            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8764  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8765                           VK2WM:$mask),
8766            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8767
8768  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8769            (VCVTPD2UDQZ128rm addr:$src)>;
8770  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8771                           VK2WM:$mask),
8772            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8773  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8774                           VK2WM:$mask),
8775            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8776
8777  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8778            (VCVTPD2UDQZ128rmb addr:$src)>;
8779  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8780                           (v4i32 VR128X:$src0), VK2WM:$mask),
8781            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8782  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8783                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8784            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8785
8786  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8787  // patterns have been disabled with null_frag.
8788  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
8789            (VCVTTPD2UDQZ128rr VR128X:$src)>;
8790  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8791                          VK2WM:$mask),
8792            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8793  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8794                          VK2WM:$mask),
8795            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8796
8797  def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
8798            (VCVTTPD2UDQZ128rm addr:$src)>;
8799  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8800                          VK2WM:$mask),
8801            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8802  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8803                          VK2WM:$mask),
8804            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8805
8806  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
8807            (VCVTTPD2UDQZ128rmb addr:$src)>;
8808  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8809                          (v4i32 VR128X:$src0), VK2WM:$mask),
8810            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8811  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8812                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8813            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8814
  // Vector lrint with i32 result elements is selected to the non-truncating
  // VCVTPS2DQ / VCVTPD2DQ instructions (register and full-vector-load forms).
  // The register patterns leave the source type to be inferred from the
  // register class and result type.
8815  def : Pat<(v4i32 (lrint VR128X:$src)), (VCVTPS2DQZ128rr VR128X:$src)>;
8816  def : Pat<(v4i32 (lrint (loadv4f32 addr:$src))), (VCVTPS2DQZ128rm addr:$src)>;
8817  def : Pat<(v8i32 (lrint VR256X:$src)), (VCVTPS2DQZ256rr VR256X:$src)>;
8818  def : Pat<(v8i32 (lrint (loadv8f32 addr:$src))), (VCVTPS2DQZ256rm addr:$src)>;
8819  def : Pat<(v4i32 (lrint VR256X:$src)), (VCVTPD2DQZ256rr VR256X:$src)>;
8820  def : Pat<(v4i32 (lrint (loadv4f64 addr:$src))), (VCVTPD2DQZ256rm addr:$src)>;
8821}
// 512-bit vector lrint with i32 result elements: v16i32 results select
// VCVTPS2DQZ and v8i32 results from a 512-bit source select VCVTPD2DQZ.
// NOTE(review): these four patterns sit outside any visible
// `let Predicates = ...` block -- confirm which predicate is intended to
// guard them.
8822def : Pat<(v16i32 (lrint VR512:$src)), (VCVTPS2DQZrr VR512:$src)>;
8823def : Pat<(v16i32 (lrint (loadv16f32 addr:$src))), (VCVTPS2DQZrm addr:$src)>;
8824def : Pat<(v8i32 (lrint VR512:$src)), (VCVTPD2DQZrr VR512:$src)>;
8825def : Pat<(v8i32 (lrint (loadv8f64 addr:$src))), (VCVTPD2DQZrm addr:$src)>;
8826
// Extra patterns that need both HasDQI and HasVLX:
//  * Fold an X86vzload64 (viewed as v2f64, then bitcast to v4f32) into the
//    memory operand of the 128-bit VCVT[T]PS2[U]QQ instructions. Each of the
//    four groups has an unmasked (rm), merge-masked (rmk, passthru $src0) and
//    zero-masked (rmkz) pattern; masking is expressed with vselect_mask. For
//    the truncating groups the unmasked pattern matches the X86any_* node and
//    the masked patterns match the plain X86cvttp2si / X86cvttp2ui node.
//  * Select vector lrint and llrint with i64 result elements at 128/256 bits.
8827let Predicates = [HasDQI, HasVLX] in {
8828  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8829            (VCVTPS2QQZ128rm addr:$src)>;
8830  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8831                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8832                                 VR128X:$src0)),
8833            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8834  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8835                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8836                                 v2i64x_info.ImmAllZerosV)),
8837            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8838
8839  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8840            (VCVTPS2UQQZ128rm addr:$src)>;
8841  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8842                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8843                                 VR128X:$src0)),
8844            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8845  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8846                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8847                                 v2i64x_info.ImmAllZerosV)),
8848            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8849
8850  def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8851            (VCVTTPS2QQZ128rm addr:$src)>;
8852  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8853                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8854                                 VR128X:$src0)),
8855            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8856  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8857                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8858                                 v2i64x_info.ImmAllZerosV)),
8859            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8860
8861  def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8862            (VCVTTPS2UQQZ128rm addr:$src)>;
8863  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8864                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8865                                 VR128X:$src0)),
8866            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8867  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8868                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8869                                 v2i64x_info.ImmAllZerosV)),
8870            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8871
  // lrint and llrint with i64 result elements map to the same instruction:
  // VCVTPS2QQ when the pattern's source is a 128-bit register / v4f32 load,
  // VCVTPD2QQ when it is a v2f64 or v4f64 load or the matching register class.
8872  def : Pat<(v4i64 (lrint VR128X:$src)), (VCVTPS2QQZ256rr VR128X:$src)>;
8873  def : Pat<(v4i64 (lrint (loadv4f32 addr:$src))), (VCVTPS2QQZ256rm addr:$src)>;
8874  def : Pat<(v4i64 (llrint VR128X:$src)), (VCVTPS2QQZ256rr VR128X:$src)>;
8875  def : Pat<(v4i64 (llrint (loadv4f32 addr:$src))), (VCVTPS2QQZ256rm addr:$src)>;
8876  def : Pat<(v2i64 (lrint VR128X:$src)), (VCVTPD2QQZ128rr VR128X:$src)>;
8877  def : Pat<(v2i64 (lrint (loadv2f64 addr:$src))), (VCVTPD2QQZ128rm addr:$src)>;
8878  def : Pat<(v4i64 (lrint VR256X:$src)), (VCVTPD2QQZ256rr VR256X:$src)>;
8879  def : Pat<(v4i64 (lrint (loadv4f64 addr:$src))), (VCVTPD2QQZ256rm addr:$src)>;
8880  def : Pat<(v2i64 (llrint VR128X:$src)), (VCVTPD2QQZ128rr VR128X:$src)>;
8881  def : Pat<(v2i64 (llrint (loadv2f64 addr:$src))), (VCVTPD2QQZ128rm addr:$src)>;
8882  def : Pat<(v4i64 (llrint VR256X:$src)), (VCVTPD2QQZ256rr VR256X:$src)>;
8883  def : Pat<(v4i64 (llrint (loadv4f64 addr:$src))), (VCVTPD2QQZ256rm addr:$src)>;
8884}
8885
// 512-bit-result lrint/llrint selection, gated on HasDQI only. Both nodes map
// to the same instruction: VCVTPS2QQZ when the source is a 256-bit register or
// v8f32 load, VCVTPD2QQZ when it is a 512-bit register or v8f64 load.
8886let Predicates = [HasDQI] in {
8887  def : Pat<(v8i64 (lrint VR256X:$src)), (VCVTPS2QQZrr VR256X:$src)>;
8888  def : Pat<(v8i64 (lrint (loadv8f32 addr:$src))), (VCVTPS2QQZrm addr:$src)>;
8889  def : Pat<(v8i64 (llrint VR256X:$src)), (VCVTPS2QQZrr VR256X:$src)>;
8890  def : Pat<(v8i64 (llrint (loadv8f32 addr:$src))), (VCVTPS2QQZrm addr:$src)>;
8891  def : Pat<(v8i64 (lrint VR512:$src)), (VCVTPD2QQZrr VR512:$src)>;
8892  def : Pat<(v8i64 (lrint (loadv8f64 addr:$src))), (VCVTPD2QQZrm addr:$src)>;
8893  def : Pat<(v8i64 (llrint VR512:$src)), (VCVTPD2QQZrr VR512:$src)>;
8894  def : Pat<(v8i64 (llrint (loadv8f64 addr:$src))), (VCVTPD2QQZrm addr:$src)>;
8895}
8896
// Fold an X86vzload64 (viewed as v2i64, then bitcast to v4i32) into the memory
// operand of the 128-bit VCVTDQ2PD / VCVTUDQ2PD instructions. Each group has
// an unmasked (rm), merge-masked (rmk, passthru $src0) and zero-masked (rmkz)
// pattern. The unmasked patterns match the X86any_V[SU]intToFP node; the
// masked ones wrap the plain X86V[SU]intToFP node in vselect_mask.
8897let Predicates = [HasVLX] in {
8898  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8899            (VCVTDQ2PDZ128rm addr:$src)>;
8900  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8901                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8902                                 VR128X:$src0)),
8903            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8904  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8905                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8906                                 v2f64x_info.ImmAllZerosV)),
8907            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8908
8909  def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8910            (VCVTUDQ2PDZ128rm addr:$src)>;
8911  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8912                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8913                                 VR128X:$src0)),
8914            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8915  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8916                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8917                                 v2f64x_info.ImmAllZerosV)),
8918            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8919}
8920
8921//===----------------------------------------------------------------------===//
8922// Half precision conversion instructions
8923//===----------------------------------------------------------------------===//
8924
// avx512_cvtph2ps - register ("rr") and memory ("rm") forms of vcvtph2ps
// (opcode 0x13), built with AVX512_maskable_split.
//   _dest    - vector type info of the result.
//   _src     - vector type info of the source.
//   x86memop - memory operand class used by the "rm" form.
//   ld_dag   - dag describing the loaded source value used in the "rm" patterns.
//   sched    - scheduling info; "rr" uses sched, "rm" uses sched.Folded.
// Each form passes two pattern dags to AVX512_maskable_split, one built on
// X86any_cvtph2ps and one on X86cvtph2ps.
// NOTE(review): presumably the first dag is the unmasked pattern and the second
// is used for the masked variants - confirm against AVX512_maskable_split.
// Every record defined here gets Uses = [MXCSR] and mayRaiseFPException = 1
// from the enclosing let.
8925let Uses = [MXCSR], mayRaiseFPException = 1 in
8926multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8927                           X86MemOperand x86memop, dag ld_dag,
8928                           X86FoldableSchedWrite sched> {
8929  defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8930                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8931                            (X86any_cvtph2ps (_src.VT _src.RC:$src)),
8932                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
8933                            T8, PD, Sched<[sched]>;
8934  defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8935                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8936                            (X86any_cvtph2ps (_src.VT ld_dag)),
8937                            (X86cvtph2ps (_src.VT ld_dag))>,
8938                            T8, PD, Sched<[sched.Folded]>;
8939}
8940
// avx512_cvtph2ps_sae - the register-only "rrb" form of vcvtph2ps (opcode 0x13)
// whose assembly carries the {sae} operand. It is matched from X86cvtph2psSAE
// and tagged EVEX_B.
//   _dest / _src - vector type info of the result / source.
//   sched        - scheduling info for the instruction.
// Unlike avx512_cvtph2ps, only Uses = [MXCSR] is set here; mayRaiseFPException
// is not set by this multiclass.
8941multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8942                               X86FoldableSchedWrite sched> {
8943  let Uses = [MXCSR] in
8944  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8945                             (ins _src.RC:$src), "vcvtph2ps",
8946                             "{sae}, $src", "$src, {sae}",
8947                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
8948                             T8, PD, EVEX_B, Sched<[sched]>;
8949}
8950
// 512-bit vcvtph2ps (HasAVX512): v16i16x_info source -> v16f32_info result.
// Combines the rr/rm forms (memory operand f256mem, plain load of the source)
// with the {sae} "rrb" form, all scheduled as WriteCvtPH2PSZ.
8951let Predicates = [HasAVX512] in
8952  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
8953                                    (load addr:$src), WriteCvtPH2PSZ>,
8954                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8955                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8956
// 256-bit and 128-bit vcvtph2ps (HasVLX). Neither instantiates the {sae} form.
//   Z256: v8i16x_info -> v8f32x_info, f128mem operand, plain load.
//   Z128: v8i16x_info -> v4f32x_info, f64mem operand; the memory pattern is a
//         bitconvert of an X86vzload64 rather than a full-width load.
8957let Predicates = [HasVLX] in {
8958  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8959                       (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
8960                       EVEX_CD8<32, CD8VH>;
8961  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8962                       (bitconvert (v2i64 (X86vzload64 addr:$src))),
8963                       WriteCvtPH2PS>, EVEX, EVEX_V128,
8964                       EVEX_CD8<32, CD8VH>;
8965
8966  // Pattern match vcvtph2ps of a scalar i64 load.
  // The source here is scalar_to_vector(loadi64) bitcast to v8i16, which is
  // also selected to the 128-bit memory form.
8967  def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
8968              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8969            (VCVTPH2PSZ128rm addr:$src)>;
8970}
8971
// avx512_cvtps2ph - vcvtps2ph (opcode 0x1D) with an 8-bit immediate operand.
//   _dest    - vector type info of the result.
//   _src     - vector type info of the source.
//   x86memop - memory operand class for the store forms.
//   RR       - SchedWrite used by the register-destination forms.
//   MR       - SchedWrite used by the memory-destination forms.
// Records defined:
//   rr   - unmasked register form, matched from X86any_cvtps2ph.
//   rrk  - merge-masked register form ($src0 tied to $dst), X86mcvtps2ph.
//   rrkz - zero-masked register form, X86mcvtps2ph with an all-zeros
//          pass-through.
//   mr   - unmasked store form (no pattern attached here).
//   mrk  - masked store form (no pattern attached here).
// All records use GenericDomain, Uses = [MXCSR] and mayRaiseFPException = 1.
8972multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8973                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8974let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8975  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8976             (ins _src.RC:$src1, i32u8imm:$src2),
8977             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8978             [(set _dest.RC:$dst,
8979                   (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
8980             Sched<[RR]>;
  // Merge masking: the pass-through value $src0 shares a register with $dst.
8981  let Constraints = "$src0 = $dst" in
8982  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8983             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8984             "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
8985             [(set _dest.RC:$dst,
8986                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8987                                 _dest.RC:$src0, _src.KRCWM:$mask))]>,
8988             Sched<[RR]>, EVEX_K;
8989  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8990             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8991             "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
8992             [(set _dest.RC:$dst,
8993                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8994                                 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8995             Sched<[RR]>, EVEX_KZ;
  // The store forms have empty pattern lists, so hasSideEffects/mayStore are
  // stated explicitly instead of being inferred from a pattern.
8996  let hasSideEffects = 0, mayStore = 1 in {
8997    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
8998               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
8999               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9000               Sched<[MR]>;
    // NOTE(review): mrk takes its mask register class from _dest.KRCWM while
    // rrk/rrkz use _src.KRCWM; these may differ when _dest and _src have
    // different element counts - confirm this is intended.
9001    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
9002               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9003               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
9004                EVEX_K, Sched<[MR]>;
9005  }
9006}
9007}
9008
// avx512_cvtps2ph_sae - register-only {sae} forms of vcvtps2ph (opcode 0x1D),
// all tagged EVEX_B.
//   _dest / _src - vector type info of the result / source.
//   Sched        - SchedWrite for all three records. Note that the parameter
//                  has the same name as the Sched<...> class it is passed to.
// Records defined:
//   rrb   - unmasked, matched from X86cvtps2phSAE.
//   rrbk  - merge-masked ($src0 tied to $dst), matched from X86mcvtps2phSAE.
//   rrbkz - zero-masked, X86mcvtps2phSAE with an all-zeros pass-through.
// hasSideEffects = 0 and Uses = [MXCSR] apply to all of them;
// mayRaiseFPException is not set by this multiclass.
9009multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9010                               SchedWrite Sched> {
9011  let hasSideEffects = 0, Uses = [MXCSR] in {
9012    def rrb : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9013              (ins _src.RC:$src1, i32u8imm:$src2),
9014              "vcvtps2ph\t{$src2, {sae}, $src1, $dst|$dst, $src1, {sae}, $src2}",
9015              [(set _dest.RC:$dst,
9016                    (X86cvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
9017              EVEX_B, Sched<[Sched]>;
9018    let Constraints = "$src0 = $dst" in
9019    def rrbk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9020              (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9021              "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}}|$dst {${mask}}, $src1, {sae}, $src2}",
9022              [(set _dest.RC:$dst,
9023                    (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
9024                                  _dest.RC:$src0, _src.KRCWM:$mask))]>,
9025              EVEX_B, Sched<[Sched]>, EVEX_K;
9026    def rrbkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9027              (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9028              "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, {sae}, $src2}",
9029              [(set _dest.RC:$dst,
9030                    (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
9031                                  _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
9032              EVEX_B, Sched<[Sched]>, EVEX_KZ;
9033}
9034}
9035
// 512-bit vcvtps2ph (HasAVX512): v16f32_info source -> v16i16x_info result,
// f256mem for the store forms, plus the {sae} register forms.
9036let Predicates = [HasAVX512] in {
9037  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
9038                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
9039                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
9040                                        EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9041
  // The "mr" record carries no pattern of its own; this folds a store of the
  // v16i16 conversion result into it.
9042  def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
9043            (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
9044}
9045
// 256-bit and 128-bit vcvtps2ph (HasVLX). No {sae} forms are instantiated.
//   Z256: v8f32x_info -> v8i16x_info, f128mem store operand.
//   Z128: v4f32x_info -> v8i16x_info, f64mem store operand.
9046let Predicates = [HasVLX] in {
9047  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
9048                                       WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
9049                                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
9050  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
9051                                       WriteCvtPS2PH, WriteCvtPS2PHSt>,
9052                                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
9053
  // Store folding for the 128-bit form: a store of element 0 of the result,
  // viewed either as v2f64 (f64 element) or v2i64 (i64 element), selects the
  // "mr" record.
9054  def : Pat<(store (f64 (extractelt
9055                         (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9056                         (iPTR 0))), addr:$dst),
9057            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9058  def : Pat<(store (i64 (extractelt
9059                         (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9060                         (iPTR 0))), addr:$dst),
9061            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  // Store folding for the 256-bit form: the whole v8i16 result is stored.
9062  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
9063            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
9064}
9065
9066//  Unordered/Ordered scalar fp compare with Sae and set EFLAGS
// Defines a single "rrb" record: two register inputs, no outputs, and an
// assembly string that includes {sae}.
//   opc       - opcode byte.
//   _         - vector type info supplying the register class of both inputs.
//   OpcodeStr - mnemonic placed in front of the operand string.
//   d         - execution domain.
//   sched     - scheduling info; defaults to WriteFComX.
// The pattern list is empty, so hasSideEffects = 0 is stated explicitly.
// EFLAGS is not listed here; the instantiations in this file wrap the defm in
// "let Defs = [EFLAGS]".
9067multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
9068                              string OpcodeStr, Domain d,
9069                              X86FoldableSchedWrite sched = WriteFComX> {
9070  let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
9071  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
9072                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
9073                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
9074}
9075
// {sae} register forms of the f32/f64 scalar compares (HasAVX512). All of them
// define EFLAGS.
//   0x2E - vucomiss / vucomisd
//   0x2F - vcomiss  / vcomisd
// The double-precision variants add REX_W and use a 64-bit EVEX_CD8 element.
9076let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9077  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
9078                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9079  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
9080                                   AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
9081  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
9082                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9083  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
9084                                   AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
9085}
9086
// EVEX-encoded scalar compares that define EFLAGS (HasAVX512), reusing the
// sse12_ord_cmp / sse12_ord_cmp_int multiclasses under the same defm prefixes
// as the {sae} forms.
//   * The FR32X/FR64X versions match X86any_fcmp (ucomis*) and X86strict_fcmps
//     (comis*).
//   * The isCodeGenOnly versions operate on VR128X with X86ucomi / X86comi and
//     the sse_load_f32 / sse_load_f64 memory fragments.
// NOTE(review): the mnemonic strings are passed without the leading "v";
// presumably sse12_ord_cmp* adds it - confirm against its definition.
9087let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9088  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
9089                                 "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9090                                 EVEX_CD8<32, CD8VT1>;
9091  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
9092                                  "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
9093                                  VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9094  defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
9095                                 "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9096                                 EVEX_CD8<32, CD8VT1>;
9097  defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
9098                                 "comisd", SSEPackedDouble>, TB, PD, EVEX,
9099                                  VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  // Vector-register (VR128X) variants, marked isCodeGenOnly.
9100  let isCodeGenOnly = 1 in {
9101    defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
9102                          sse_load_f32, "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9103                          EVEX_CD8<32, CD8VT1>;
9104    defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
9105                          sse_load_f64, "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
9106                          VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9107
9108    defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
9109                          sse_load_f32, "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9110                          EVEX_CD8<32, CD8VT1>;
9111    defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
9112                          sse_load_f64, "comisd", SSEPackedDouble>, TB, PD, EVEX,
9113                          VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9114  }
9115}
9116
// Half-precision scalar compares that define EFLAGS (HasFP16): vucomish (0x2E)
// and vcomish (0x2F), all in T_MAP5 with a 16-bit EVEX_CD8 element.
// Three groups are instantiated under the same defm prefixes:
//   * {sae} register forms via avx512_ord_cmp_sae;
//   * FR16X forms via sse12_ord_cmp (X86any_fcmp / X86strict_fcmps);
//   * isCodeGenOnly VR128X forms via sse12_ord_cmp_int (X86ucomi / X86comi).
// SSEPackedSingle is passed as the execution domain for all of them.
9117let Defs = [EFLAGS], Predicates = [HasFP16] in {
9118  defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
9119                                SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
9120                                EVEX_CD8<16, CD8VT1>;
9121  defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
9122                                SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
9123                                EVEX_CD8<16, CD8VT1>;
9124  defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
9125                                "ucomish", SSEPackedSingle>, T_MAP5, EVEX,
9126                                VEX_LIG, EVEX_CD8<16, CD8VT1>;
9127  defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
9128                                "comish", SSEPackedSingle>, T_MAP5, EVEX,
9129                                VEX_LIG, EVEX_CD8<16, CD8VT1>;
9130  let isCodeGenOnly = 1 in {
9131    defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
9132                                sse_load_f16, "ucomish", SSEPackedSingle>,
9133                                T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9134
9135    defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
9136                                sse_load_f16, "comish", SSEPackedSingle>,
9137                                T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9138  }
9139}
9140
9141/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
// Two-source scalar operation built with AVX512_maskable_scalar.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   OpNode    - SDNode applied to ($src1, $src2).
//   sched     - scheduling info; the memory form uses sched.Folded and
//               sched.ReadAfterFold.
//   _         - vector type info (register class, VT, scalar memory operand
//               and fragment).
//   prd       - predicate guarding both records; defaults to HasAVX512.
// Records: "rr" (both sources in registers) and "rm" ($src2 read through
// _.ScalarIntMemFrags). Both are EVEX, VVVV, VEX_LIG.
9142multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9143                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
9144                         Predicate prd = HasAVX512> {
9145  let Predicates = [prd], ExeDomain = _.ExeDomain in {
9146  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9147                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9148                           "$src2, $src1", "$src1, $src2",
9149                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9150                           EVEX, VVVV, VEX_LIG, Sched<[sched]>;
9151  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9152                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9153                         "$src2, $src1", "$src1, $src2",
9154                         (OpNode (_.VT _.RC:$src1),
9155                          (_.ScalarIntMemFrags addr:$src2))>, EVEX, VVVV, VEX_LIG,
9156                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9157}
9158}
9159
// Scalar reciprocal (0x4D, X86rcp14s) and reciprocal square root (0x4F,
// X86rsqrt14s) instantiations of avx512_fp14_s.
// The f16 variants (vrcpsh / vrsqrtsh) require HasFP16, live in T_MAP6 and are
// defined outside the "let Uses = [MXCSR]" scope below.
9160defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
9161                               f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
9162                               T_MAP6, PD;
9163defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
9164                                 SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
9165                                 EVEX_CD8<16, CD8VT1>, T_MAP6, PD;
// The f32/f64 variants use the default HasAVX512 predicate, live in T8 and are
// marked as using MXCSR. The f64 variants add REX_W.
9166let Uses = [MXCSR] in {
9167defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
9168                               f32x_info>, EVEX_CD8<32, CD8VT1>,
9169                               T8, PD;
9170defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
9171                               f64x_info>, REX_W, EVEX_CD8<64, CD8VT1>,
9172                               T8, PD;
9173defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
9174                                 SchedWriteFRsqrt.Scl, f32x_info>,
9175                                 EVEX_CD8<32, CD8VT1>, T8, PD;
9176defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
9177                                 SchedWriteFRsqrt.Scl, f64x_info>, REX_W,
9178                                 EVEX_CD8<64, CD8VT1>, T8, PD;
9179}
9180
9181/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// Single-source packed operation built with AVX512_maskable.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   OpNode    - SDNode applied to the source.
//   sched     - scheduling info; memory forms use sched.Folded and
//               sched.ReadAfterFold.
//   _         - vector type info.
// Records:
//   r  - register source.
//   m  - full-width memory source (_.LdFrag through a bitconvert).
//   mb - broadcast memory source (_.BroadcastLdFrag), tagged EVEX_B; the
//        assembly operand is suffixed with _.BroadcastStr.
// All three are EVEX, T8, PD. This multiclass sets no predicate and no MXCSR
// use itself.
9182multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
9183                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9184  let ExeDomain = _.ExeDomain in {
9185  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9186                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9187                         (_.VT (OpNode _.RC:$src))>, EVEX, T8, PD,
9188                         Sched<[sched]>;
9189  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9190                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9191                         (OpNode (_.VT
9192                           (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8, PD,
9193                         Sched<[sched.Folded, sched.ReadAfterFold]>;
9194  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9195                          (ins _.ScalarMemOp:$src), OpcodeStr,
9196                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9197                          (OpNode (_.VT
9198                            (_.BroadcastLdFrag addr:$src)))>,
9199                          EVEX, T8, PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9200  }
9201}
9202
// avx512_fp14_p_vl_all - instantiates avx512_fp14_p for every vector width and
// element type.
//   opc       - opcode byte shared by all variants.
//   OpcodeStr - mnemonic stem; "14ps", "14pd" or "ph" is appended.
//   OpNode    - SDNode applied to the source.
//   sched     - per-width scheduling info (.XMM / .YMM / .ZMM).
// Predicates and MXCSR use as written here:
//   14PSZ / 14PDZ           - no predicate set in this multiclass, Uses MXCSR.
//   PHZ                     - HasFP16, T_MAP6, no MXCSR use set.
//   14PS/14PD Z128 and Z256 - HasVLX, Uses MXCSR.
//   PHZ128 / PHZ256         - HasFP16 and HasVLX, T_MAP6, no MXCSR use set.
// The pd variants add REX_W and use a 64-bit EVEX_CD8 element.
9203multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
9204                                X86SchedWriteWidths sched> {
9205  let Uses = [MXCSR] in {
9206  defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
9207                             v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
9208  defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
9209                             v8f64_info>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
9210  }
9211  let Predicates = [HasFP16] in
9212  defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
9213                           v32f16_info>, EVEX_V512, T_MAP6, EVEX_CD8<16, CD8VF>;
9214
9215  // Define only if AVX512VL feature is present.
9216  let Predicates = [HasVLX], Uses = [MXCSR] in {
9217    defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9218                                  OpNode, sched.XMM, v4f32x_info>,
9219                                  EVEX_V128, EVEX_CD8<32, CD8VF>;
9220    defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9221                                  OpNode, sched.YMM, v8f32x_info>,
9222                                  EVEX_V256, EVEX_CD8<32, CD8VF>;
9223    defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9224                                  OpNode, sched.XMM, v2f64x_info>,
9225                                  EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>;
9226    defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9227                                  OpNode, sched.YMM, v4f64x_info>,
9228                                  EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>;
9229  }
9230  let Predicates = [HasFP16, HasVLX] in {
9231    defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9232                                OpNode, sched.XMM, v8f16x_info>,
9233                                EVEX_V128, T_MAP6, EVEX_CD8<16, CD8VF>;
9234    defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9235                                OpNode, sched.YMM, v16f16x_info>,
9236                                EVEX_V256, T_MAP6, EVEX_CD8<16, CD8VF>;
9237  }
9238}
9239
// Packed reciprocal square root (0x4E, X86rsqrt14) and reciprocal (0x4C,
// X86rcp14) for all widths and element types. The defm prefix is only the
// stem; avx512_fp14_p_vl_all supplies the 14PS / 14PD / PH name suffixes.
9240defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
9241defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
9242
9243/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Two-source scalar operation with an additional {sae} register form, built
// with AVX512_maskable_scalar.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   _         - vector type info.
//   OpNode    - SDNode for the "r" and "m" forms.
//   OpNodeSAE - SDNode for the {sae} "rb" form.
//   sched     - scheduling info.
// Records: "r" (registers), "rb" (registers, {sae}, EVEX_B) and "m" ($src2
// read through _.ScalarIntMemFrags). "r" and "m" carry SIMD_EXC; "rb" does not.
// NOTE(review): in the visible part of this file the instantiations with real
// SDNodes are the vgetexp ones (avx512_eri_s, avx512_vgetexpsh); the rcp28 /
// rsqrt28 scalars use avx512_fp28_s_ass instead - the header above may be
// stale.
9244multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
9245                         SDNode OpNode, SDNode OpNodeSAE,
9246                         X86FoldableSchedWrite sched> {
9247  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
9248  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9249                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9250                           "$src2, $src1", "$src1, $src2",
9251                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9252                           Sched<[sched]>, SIMD_EXC;
9253
9254  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9255                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9256                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
9257                            (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9258                            EVEX_B, Sched<[sched]>;
9259
9260  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9261                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9262                         "$src2, $src1", "$src1, $src2",
9263                         (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
9264                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9265  }
9266}
9267
// avx512_fp28_s_ass - same three scalar records as avx512_fp28_s ("r", "rb",
// "m") but with null_frag in place of every selection pattern.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   _         - vector type info.
//   sched     - scheduling info.
// Because no pattern is attached, hasSideEffects = 0 is set for all records and
// mayLoad = 1 is set explicitly on the memory form.
// NOTE(review): the "_ass" suffix presumably means assembler-only - confirm.
9268multiclass avx512_fp28_s_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9269                             X86FoldableSchedWrite sched> {
9270  let ExeDomain = _.ExeDomain, Uses = [MXCSR], hasSideEffects = 0 in {
9271  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9272                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9273                           "$src2, $src1", "$src1, $src2",
9274                           (null_frag)>, Sched<[sched]>, SIMD_EXC;
9275  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9276                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9277                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
9278                            (null_frag)>, EVEX_B, Sched<[sched]>;
9279  let mayLoad = 1 in
9280  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9281                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9282                         "$src2, $src1", "$src1, $src2",
9283                         (null_frag)>,
9284                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9285  }
9286}
9287
// avx512_eri_s_ass - instantiates avx512_fp28_s_ass for f32 ("ss" suffix, SSZ)
// and f64 ("sd" suffix, SDZ, with REX_W).
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem.
//   sched     - scheduling info.
// No predicate is set by this multiclass.
9288multiclass avx512_eri_s_ass<bits<8> opc, string OpcodeStr,
9289                            X86FoldableSchedWrite sched> {
9290  defm SSZ : avx512_fp28_s_ass<opc, OpcodeStr#"ss", f32x_info, sched>,
9291             EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV;
9292  defm SDZ : avx512_fp28_s_ass<opc, OpcodeStr#"sd", f64x_info, sched>,
9293             EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV;
9294}
9295
// Scalar vrcp28ss/sd (0xCB) and vrsqrt28ss/sd (0xCD), defined without
// selection patterns via avx512_eri_s_ass.
9296defm VRCP28   : avx512_eri_s_ass<0xCB, "vrcp28", SchedWriteFRcp.Scl>;
9297defm VRSQRT28 : avx512_eri_s_ass<0xCD, "vrsqrt28", SchedWriteFRsqrt.Scl>;
9298
// avx512_eri_s - instantiates avx512_fp28_s (with selection patterns) for f32
// ("ss" suffix, SSZ) and f64 ("sd" suffix, SDZ, with REX_W).
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem.
//   OpNode    - SDNode for the normal forms.
//   OpNodeSAE - SDNode for the {sae} form.
//   sched     - scheduling info.
9299multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9300                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9301  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
9302                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV;
9303  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
9304                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV;
9305}
9306
// avx512_vgetexpsh - f16 counterpart of avx512_eri_s: instantiates
// avx512_fp28_s for f16x_info with the "sh" suffix under HasFP16, in T_MAP6.
// Unlike the SSZ/SDZ instantiations in avx512_eri_s, VEX_LIG is not applied
// here.
9307multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
9308                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9309  let Predicates = [HasFP16] in
9310  defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode,  OpNodeSAE, sched>,
9311               EVEX_CD8<16, CD8VT1>, T_MAP6, PD, EVEX, VVVV;
9312}
9313
// Scalar vgetexp (opcode 0x43): SSZ/SDZ from avx512_eri_s and SHZ from
// avx512_vgetexpsh, all using X86fgetexps / X86fgetexpSAEs and
// SchedWriteFRnd.Scl.
9314defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9315                              SchedWriteFRnd.Scl>,
9316                 avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9317                                  SchedWriteFRnd.Scl>;
9318/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
9319
// Single-source packed operation with selection patterns, built with
// AVX512_maskable.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   _         - vector type info.
//   OpNode    - SDNode applied to the source.
//   sched     - scheduling info.
// Records: "r" (register), "m" (full-width load) and "mb" (broadcast load,
// EVEX_B). All use MXCSR and set mayRaiseFPException = 1. Encoding map, prefix
// and vector length are left to the instantiation.
// NOTE(review): in the visible part of this file this multiclass is
// instantiated only for vgetexp (avx512_eri, avx512_fp_unaryop_packed,
// avx512_vgetexp_fp16); the rcp28 / rsqrt28 packed forms use avx512_fp28_p_ass
// - the header above may be stale.
9320multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9321                         SDNode OpNode, X86FoldableSchedWrite sched> {
9322  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9323  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9324                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9325                         (OpNode (_.VT _.RC:$src))>,
9326                         Sched<[sched]>;
9327
9328  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9329                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9330                         (OpNode (_.VT
9331                             (bitconvert (_.LdFrag addr:$src))))>,
9332                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9333
9334  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9335                         (ins _.ScalarMemOp:$src), OpcodeStr,
9336                         "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9337                         (OpNode (_.VT
9338                                  (_.BroadcastLdFrag addr:$src)))>,
9339                         EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9340  }
9341}
// avx512_fp28_p_sae - the {sae} register form ("rb", EVEX_B) that accompanies
// avx512_fp28_p. OpNode here is the SAE flavour of the operation supplied by
// the caller. Uses MXCSR; mayRaiseFPException is not set by this multiclass.
9342multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9343                         SDNode OpNode, X86FoldableSchedWrite sched> {
9344  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
9345  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9346                        (ins _.RC:$src), OpcodeStr,
9347                        "{sae}, $src", "$src, {sae}",
9348                        (OpNode (_.VT _.RC:$src))>,
9349                        EVEX_B, Sched<[sched]>;
9350}
9351
// avx512_fp28_p_ass - same three packed records as avx512_fp28_p ("r", "m",
// "mb") but with null_frag in place of every selection pattern.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   _         - vector type info.
//   sched     - scheduling info.
// Because no pattern is attached, hasSideEffects = 0 is set for all records and
// mayLoad = 1 is set explicitly on both memory forms.
// NOTE(review): the "_ass" suffix presumably means assembler-only - confirm.
9352multiclass avx512_fp28_p_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9353                             X86FoldableSchedWrite sched> {
9354  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1,
9355    hasSideEffects = 0 in {
9356  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9357                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9358                         (null_frag)>, Sched<[sched]>;
9359  let mayLoad = 1 in
9360  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9361                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9362                         (null_frag)>,
9363                         Sched<[sched.Folded, sched.ReadAfterFold]>;
9364  let mayLoad = 1 in
9365  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9366                         (ins _.ScalarMemOp:$src), OpcodeStr,
9367                         "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9368                         (null_frag)>,
9369                         EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9370  }
9371}
// avx512_fp28_p_sae_ass - pattern-less (null_frag) counterpart of
// avx512_fp28_p_sae: a single {sae} register form "rb" tagged EVEX_B, with
// hasSideEffects = 0 and Uses = [MXCSR].
9372multiclass avx512_fp28_p_sae_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9373                                X86FoldableSchedWrite sched> {
9374  let ExeDomain = _.ExeDomain, Uses = [MXCSR], hasSideEffects = 0 in
9375  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9376                        (ins _.RC:$src), OpcodeStr,
9377                        "{sae}, $src", "$src, {sae}",
9378                        (null_frag)>, Sched<[sched]>, EVEX_B;
9379}
9380
// avx512_eri_ass - 512-bit pattern-less packed instantiations: PSZ
// (v16f32_info, "ps") and PDZ (v8f64_info, "pd", REX_W). Each combines the
// normal forms from avx512_fp28_p_ass with the {sae} form from
// avx512_fp28_p_sae_ass, using sched.ZMM. Only 512-bit variants are defined
// and no predicate is set here.
9381multiclass  avx512_eri_ass<bits<8> opc, string OpcodeStr,
9382                           X86SchedWriteWidths sched> {
9383   defm PSZ : avx512_fp28_p_ass<opc, OpcodeStr#"ps", v16f32_info, sched.ZMM>,
9384              avx512_fp28_p_sae_ass<opc, OpcodeStr#"ps", v16f32_info, sched.ZMM>,
9385              T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
9386   defm PDZ : avx512_fp28_p_ass<opc, OpcodeStr#"pd", v8f64_info, sched.ZMM>,
9387              avx512_fp28_p_sae_ass<opc, OpcodeStr#"pd", v8f64_info, sched.ZMM>,
9388              T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
9389}
9390
// Packed 512-bit vrsqrt28 (0xCC), vrcp28 (0xCA) and vexp2 (0xC8), defined
// without selection patterns via avx512_eri_ass. vexp2 is scheduled with
// SchedWriteFAdd.
9391defm VRSQRT28 : avx512_eri_ass<0xCC, "vrsqrt28", SchedWriteFRsqrt>, EVEX;
9392defm VRCP28   : avx512_eri_ass<0xCA, "vrcp28", SchedWriteFRcp>, EVEX;
9393defm VEXP2    : avx512_eri_ass<0xC8, "vexp2", SchedWriteFAdd>, EVEX;
9394
// avx512_eri - 512-bit packed instantiations with selection patterns: PSZ
// (v16f32_info, "ps") and PDZ (v8f64_info, "pd", REX_W). Each combines
// avx512_fp28_p (OpNode) with avx512_fp28_p_sae (OpNodeSAE), using sched.ZMM.
// No predicate is set here.
9395multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
9396                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9397   defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
9398              avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
9399              T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
9400   defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
9401              avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
9402              T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
9403}
9404
// avx512_fp_unaryop_packed - 128-bit and 256-bit ps/pd instantiations of
// avx512_fp28_p, all under HasVLX. No {sae} forms are defined at these widths.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; "ps" or "pd" is appended.
//   OpNode    - SDNode applied to the source.
//   sched     - per-width scheduling info (.XMM / .YMM).
9405multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
9406                                  SDNode OpNode, X86SchedWriteWidths sched> {
9407  // Define only if AVX512VL feature is present.
9408  let Predicates = [HasVLX] in {
9409    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
9410                                sched.XMM>,
9411                                EVEX_V128, T8, PD, EVEX_CD8<32, CD8VF>;
9412    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
9413                                sched.YMM>,
9414                                EVEX_V256, T8, PD, EVEX_CD8<32, CD8VF>;
9415    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
9416                                sched.XMM>,
9417                                EVEX_V128, REX_W, T8, PD, EVEX_CD8<64, CD8VF>;
9418    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
9419                                sched.YMM>,
9420                                EVEX_V256, REX_W, T8, PD, EVEX_CD8<64, CD8VF>;
9421  }
9422}
9423
// avx512_vgetexp_fp16 - half-precision ("ph") instantiations of avx512_fp28_p,
// all in T_MAP6 with a 16-bit EVEX_CD8 element.
//   PHZ             - 512-bit, HasFP16; also includes the {sae} form from
//                     avx512_fp28_p_sae (OpNodeSAE).
//   PHZ128 / PHZ256 - HasFP16 and HasVLX; no {sae} form.
9424multiclass  avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
9425                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9426  let Predicates = [HasFP16] in
9427  defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
9428              avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
9429              T_MAP6, PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
9430  let Predicates = [HasFP16, HasVLX] in {
9431    defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
9432                                     EVEX_V128, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
9433    defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
9434                                     EVEX_V256, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
9435  }
9436}
// Packed vgetexp (opcode 0x42) for every width and element type:
//   avx512_eri               - 512-bit ps/pd, including {sae} forms;
//   avx512_vgetexp_fp16      - ph at 512/256/128 bits;
//   avx512_fp_unaryop_packed - 128/256-bit ps/pd.
// All use X86fgetexp (X86fgetexpSAE for the {sae} forms) and SchedWriteFRnd.
9437defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9438                            SchedWriteFRnd>,
9439                 avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9440                                     SchedWriteFRnd>,
9441                 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
9442                                          SchedWriteFRnd>, EVEX;
9443
// avx512_sqrt_packed_round - register form "rb" of a packed square root that
// takes an explicit rounding-control operand (AVX512RC:$rc). It is matched from
// X86fsqrtRnd with $rc as a target immediate and tagged EVEX_B and EVEX_RC.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   sched     - scheduling info.
//   _         - vector type info.
// Only ExeDomain is set here; any MXCSR use or FP-exception flags must come
// from the instantiation site.
9444multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
9445                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9446  let ExeDomain = _.ExeDomain in
9447  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9448                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
9449                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
9450                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
9451}
9452
// avx512_sqrt_packed - packed square root built with AVX512_maskable_split.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   sched     - scheduling info; memory forms use sched.Folded and
//               sched.ReadAfterFold.
//   _         - vector type info.
// Records: "r" (register), "m" (full-width load via _.LdFrag) and "mb"
// (broadcast load via _.BroadcastLdFrag, EVEX_B). Each passes two pattern dags,
// one built on any_fsqrt and one on fsqrt.
// NOTE(review): presumably the any_fsqrt dag is the unmasked pattern and the
// fsqrt dag is used for the masked variants - confirm against
// AVX512_maskable_split.
// All records use MXCSR and set mayRaiseFPException = 1.
9453multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
9454                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9455  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9456  defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
9457                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9458                         (_.VT (any_fsqrt _.RC:$src)),
9459                         (_.VT (fsqrt _.RC:$src))>, EVEX,
9460                         Sched<[sched]>;
9461  defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9462                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9463                         (any_fsqrt (_.VT (_.LdFrag addr:$src))),
9464                         (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
9465                         Sched<[sched.Folded, sched.ReadAfterFold]>;
9466  defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9467                          (ins _.ScalarMemOp:$src), OpcodeStr,
9468                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9469                          (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
9470                          (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
9471                          EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9472  }
9473}
9474
// Instantiates avx512_sqrt_packed for every packed type/width combination.
//   PHZ, PHZ128, PHZ256 - v32f16/v8f16x/v16f16x info, T_MAP5,
//                         EVEX_CD8<16, CD8VF>. PHZ is under HasFP16, the
//                         128/256-bit forms under HasFP16 + HasVLX.
//   PSZ, PSZ128, PSZ256 - v16f32/v4f32x/v8f32x info, TB, EVEX_CD8<32, CD8VF>.
//   PDZ, PDZ128, PDZ256 - v8f64/v2f64x/v4f64x info, REX_W + TB + PD,
//                         EVEX_CD8<64, CD8VF>.
// The 128/256-bit PS/PD forms sit under HasVLX; this multiclass sets no
// predicate for the 512-bit PS/PD forms.
// `sched` is indexed first by element type (PH/PS/PD) and then by register
// width (XMM/YMM/ZMM). The suffix "ph"/"ps"/"pd" is appended to OpcodeStr.
9475let Uses = [MXCSR], mayRaiseFPException = 1 in
9476multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
9477                                  X86SchedWriteSizes sched> {
9478  let Predicates = [HasFP16] in
9479  defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9480                                sched.PH.ZMM, v32f16_info>,
9481                                EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
9482  let Predicates = [HasFP16, HasVLX] in {
9483    defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9484                                     sched.PH.XMM, v8f16x_info>,
9485                                     EVEX_V128, T_MAP5, EVEX_CD8<16, CD8VF>;
9486    defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9487                                     sched.PH.YMM, v16f16x_info>,
9488                                     EVEX_V256, T_MAP5, EVEX_CD8<16, CD8VF>;
9489  }
9490  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9491                                sched.PS.ZMM, v16f32_info>,
9492                                EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
9493  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9494                                sched.PD.ZMM, v8f64_info>,
9495                                EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9496  // Define only if AVX512VL feature is present.
9497  let Predicates = [HasVLX] in {
9498    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9499                                     sched.PS.XMM, v4f32x_info>,
9500                                     EVEX_V128, TB, EVEX_CD8<32, CD8VF>;
9501    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9502                                     sched.PS.YMM, v8f32x_info>,
9503                                     EVEX_V256, TB, EVEX_CD8<32, CD8VF>;
9504    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9505                                     sched.PD.XMM, v2f64x_info>,
9506                                     EVEX_V128, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9507    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9508                                     sched.PD.YMM, v4f64x_info>,
9509                                     EVEX_V256, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9510  }
9511}
9512
// Instantiates avx512_sqrt_packed_round (the "rb" rounding-control form) for
// the PH, PS and PD element types. Only EVEX_V512 instantiations are made
// here; there are no 128/256-bit ones. The PH form is under HasFP16. The
// prefix/map and EVEX_CD8 arguments mirror the 512-bit entries of
// avx512_sqrt_packed_all. The surrounding `let` declares MXCSR as used.
9513let Uses = [MXCSR] in
9514multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
9515                                        X86SchedWriteSizes sched> {
9516  let Predicates = [HasFP16] in
9517  defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
9518                                      sched.PH.ZMM, v32f16_info>,
9519                                      EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
9520  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
9521                                      sched.PS.ZMM, v16f32_info>,
9522                                      EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
9523  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
9524                                      sched.PD.ZMM, v8f64_info>,
9525                                      EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9526}
9527
// Scalar square root.
// Records defined (all under predicate `prd`):
//   r_Int  - register form on full vector registers, matched by X86fsqrts.
//   m_Int  - memory form, second source matched by _.ScalarIntMemFrags.
//   rb_Int - register form with rounding-control operand $rc, matched by
//            X86fsqrtRnds; carries EVEX_B and EVEX_RC.
//   r, m   - isCodeGenOnly forms on the scalar register class _.FRC with an
//            empty pattern list; they are selected by the Pat records below.
// Parameters:
//   opc, OpcodeStr - opcode byte and mnemonic.
//   sched          - scheduling class (register) and its folded variants.
//   _              - type info (RC, FRC, VT, EltVT, memory operands).
//   Name           - instruction name prefix; "Zr"/"Zm" is appended to look
//                    up the codegen-only records in the Pat records.
//   prd            - predicate guarding everything, HasAVX512 by default.
9528multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9529                              X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
9530  let ExeDomain = _.ExeDomain, Predicates = [prd] in {
9531    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9532                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9533                         "$src2, $src1", "$src1, $src2",
9534                         (X86fsqrts (_.VT _.RC:$src1),
9535                                    (_.VT _.RC:$src2))>,
9536                         Sched<[sched]>, SIMD_EXC;
9537    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9538                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9539                         "$src2, $src1", "$src1, $src2",
9540                         (X86fsqrts (_.VT _.RC:$src1),
9541                                    (_.ScalarIntMemFrags addr:$src2))>,
9542                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    // The rounding form declares MXCSR as used explicitly and, unlike its
    // siblings, is not given SIMD_EXC.
9543    let Uses = [MXCSR] in
9544    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9545                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
9546                         "$rc, $src2, $src1", "$src1, $src2, $rc",
9547                         (X86fsqrtRnds (_.VT _.RC:$src1),
9548                                     (_.VT _.RC:$src2),
9549                                     (i32 timm:$rc))>,
9550                         EVEX_B, EVEX_RC, Sched<[sched]>;
9551
    // Codegen-only scalar-register forms: no pattern of their own.
9552    let isCodeGenOnly = 1, hasSideEffects = 0 in {
9553      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9554                (ins _.FRC:$src1, _.FRC:$src2),
9555                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9556                Sched<[sched]>, SIMD_EXC;
9557      let mayLoad = 1 in
9558        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9559                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
9560                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9561                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9562    }
9563  }
9564
  // Scalar any_fsqrt on a register selects Name#Zr; the first source operand
  // is filled with IMPLICIT_DEF.
9565  let Predicates = [prd] in {
9566    def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
9567              (!cast<Instruction>(Name#Zr)
9568                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9569  }
9570
  // The load-folding pattern is only enabled together with OptForSize.
9571  let Predicates = [prd, OptForSize] in {
9572    def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
9573              (!cast<Instruction>(Name#Zm)
9574                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
9575  }
9576}
9577
// Instantiates avx512_sqrt_scalar for the three scalar element types:
//   SHZ - f16x_info, "sh" suffix, T_MAP5 + XS, EVEX_CD8<16, CD8VT1>, HasFP16.
//   SSZ - f32x_info, "ss" suffix, TB + XS,     EVEX_CD8<32, CD8VT1>.
//   SDZ - f64x_info, "sd" suffix, TB + XD + REX_W, EVEX_CD8<64, CD8VT1>.
// SSZ and SDZ rely on the default predicate of avx512_sqrt_scalar.
// The Name argument is NAME#"SH"/"SS"/"SD"; avx512_sqrt_scalar appends
// "Zr"/"Zm" to it when it looks up the codegen-only records.
9578multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9579                                  X86SchedWriteSizes sched> {
9580  defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
9581                        EVEX_CD8<16, CD8VT1>, EVEX, VVVV, T_MAP5, XS;
9582  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9583                        EVEX_CD8<32, CD8VT1>, EVEX, VVVV, TB, XS;
9584  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9585                        EVEX_CD8<64, CD8VT1>, EVEX, VVVV, TB, XD, REX_W;
9586}
9587
// VSQRT, opcode 0x51, scheduled with SchedWriteFSqrtSizes.
// Packed forms: the plain and the rounding-control multiclasses are combined
// in a single defm.
9588defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9589             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
9590
// Scalar forms (sh/ss/sd), additionally tagged VEX_LIG.
9591defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
9592
// Scalar VRNDSCALE: two sources plus an 8-bit immediate ($src3).
// Records defined:
//   r_Int  - register form on _.RC, matched by X86RndScales.
//   rb_Int - register form printed with "{sae}", matched by X86RndScalesSAE;
//            carries EVEX_B and declares MXCSR as used.
//   m_Int  - memory form, second source matched by _.ScalarIntMemFrags.
//   r, m   - isCodeGenOnly forms on _.FRC with empty pattern lists, guarded by
//            HasAVX512; selected by the Pat records at the end.
// Parameters:
//   opc, OpcodeStr - opcode byte and full mnemonic.
//   sched          - scheduling class (register) and its folded variants.
//   _              - type info (RC, FRC, VT, EltVT, memory operands).
9593multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9594                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9595  let ExeDomain = _.ExeDomain in {
9596  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9597                           (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9598                           "$src3, $src2, $src1", "$src1, $src2, $src3",
9599                           (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9600                           (i32 timm:$src3)))>,
9601                           Sched<[sched]>, SIMD_EXC;
9602
  // SAE form: same operands as r_Int, different node and asm string.
9603  let Uses = [MXCSR] in
9604  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9605                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9606                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9607                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9608                         (i32 timm:$src3)))>, EVEX_B,
9609                         Sched<[sched]>;
9610
9611  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9612                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9613                         OpcodeStr,
9614                         "$src3, $src2, $src1", "$src1, $src2, $src3",
9615                         (_.VT (X86RndScales _.RC:$src1,
9616                                (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
9617                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9618
  // Codegen-only scalar-register forms: no pattern of their own.
9619  let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9620    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9621               (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9622               OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9623               []>, Sched<[sched]>, SIMD_EXC;
9624
9625    let mayLoad = 1 in
9626      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9627                 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9628                 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9629                 []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9630  }
9631  }
9632
  // X86any_VRndScale on a scalar register selects NAME#r; the first source
  // operand is filled with IMPLICIT_DEF.
9633  let Predicates = [HasAVX512] in {
9634    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
9635              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
9636               _.FRC:$src1, timm:$src2))>;
9637  }
9638
  // The load-folding pattern is only enabled together with OptForSize.
9639  let Predicates = [HasAVX512, OptForSize] in {
9640    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
9641              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
9642               addr:$src1, timm:$src2))>;
9643  }
9644}
9645
// Scalar VRNDSCALE instantiations, all scheduled with SchedWriteFRnd.Scl.
//   VRNDSCALESHZ - opcode 0x0A, f16x_info, AVX512PSIi8Base + TA,
//                  EVEX_CD8<16, CD8VT1>, wrapped in HasFP16.
//   VRNDSCALESSZ - opcode 0x0A, f32x_info, AVX512AIi8Base,
//                  EVEX_CD8<32, CD8VT1>, VEX_LIG.
//   VRNDSCALESDZ - opcode 0x0B, f64x_info, REX_W + AVX512AIi8Base,
//                  EVEX_CD8<64, CD8VT1>, VEX_LIG.
// NOTE(review): the SH instantiation has no VEX_LIG while SS and SD do -
// confirm this is intentional.
9646let Predicates = [HasFP16] in
9647defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
9648                                           SchedWriteFRnd.Scl, f16x_info>,
9649                                           AVX512PSIi8Base, TA, EVEX, VVVV,
9650                                           EVEX_CD8<16, CD8VT1>;
9651
9652defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9653                                           SchedWriteFRnd.Scl, f32x_info>,
9654                                           AVX512AIi8Base, EVEX, VVVV, VEX_LIG,
9655                                           EVEX_CD8<32, CD8VT1>;
9656
9657defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9658                                           SchedWriteFRnd.Scl, f64x_info>,
9659                                           REX_W, AVX512AIi8Base, EVEX, VVVV, VEX_LIG,
9660                                           EVEX_CD8<64, CD8VT1>;
9661
// Selection patterns that fold a masked scalar operation written as
//   Move(src1, scalar_to_vector(X86selects_mask(Mask, OpNode(src2[0]), passthru)))
// into the masked "_Int" instruction named "V"#OpcPrefix#"r_Intk" or
// "V"#OpcPrefix#"r_Intkz".
//   OpNode        - scalar operation applied to element 0 of $src2.
//   OpcPrefix     - instruction name without the leading "V" and the suffix.
//   Move          - node that merges the scalar result into $src1.
//   Mask          - dag matched as the select condition.
//   _             - vector type info (only _.VT is used).
//   ZeroFP        - leaf matched as the passthru in the zero-masking pattern.
//   OutMask       - dag emitted as the mask operand of the instruction.
//   BasePredicate - predicate guarding both patterns.
9662multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9663                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9664                                dag OutMask, Predicate BasePredicate> {
9665  let Predicates = [BasePredicate] in {
    // Merge-masking: the passthru is element 0 of $dst, which becomes the
    // first operand of the r_Intk instruction.
9666    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9667               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9668               (extractelt _.VT:$dst, (iPTR 0))))),
9669              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9670               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9671
    // Zero-masking: the passthru is ZeroFP, so r_Intkz takes no $dst input.
9672    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9673               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9674               ZeroFP))),
9675              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9676               OutMask, _.VT:$src2, _.VT:$src1)>;
9677  }
9678}
9679
// Masked scalar fsqrt patterns for the SH/SS/SD forms. In each case the
// matched mask is an i32 GPR truncated to i8 and converted to v1i1, and the
// emitted mask operand is $mask copied to register class VK1WM. The SH
// instantiation is guarded by HasFP16, SS and SD by HasAVX512.
9680defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
9681                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
9682                            fp16imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasFP16>;
9683defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9684                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9685                            fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9686defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9687                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9688                            fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9689
9690
9691//-------------------------------------------------
9692// Integer truncate and extend operations
9693//-------------------------------------------------
9694
// Register and store forms of one vector-truncate instruction.
// Records defined:
//   rr   - register form; pattern is OpNode applied to the source.
//   rrk  - merge-masked register form; $src0 is tied to $dst and the pattern
//          is MaskNode(src, src0, mask).
//   rrkz - zero-masked register form; pattern is MaskNode(src, all-zeros, mask).
//   mr   - store form, no pattern.
//   mrk  - masked store form, no pattern.
// The mask register class is taken from SrcInfo (SrcInfo.KRCWM).
// Parameters:
//   opc, OpcodeStr - opcode byte and mnemonic.
//   OpNode         - node matched by the unmasked register form.
//   MaskNode       - node matched by the masked register forms.
//   sched          - scheduling class; the store forms use sched.Folded.
//   SrcInfo        - type info of the wide source vector.
//   DestInfo       - type info of the narrow destination vector.
//   x86memop       - memory operand type of the store forms.
9695multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9696                              SDPatternOperator MaskNode,
9697                              X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9698                              X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9699  let ExeDomain = DestInfo.ExeDomain in {
9700  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9701             (ins SrcInfo.RC:$src),
9702             OpcodeStr # "\t{$src, $dst|$dst, $src}",
9703             [(set DestInfo.RC:$dst,
9704                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9705             EVEX, Sched<[sched]>;
9706  let Constraints = "$src0 = $dst" in
9707  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9708             (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9709             OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9710             [(set DestInfo.RC:$dst,
9711                   (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9712                             (DestInfo.VT DestInfo.RC:$src0),
9713                             SrcInfo.KRCWM:$mask))]>,
9714             EVEX, EVEX_K, Sched<[sched]>;
9715  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9716             (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9717             OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9718             [(set DestInfo.RC:$dst,
9719                   (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9720                             DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9721             EVEX, EVEX_KZ, Sched<[sched]>;
9722  }
9723
  // Store forms carry empty pattern lists, so mayStore/hasSideEffects are
  // given explicitly.
9724  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9725    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9726               (ins x86memop:$dst, SrcInfo.RC:$src),
9727               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9728               EVEX, Sched<[sched.Folded]>;
9729
9730    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9731               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9732               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9733               EVEX, EVEX_K, Sched<[sched.Folded]>;
9734  }//mayStore = 1, hasSideEffects = 0
9735}
9736
// Selection patterns for the truncating-store records.
// The instruction is looked up by name as Name # SrcInfo.ZSuffix # "mr" or
// "mrk".
//   SrcInfo    - type info of the source vector being truncated and stored.
//   truncFrag  - fragment matching the unmasked truncating store.
//   mtruncFrag - fragment matching the masked truncating store.
//   Name       - instruction name prefix (without the Z suffix).
9737multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9738                                    PatFrag truncFrag, PatFrag mtruncFrag,
9739                                    string Name> {
9740
9741  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9742            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
9743                                    addr:$dst, SrcInfo.RC:$src)>;
9744
9745  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9746                        SrcInfo.KRCWM:$mask),
9747            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
9748                            addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
9749}
9750
// Instantiates one truncate instruction at all three vector lengths.
// For each of Z128, Z256 and Z it combines avx512_trunc_common (instruction
// records) with avx512_trunc_mr_lowering (store patterns) and adds the
// matching EVEX_V128/EVEX_V256/EVEX_V512 class.
// Z128 and Z256 are guarded by [HasVLX, prd]; Z is guarded by [prd].
// Parameters are given per width because node, destination info and memory
// operand can all differ between widths:
//   OpNode128/256/512, MaskNode128/256/512 - unmasked / masked nodes.
//   VTSrcInfo                              - source type infos for all widths.
//   DestInfoZ128/Z256/Z                    - destination type infos.
//   x86memopZ128/Z256/Z                    - store memory operand types.
//   truncFrag, mtruncFrag                  - store fragments (shared).
//   prd                                    - base predicate, HasAVX512 by default.
9751multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9752                        SDNode OpNode256, SDNode OpNode512,
9753                        SDPatternOperator MaskNode128,
9754                        SDPatternOperator MaskNode256,
9755                        SDPatternOperator MaskNode512,
9756                        X86SchedWriteWidths sched,
9757                        AVX512VLVectorVTInfo VTSrcInfo,
9758                        X86VectorVTInfo DestInfoZ128,
9759                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9760                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9761                        X86MemOperand x86memopZ, PatFrag truncFrag,
9762                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9763
9764  let Predicates = [HasVLX, prd] in {
9765    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched.XMM,
9766                             VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9767                avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
9768                                         mtruncFrag, NAME>, EVEX_V128;
9769
9770    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched.YMM,
9771                             VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9772                avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
9773                                         mtruncFrag, NAME>, EVEX_V256;
9774  }
9775  let Predicates = [prd] in
9776    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched.ZMM,
9777                             VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9778                avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
9779                                         mtruncFrag, NAME>, EVEX_V512;
9780}
9781
// Truncate from the i64 source infos (avx512vl_i64_info) to v16i8x_info.
// The destination info is v16i8x_info at every width, and InVecNode /
// InVecMaskNode are used for all three widths (this wrapper has no
// OpNode/MaskNode parameters). Store operands are i16mem, i32mem and i64mem
// for Z128, Z256 and Z. Encoded with EVEX_CD8<8, CD8VO>.
9782multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
9783                           X86SchedWriteWidths sched, PatFrag StoreNode,
9784                           PatFrag MaskedStoreNode, SDNode InVecNode,
9785                           SDPatternOperator InVecMaskNode> {
9786  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9787                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9788                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
9789                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9790                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
9791}
9792
// Truncate from the i64 source infos (avx512vl_i64_info) to v8i16x_info.
// The destination info is v8i16x_info at every width. Z128 and Z256 use
// InVecNode / InVecMaskNode; only Z uses OpNode / MaskNode. Store operands are
// i32mem, i64mem and i128mem. Encoded with EVEX_CD8<16, CD8VQ>.
9793multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9794                           SDPatternOperator MaskNode,
9795                           X86SchedWriteWidths sched, PatFrag StoreNode,
9796                           PatFrag MaskedStoreNode, SDNode InVecNode,
9797                           SDPatternOperator InVecMaskNode> {
9798  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9799                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
9800                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
9801                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9802                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
9803}
9804
// Truncate from the i64 source infos (avx512vl_i64_info) to 32-bit elements.
// Destination infos are v4i32x_info for Z128 and Z256 and v8i32x_info for Z.
// Z128 uses InVecNode / InVecMaskNode; Z256 and Z use OpNode / MaskNode.
// Store operands are i64mem, i128mem and i256mem. Encoded with
// EVEX_CD8<32, CD8VH>.
9805multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9806                           SDPatternOperator MaskNode,
9807                           X86SchedWriteWidths sched, PatFrag StoreNode,
9808                           PatFrag MaskedStoreNode, SDNode InVecNode,
9809                           SDPatternOperator InVecMaskNode> {
9810  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9811                          InVecMaskNode, MaskNode, MaskNode, sched,
9812                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
9813                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9814                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
9815}
9816
// Truncate from the i32 source infos (avx512vl_i32_info) to v16i8x_info.
// The destination info is v16i8x_info at every width. Z128 and Z256 use
// InVecNode / InVecMaskNode; only Z uses OpNode / MaskNode. Store operands are
// i32mem, i64mem and i128mem. Encoded with EVEX_CD8<8, CD8VQ>.
9817multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9818                           SDPatternOperator MaskNode,
9819                           X86SchedWriteWidths sched, PatFrag StoreNode,
9820                           PatFrag MaskedStoreNode, SDNode InVecNode,
9821                           SDPatternOperator InVecMaskNode> {
9822  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9823                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
9824                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
9825                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9826                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
9827}
9828
// Truncate from the i32 source infos (avx512vl_i32_info) to 16-bit elements.
// Destination infos are v8i16x_info for Z128 and Z256 and v16i16x_info for Z.
// Z128 uses InVecNode / InVecMaskNode; Z256 and Z use OpNode / MaskNode.
// Store operands are i64mem, i128mem and i256mem. Encoded with
// EVEX_CD8<16, CD8VH>.
9829multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9830                           SDPatternOperator MaskNode,
9831                           X86SchedWriteWidths sched, PatFrag StoreNode,
9832                           PatFrag MaskedStoreNode, SDNode InVecNode,
9833                           SDPatternOperator InVecMaskNode> {
9834  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9835                          InVecMaskNode, MaskNode, MaskNode, sched,
9836                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
9837                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9838                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
9839}
9840
// Truncate from the i16 source infos (avx512vl_i16_info) to 8-bit elements.
// Destination infos are v16i8x_info for Z128 and Z256 and v32i8x_info for Z.
// Z128 uses InVecNode / InVecMaskNode; Z256 and Z use OpNode / MaskNode.
// Store operands are i64mem, i128mem and i256mem. The base predicate passed
// to avx512_trunc is HasBWI instead of the default.
// NOTE(review): EVEX_CD8 is given element size 16 here although the other
// byte-destination wrappers (qb, db) pass 8 - confirm this is intentional.
9841multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9842                           SDPatternOperator MaskNode,
9843                           X86SchedWriteWidths sched, PatFrag StoreNode,
9844                           PatFrag MaskedStoreNode, SDNode InVecNode,
9845                           SDPatternOperator InVecMaskNode> {
9846  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9847                          InVecMaskNode, MaskNode, MaskNode, sched,
9848                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
9849                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9850                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
9851}
9852
// i64 -> i8 truncates: plain (0x32), signed-saturating (0x22) and
// unsigned-saturating (0x12) variants. Each passes the matching
// truncstore / masked_truncstore fragments and X86vtrunc* / X86vmtrunc* nodes.
9853defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",
9854                                  SchedWriteVecTruncate, truncstorevi8,
9855                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9856defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",
9857                                  SchedWriteVecTruncate, truncstore_s_vi8,
9858                                  masked_truncstore_s_vi8, X86vtruncs,
9859                                  X86vmtruncs>;
9860defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb",
9861                                  SchedWriteVecTruncate, truncstore_us_vi8,
9862                                  masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;
9863
// i64 -> i16 truncates: plain (0x34), signed-saturating (0x24) and
// unsigned-saturating (0x14) variants. The first node pair (trunc/select_trunc
// or the saturating equivalents) is the OpNode/MaskNode argument; the trailing
// X86vtrunc*/X86vmtrunc* pair is the InVecNode/InVecMaskNode argument.
9864defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9865                                  SchedWriteVecTruncate, truncstorevi16,
9866                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9867defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
9868                                  SchedWriteVecTruncate, truncstore_s_vi16,
9869                                  masked_truncstore_s_vi16, X86vtruncs,
9870                                  X86vmtruncs>;
9871defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9872                                  select_truncus, SchedWriteVecTruncate,
9873                                  truncstore_us_vi16, masked_truncstore_us_vi16,
9874                                  X86vtruncus, X86vmtruncus>;
9875
// i64 -> i32 truncates: plain (0x35), signed-saturating (0x25) and
// unsigned-saturating (0x15) variants, using the vi32 truncating-store
// fragments.
9876defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9877                                  SchedWriteVecTruncate, truncstorevi32,
9878                                  masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9879defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
9880                                  SchedWriteVecTruncate, truncstore_s_vi32,
9881                                  masked_truncstore_s_vi32, X86vtruncs,
9882                                  X86vmtruncs>;
9883defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9884                                  select_truncus, SchedWriteVecTruncate,
9885                                  truncstore_us_vi32, masked_truncstore_us_vi32,
9886                                  X86vtruncus, X86vmtruncus>;
9887
// i32 -> i8 truncates: plain (0x31), signed-saturating (0x21) and
// unsigned-saturating (0x11) variants, using the vi8 truncating-store
// fragments.
9888defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9889                                  SchedWriteVecTruncate, truncstorevi8,
9890                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9891defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9892                                  SchedWriteVecTruncate, truncstore_s_vi8,
9893                                  masked_truncstore_s_vi8, X86vtruncs,
9894                                  X86vmtruncs>;
9895defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
9896                                  select_truncus, SchedWriteVecTruncate,
9897                                  truncstore_us_vi8, masked_truncstore_us_vi8,
9898                                  X86vtruncus, X86vmtruncus>;
9899
// i32 -> i16 truncates: plain (0x33), signed-saturating (0x23) and
// unsigned-saturating (0x13) variants, using the vi16 truncating-store
// fragments.
9900defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
9901                                  SchedWriteVecTruncate, truncstorevi16,
9902                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9903defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
9904                                  SchedWriteVecTruncate, truncstore_s_vi16,
9905                                  masked_truncstore_s_vi16, X86vtruncs,
9906                                  X86vmtruncs>;
9907defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
9908                                  select_truncus, SchedWriteVecTruncate,
9909                                  truncstore_us_vi16, masked_truncstore_us_vi16,
9910                                  X86vtruncus, X86vmtruncus>;
9911
// i16 -> i8 truncates: plain (0x30), signed-saturating (0x20) and
// unsigned-saturating (0x10) variants, using the vi8 truncating-store
// fragments. avx512_trunc_wb guards these with HasBWI.
9912defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
9913                                  SchedWriteVecTruncate, truncstorevi8,
9914                                  masked_truncstorevi8, X86vtrunc,
9915                                  X86vmtrunc>;
9916defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
9917                                  SchedWriteVecTruncate, truncstore_s_vi8,
9918                                  masked_truncstore_s_vi8, X86vtruncs,
9919                                  X86vmtruncs>;
9920defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
9921                                  select_truncus, SchedWriteVecTruncate,
9922                                  truncstore_us_vi8, masked_truncstore_us_vi8,
9923                                  X86vtruncus, X86vmtruncus>;
9924
// 256-bit truncates when VLX is not available (NoVLX) but 512-bit EVEX is:
// the 256-bit source is inserted into an undefined 512-bit register
// (INSERT_SUBREG at sub_ymm), the 512-bit rr instruction is used, and the low
// 128 bits of its result are extracted (EXTRACT_SUBREG at sub_xmm).
9925let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
// v8i32 -> v8i16 via VPMOVDWZrr.
9926def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
9927         (v8i16 (EXTRACT_SUBREG
9928                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
9929                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
// v4i64 -> v4i32 via VPMOVQDZrr.
9930def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
9931         (v4i32 (EXTRACT_SUBREG
9932                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
9933                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
9934}
9935
// Same widening approach for v16i16 -> v16i8 under HasBWI without VLX: widen
// the source to 512 bits, use VPMOVWBZrr, and extract the low 128 bits.
9936let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
9937def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9938         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
9939                                            VR256X:$src, sub_ymm))), sub_xmm))>;
9940}
9941
9942// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Two patterns per instantiation map a masked-truncate node onto the masked
// register forms of an existing truncate instruction:
//   OpNode(src, src0, mask)      -> InstrName#"rrk"  (merge-masking)
//   OpNode(src, all-zeros, mask) -> InstrName#"rrkz" (zero-masking)
//   InstrName - instruction name without the "rrk"/"rrkz" suffix.
//   OpNode    - masked-truncate node to match.
//   DestInfo  - type info of the narrow result.
//   SrcInfo   - type info of the wide source; also supplies the mask class.
9943multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
9944                           X86VectorVTInfo DestInfo,
9945                           X86VectorVTInfo SrcInfo> {
9946  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9947                                 DestInfo.RC:$src0,
9948                                 SrcInfo.KRCWM:$mask)),
9949            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
9950                                                 SrcInfo.KRCWM:$mask,
9951                                                 SrcInfo.RC:$src)>;
9952
9953  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9954                                 DestInfo.ImmAllZerosV,
9955                                 SrcInfo.KRCWM:$mask)),
9956            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
9957                                                  SrcInfo.RC:$src)>;
9958}
9959
// Masked v8i32 -> v8i16 truncates on the 256-bit (Z256) instructions, plain
// and signed/unsigned saturating; requires VLX.
9960let Predicates = [HasVLX] in {
9961defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
9962defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
9963defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
9964}
9965
// Masked truncates on the 512-bit (Z) instructions. Each group covers the
// plain, signed-saturating and unsigned-saturating node.
9966let Predicates = [HasAVX512] in {
// v16i32 -> v16i16
9967defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
9968defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
9969defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
9970
// v16i32 -> v16i8
9971defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
9972defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
9973defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
9974
// v8i64 -> v8i16
9975defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
9976defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
9977defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
9978}
9979
// Register and memory forms of one vector-extend instruction, both built with
// AVX512_maskable.
//   rr - register source; pattern is OpNode applied to the SrcInfo vector.
//   rm - memory source; pattern is LdFrag applied to the address. Scheduled
//        with sched.Folded.
// Parameters:
//   opc, OpcodeStr - opcode byte and mnemonic.
//   sched          - scheduling class.
//   DestInfo       - type info of the widened result.
//   SrcInfo        - type info of the narrow register source.
//   x86memop       - memory operand type of the rm form.
//   LdFrag         - load fragment matched by the rm form.
//   OpNode         - extend node matched by the rr form.
9980multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9981              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
9982              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
9983  let ExeDomain = DestInfo.ExeDomain in {
9984  defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9985                    (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
9986                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
9987                  EVEX, Sched<[sched]>;
9988
9989  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9990                  (ins x86memop:$src), OpcodeStr ,"$src", "$src",
9991                  (DestInfo.VT (LdFrag addr:$src))>,
9992                EVEX, Sched<[sched.Folded]>;
9993  }
9994}
9995
// Byte -> word extend at all three vector lengths.
//   Z128 - v16i8x_info -> v8i16x_info,  i64mem,  uses InVecNode.
//   Z256 - v16i8x_info -> v16i16x_info, i128mem, uses OpNode.
//   Z    - v32i8x_info -> v32i16_info,  i256mem, uses OpNode.
// Z128/Z256 require HasVLX + HasBWI, Z requires HasBWI. All forms share
// EVEX_CD8<8, CD8VH>, T8, PD and WIG.
// LdFrag defaults to the fragment named ExtTy#"extloadvi8".
9996multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
9997          SDNode OpNode, SDNode InVecNode, string ExtTy,
9998          X86SchedWriteWidths sched,
9999          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10000  let Predicates = [HasVLX, HasBWI] in {
10001    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
10002                    v16i8x_info, i64mem, LdFrag, InVecNode>,
10003                     EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V128, WIG;
10004
10005    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
10006                    v16i8x_info, i128mem, LdFrag, OpNode>,
10007                     EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V256, WIG;
10008  }
10009  let Predicates = [HasBWI] in {
10010    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
10011                    v32i8x_info, i256mem, LdFrag, OpNode>,
10012                     EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V512, WIG;
10013  }
10014}
10015
// Byte -> dword extension at 128, 256 and 512 bits.
// Every form takes a v16i8 register source. The 128/256-bit results have
// fewer than sixteen elements, so those forms match InVecNode; the 512-bit
// form (sixteen dwords) matches OpNode.
// LdFrag defaults to the "<ExtTy>extloadvi8" fragment.
multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag =
                               !cast<PatFrag>(ExtTy#"extloadvi8")> {
  // 128- and 256-bit forms.
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_pmovx_common<opc, OpcodeStr, sched.XMM,
                                    v4i32x_info, v16i8x_info,
                                    i32mem, LdFrag, InVecNode>,
                EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V128, WIG;

    defm Z256 : avx512_pmovx_common<opc, OpcodeStr, sched.YMM,
                                    v8i32x_info, v16i8x_info,
                                    i64mem, LdFrag, InVecNode>,
                EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V256, WIG;
  }

  // 512-bit form.
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM,
                                 v16i32_info, v16i8x_info,
                                 i128mem, LdFrag, OpNode>,
             EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V512, WIG;
  }
}
10035
// Byte -> qword extension at 128, 256 and 512 bits.
// Even the 512-bit result holds only eight elements while the register
// source is v16i8, so every width is matched with InVecNode; this multiclass
// therefore has no plain-extension node parameter.
// LdFrag defaults to the "<ExtTy>extloadvi8" fragment.
multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
                           SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag =
                               !cast<PatFrag>(ExtTy#"extloadvi8")> {
  // 128- and 256-bit forms.
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_pmovx_common<opc, OpcodeStr, sched.XMM,
                                    v2i64x_info, v16i8x_info,
                                    i16mem, LdFrag, InVecNode>,
                EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V128, WIG;

    defm Z256 : avx512_pmovx_common<opc, OpcodeStr, sched.YMM,
                                    v4i64x_info, v16i8x_info,
                                    i32mem, LdFrag, InVecNode>,
                EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V256, WIG;
  }

  // 512-bit form.
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM,
                                 v8i64_info, v16i8x_info,
                                 i64mem, LdFrag, InVecNode>,
             EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V512, WIG;
  }
}
10055
// Word -> dword extension at 128, 256 and 512 bits.
// The 128-bit form yields four dwords from a v8i16 source and is matched
// with InVecNode; the 256/512-bit forms use OpNode.
// LdFrag defaults to the "<ExtTy>extloadvi16" fragment.
multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag =
                               !cast<PatFrag>(ExtTy#"extloadvi16")> {
  // 128- and 256-bit forms.
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_pmovx_common<opc, OpcodeStr, sched.XMM,
                                    v4i32x_info, v8i16x_info,
                                    i64mem, LdFrag, InVecNode>,
                EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V128, WIG;

    defm Z256 : avx512_pmovx_common<opc, OpcodeStr, sched.YMM,
                                    v8i32x_info, v8i16x_info,
                                    i128mem, LdFrag, OpNode>,
                EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V256, WIG;
  }

  // 512-bit form.
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM,
                                 v16i32_info, v16i16x_info,
                                 i256mem, LdFrag, OpNode>,
             EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V512, WIG;
  }
}
10075
// Word -> qword extension at 128, 256 and 512 bits.
// Every form takes a v8i16 register source. The 128/256-bit results have
// fewer than eight elements and are matched with InVecNode; the 512-bit
// form (eight qwords) matches OpNode.
// LdFrag defaults to the "<ExtTy>extloadvi16" fragment.
multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag =
                               !cast<PatFrag>(ExtTy#"extloadvi16")> {
  // 128- and 256-bit forms.
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_pmovx_common<opc, OpcodeStr, sched.XMM,
                                    v2i64x_info, v8i16x_info,
                                    i32mem, LdFrag, InVecNode>,
                EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V128, WIG;

    defm Z256 : avx512_pmovx_common<opc, OpcodeStr, sched.YMM,
                                    v4i64x_info, v8i16x_info,
                                    i64mem, LdFrag, InVecNode>,
                EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V256, WIG;
  }

  // 512-bit form.
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM,
                                 v8i64_info, v8i16x_info,
                                 i128mem, LdFrag, OpNode>,
             EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V512, WIG;
  }
}
10095
// Dword -> qword extension at 128, 256 and 512 bits.
// The 128-bit form yields two qwords from a v4i32 source and is matched with
// InVecNode; the 256/512-bit forms use OpNode.
// Unlike the byte/word source variants, these definitions do not add WIG.
// LdFrag defaults to the "<ExtTy>extloadvi32" fragment.
multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag =
                               !cast<PatFrag>(ExtTy#"extloadvi32")> {
  // 128- and 256-bit forms.
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_pmovx_common<opc, OpcodeStr, sched.XMM,
                                    v2i64x_info, v4i32x_info,
                                    i64mem, LdFrag, InVecNode>,
                EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V128;

    defm Z256 : avx512_pmovx_common<opc, OpcodeStr, sched.YMM,
                                    v4i64x_info, v4i32x_info,
                                    i128mem, LdFrag, OpNode>,
                EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V256;
  }

  // 512-bit form.
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM,
                                 v8i64_info, v8i32x_info,
                                 i256mem, LdFrag, OpNode>,
             EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V512;
  }
}
10116
// Zero-extending moves: opcodes 0x30-0x35, "z" extending-load fragments.
defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z",
                                 SchedWriteVecExtend>;
defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z",
                                 SchedWriteVecExtend>;
defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq", zext_invec, "z",
                                 SchedWriteVecExtend>;
defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z",
                                 SchedWriteVecExtend>;
defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z",
                                 SchedWriteVecExtend>;
defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z",
                                 SchedWriteVecExtend>;

// Sign-extending moves: opcodes 0x20-0x25, "s" extending-load fragments.
defm VPMOVSXBW : avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s",
                                 SchedWriteVecExtend>;
defm VPMOVSXBD : avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s",
                                 SchedWriteVecExtend>;
defm VPMOVSXBQ : avx512_pmovx_bq<0x22, "vpmovsxbq", sext_invec, "s",
                                 SchedWriteVecExtend>;
defm VPMOVSXWD : avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s",
                                 SchedWriteVecExtend>;
defm VPMOVSXWQ : avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s",
                                 SchedWriteVecExtend>;
defm VPMOVSXDQ : avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s",
                                 SchedWriteVecExtend>;
10130
10131
// Load-folding patterns that are also needed for any-extend.
// aext_vector_inreg is currently legalized to zext_vector_inreg.
//
// Each pattern folds a full-vector load feeding ExtOp into the memory form
// of the instruction named OpcPrefix#<size suffix>.
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // 256-bit results.
  let Predicates = [HasVLX, HasBWI] in {
    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }

  let Predicates = [HasVLX] in {
    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // 512-bit results.
  let Predicates = [HasBWI] in {
    def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }

  let Predicates = [HasAVX512] in {
    // dword results
    def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
    def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

    // qword results
    def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}
10167
// Load-folding patterns for the *_invec extension nodes, layered on top of
// AVX512_pmovx_patterns_base.
//
// Each pattern recognizes a narrow scalar load (or zero-extending vector
// load) that was placed into a 128-bit vector, bitcast to the source element
// type and fed to InVecOp, and selects the memory form of the matching
// OpcPrefix#<size suffix> instruction instead.
//
// For 64-bit loads three spellings are matched: an i64 load into v2i64, an
// f64 load into v2f64, and X86vzload64.
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  // 256-bit patterns
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  // An f64 load placed in a vector gives v2f64 (not v2i64), as in the 128-bit
  // and 512-bit patterns; with v2i64 here the pattern could never match.
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  // Same v2f64 correction as for the BD pattern in this group.
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  }
}
10238
// Instantiate the load-folding patterns for sign and zero extension.
defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
10241
// Without BWI there is no direct v16i16 -> v16i8 truncate. DAG combine merges
// ext+trunc aggressively, which makes it impossible to legalize the DAG into
// this shape directly, so it is handled at selection time: widen the words to
// dwords (VPMOVZXWD), then truncate dwords to bytes (VPMOVDB).
let Predicates = [HasAVX512, NoBWI] in {
  // Register source.
  def : Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
  // Memory source: the load is folded into the widening step.
  def : Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
}
10251
10252//===----------------------------------------------------------------------===//
10253// GATHER - SCATTER Operations
10254
10255// FIXME: Improve scheduling of gather/scatter instructions.
// One gather instruction (memory form only, no ISel pattern).
//   _      - type info of the gathered vector.
//   memop  - vector-index memory operand.
//   MaskRC - write-mask register class, defaulting to _.KRCWM.
// $dst is early-clobber and tied to $src1; the mask is both read ($mask) and
// written back ($mask_wb), the two being tied together.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
  let ExeDomain = _.ExeDomain,
      Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      mayLoad = 1, hasSideEffects = 0 in {
    def rm : AVX5128I<opc, MRMSrcMem,
                      (outs _.RC:$dst, MaskRC:$mask_wb),
                      (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
                      OpcodeStr # _.Suffix #
                        "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
                      []>,
             EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
             Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
  }
}
10267
// Gathers built with REX_W (instantiated below for the PD and Q suffixes).
// dopc/qopc are the opcodes of the dword-index and qword-index forms; the
// record names are NAME#{D,Q}#SUFF#{Z,Z256,Z128}.
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr,
                              string SUFF> {
  // 512-bit forms.
  defm NAME#D#SUFF#Z : avx512_gather<dopc, OpcodeStr#"d", _.info512,
                                     vy512xmem>,
                       EVEX_V512, REX_W;
  defm NAME#Q#SUFF#Z : avx512_gather<qopc, OpcodeStr#"q", _.info512,
                                     vz512mem>,
                       EVEX_V512, REX_W;

  // 256- and 128-bit forms require VLX.
  let Predicates = [HasVLX] in {
    defm NAME#D#SUFF#Z256 : avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                          vx256xmem>,
                            EVEX_V256, REX_W;
    defm NAME#Q#SUFF#Z256 : avx512_gather<qopc, OpcodeStr#"q", _.info256,
                                          vy256xmem>,
                            EVEX_V256, REX_W;
    defm NAME#D#SUFF#Z128 : avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                          vx128xmem>,
                            EVEX_V128, REX_W;
    defm NAME#Q#SUFF#Z128 : avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                          vx128xmem>,
                            EVEX_V128, REX_W;
  }
}
10285
// Gathers built without REX_W (instantiated below for the PS and D suffixes).
// The qword-index forms use the next narrower vector info than their
// dword-index counterparts (info256 under EVEX_V512, info128 under
// EVEX_V256); the 128-bit qword-index form stays at info128 and passes VK2WM
// as its mask class explicitly.
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr,
                              string SUFF> {
  // EVEX_V512 forms.
  defm NAME#D#SUFF#Z : avx512_gather<dopc, OpcodeStr#"d", _.info512,
                                     vz512mem>,
                       EVEX_V512;
  defm NAME#Q#SUFF#Z : avx512_gather<qopc, OpcodeStr#"q", _.info256,
                                     vz256mem>,
                       EVEX_V512;

  // EVEX_V256 / EVEX_V128 forms require VLX.
  let Predicates = [HasVLX] in {
    defm NAME#D#SUFF#Z256 : avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                          vy256xmem>,
                            EVEX_V256;
    defm NAME#Q#SUFF#Z256 : avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                          vy128xmem>,
                            EVEX_V256;
    defm NAME#D#SUFF#Z128 : avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                          vx128xmem>,
                            EVEX_V128;
    defm NAME#Q#SUFF#Z128 : avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                          vx64xmem, VK2WM>,
                            EVEX_V128;
  }
}
10303
10304
// Floating-point gathers: opcode 0x92 (dword index) / 0x93 (qword index).
defm VGATHER
    : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
      avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

// Integer gathers: opcode 0x90 (dword index) / 0x91 (qword index).
defm VPGATHER
    : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
      avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
10310
// One scatter instruction (memory-destination form only, no ISel pattern).
//   _      - type info of the stored vector.
//   memop  - vector-index memory operand.
//   MaskRC - write-mask register class, defaulting to _.KRCWM.
// The mask is read ($mask) and written back ($mask_wb); the two are tied.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
  let ExeDomain = _.ExeDomain, Constraints = "$mask = $mask_wb",
      mayStore = 1, hasSideEffects = 0 in {
    def mr : AVX5128I<opc, MRMDestMem,
                      (outs MaskRC:$mask_wb),
                      (ins memop:$dst, MaskRC:$mask, _.RC:$src),
                      OpcodeStr # _.Suffix #
                        "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                      []>,
             EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
             Sched<[WriteStore]>;
  }
}
10324
// Scatters built with REX_W (instantiated below for the PD and Q suffixes).
// dopc/qopc are the opcodes of the dword-index and qword-index forms; the
// record names are NAME#{D,Q}#SUFF#{Z,Z256,Z128}.
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr,
                               string SUFF> {
  // 512-bit forms.
  defm NAME#D#SUFF#Z : avx512_scatter<dopc, OpcodeStr#"d", _.info512,
                                      vy512xmem>,
                       EVEX_V512, REX_W;
  defm NAME#Q#SUFF#Z : avx512_scatter<qopc, OpcodeStr#"q", _.info512,
                                      vz512mem>,
                       EVEX_V512, REX_W;

  // 256- and 128-bit forms require VLX.
  let Predicates = [HasVLX] in {
    defm NAME#D#SUFF#Z256 : avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                           vx256xmem>,
                            EVEX_V256, REX_W;
    defm NAME#Q#SUFF#Z256 : avx512_scatter<qopc, OpcodeStr#"q", _.info256,
                                           vy256xmem>,
                            EVEX_V256, REX_W;
    defm NAME#D#SUFF#Z128 : avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                           vx128xmem>,
                            EVEX_V128, REX_W;
    defm NAME#Q#SUFF#Z128 : avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                           vx128xmem>,
                            EVEX_V128, REX_W;
  }
}
10342
// Scatters built without REX_W (instantiated below for the PS and D
// suffixes). The qword-index forms use the next narrower vector info than
// their dword-index counterparts (info256 under EVEX_V512, info128 under
// EVEX_V256); the 128-bit qword-index form stays at info128 and passes VK2WM
// as its mask class explicitly.
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr,
                               string SUFF> {
  // EVEX_V512 forms.
  defm NAME#D#SUFF#Z : avx512_scatter<dopc, OpcodeStr#"d", _.info512,
                                      vz512mem>,
                       EVEX_V512;
  defm NAME#Q#SUFF#Z : avx512_scatter<qopc, OpcodeStr#"q", _.info256,
                                      vz256mem>,
                       EVEX_V512;

  // EVEX_V256 / EVEX_V128 forms require VLX.
  let Predicates = [HasVLX] in {
    defm NAME#D#SUFF#Z256 : avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                           vy256xmem>,
                            EVEX_V256;
    defm NAME#Q#SUFF#Z256 : avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                           vy128xmem>,
                            EVEX_V256;
    defm NAME#D#SUFF#Z128 : avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                           vx128xmem>,
                            EVEX_V128;
    defm NAME#Q#SUFF#Z128 : avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                           vx64xmem, VK2WM>,
                            EVEX_V128;
  }
}
10360
// Floating-point scatters: opcode 0xA2 (dword index) / 0xA3 (qword index).
defm VSCATTER
    : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
      avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

// Integer scatters: opcode 0xA0 (dword index) / 0xA1 (qword index).
defm VPSCATTER
    : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
      avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
10366
// Gather/scatter prefetch.
// A single masked memory-only instruction with no outputs and no ISel
// pattern; it is flagged as both mayLoad and mayStore.
//   F     - instruction format (the instantiations below pass MRM<n>m forms).
//   KRC   - mask register class.
//   memop - vector-index memory operand.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F,
                                          string OpcodeStr,
                                          RegisterClass KRC,
                                          X86MemOperand memop> {
  let mayLoad = 1, mayStore = 1 in {
    def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
                     OpcodeStr # "\t{$src {${mask}}|{${mask}}, $src}", []>,
            EVEX, EVEX_K, Sched<[WriteLoad]>;
  }
}
10375
// Gather prefetch, PF0 variants (format MRM1m).
defm VGATHERPF0DPS
    : avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                                     VK16WM, vz512mem>,
      EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF0QPS
    : avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                                     VK8WM, vz256mem>,
      EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VGATHERPF0DPD
    : avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                                     VK8WM, vy512xmem>,
      EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF0QPD
    : avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                                     VK8WM, vz512mem>,
      EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

// Gather prefetch, PF1 variants (format MRM2m).
defm VGATHERPF1DPS
    : avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                                     VK16WM, vz512mem>,
      EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF1QPS
    : avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                                     VK8WM, vz256mem>,
      EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VGATHERPF1DPD
    : avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                                     VK8WM, vy512xmem>,
      EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF1QPD
    : avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                                     VK8WM, vz512mem>,
      EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

// Scatter prefetch, PF0 variants (format MRM5m).
defm VSCATTERPF0DPS
    : avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                                     VK16WM, vz512mem>,
      EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF0QPS
    : avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                                     VK8WM, vz256mem>,
      EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF0DPD
    : avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                                     VK8WM, vy512xmem>,
      EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF0QPD
    : avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                                     VK8WM, vz512mem>,
      EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

// Scatter prefetch, PF1 variants (format MRM6m).
defm VSCATTERPF1DPS
    : avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                                     VK16WM, vz512mem>,
      EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF1QPS
    : avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                                     VK8WM, vz256mem>,
      EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF1DPD
    : avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                                     VK8WM, vy512xmem>,
      EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF1QPD
    : avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                                     VK8WM, vz512mem>,
      EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10423
// Mask register -> vector conversion for one vector width.
// Selected for a sign-extension of the mask register (Vec.KRC) to Vec.VT.
// The mnemonic is OpcodeStr followed by Vec.Suffix.
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec,
                            string OpcodeStr, SchedWrite Sched> {
  def rr : AVX512XS8I<opc, MRMSrcReg,
                      (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                      OpcodeStr # Vec.Suffix # "\t{$src, $dst|$dst, $src}",
                      [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
           EVEX, Sched<[Sched]>;
}
10430
// Mask -> vector conversion at all three vector widths for one element type.
// prd is the feature predicate guarding the instruction; the 256/128-bit
// forms additionally require VLX.
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
  // 512-bit form.
  let Predicates = [prd] in {
    defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>,
             EVEX_V512;
  }

  // 256- and 128-bit forms.
  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr,
                                 WriteVecMoveY>,
                EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr,
                                 WriteVecMoveX>,
                EVEX_V128;
  }
}
10441
// vpmovm2{b,w,d,q}: byte/word forms are guarded by BWI (opcode 0x28),
// dword/qword forms by DQI (opcode 0x38). The word and qword forms add REX_W.
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info,  "vpmovm2",
                                      HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2",
                                      HasBWI>, REX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2",
                                      HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2",
                                      HasDQI>, REX_W;
10446
// Vector -> mask register conversion for one vector width.
// Selected for X86pcmpgtm with an all-zeros first operand and the source
// vector as second operand (i.e. the "0 > src" per-element comparison).
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _,
                                         string OpcodeStr> {
  def rr : AVX512XS8I<opc, MRMSrcReg,
                      (outs _.KRC:$dst), (ins _.RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(set _.KRC:$dst,
                            (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
           EVEX, Sched<[WriteMove]>;
}
10453
// When VLX is unavailable, implement the 128/256-bit vector -> mask
// conversion with the 512-bit instruction: the narrow source is inserted
// into an otherwise undefined ExtendInfo-typed register, the instruction
// Name#"Zrr" is applied, and its result is copied to the narrow mask
// register class.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {
  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT
              (COPY_TO_REGCLASS
                (!cast<Instruction>(Name#"Zrr")
                  (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                 _.RC:$src, _.SubRegIdx)),
                _.KRC))>;
}
10466
// Vector -> mask conversion at all three vector widths for one element type.
// prd is the feature predicate guarding the instruction. With VLX the
// 256/128-bit instructions are defined directly; without VLX (and with
// 512-bit EVEX available) the narrow cases get patterns that reuse the
// 512-bit instruction.
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo,
                                         Predicate prd> {
  // 512-bit instruction.
  let Predicates = [prd] in {
    defm Z : convert_vector_to_mask_common<opc, VTInfo.info512, OpcodeStr>,
             EVEX_V512;
  }

  // 256- and 128-bit instructions.
  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256,
                                              OpcodeStr>,
                EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128,
                                              OpcodeStr>,
                EVEX_V128;
  }

  // Pattern-only fallbacks through the 512-bit instruction.
  let Predicates = [prd, NoVLX, HasEVEX512] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512,
                                                    VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512,
                                                    VTInfo.info128, NAME>;
  }
}
10484
// vpmov{b,w,d,q}2m: byte/word forms are guarded by BWI (opcode 0x29),
// dword/qword forms by DQI (opcode 0x39). The word and qword forms add REX_W.
defm VPMOVB2M
    : avx512_convert_vector_to_mask<0x29, "vpmovb2m", avx512vl_i8_info,
                                    HasBWI>;
defm VPMOVW2M
    : avx512_convert_vector_to_mask<0x29, "vpmovw2m", avx512vl_i16_info,
                                    HasBWI>, REX_W;
defm VPMOVD2M
    : avx512_convert_vector_to_mask<0x39, "vpmovd2m", avx512vl_i32_info,
                                    HasDQI>;
defm VPMOVQ2M
    : avx512_convert_vector_to_mask<0x39, "vpmovq2m", avx512vl_i64_info,
                                    HasDQI>, REX_W;
10493
// Sign-extension from a mask register to byte/word vectors when DQI is
// available but BWI is not. This cannot be handled in lowering because a
// target-independent DAG combine likes to merge sext and trunc, so it is
// done here: expand the mask to dwords (VPMOVM2D), then truncate the dwords
// to the requested element width.
let Predicates = [HasDQI, NoBWI] in {
  // v16i1 -> v16i8 through v16i32.
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  // v16i1 -> v16i16 through v16i32.
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}

let Predicates = [HasDQI, NoBWI, HasVLX] in {
  // v8i1 -> v8i16 through v8i32, using the 256-bit instructions.
  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}
10508
10509//===----------------------------------------------------------------------===//
10510// AVX-512 - COMPRESS and EXPAND
10511//
10512
// Instruction definitions for one vector width of a compress instruction:
//   rr  - register destination, built through AVX512_maskable with no
//         attached pattern (null_frag).
//   mr  - unmasked store to memory.
//   mrk - masked store to memory.
// None of these carries an ISel pattern of its own.
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                        string OpcodeStr,
                                        X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _,
                            (outs _.RC:$dst), (ins _.RC:$src1),
                            OpcodeStr, "$src1", "$src1",
                            (null_frag)>,
            AVX5128IBase, Sched<[sched]>;

  // The explicit flags below apply to mr only.
  let mayStore = 1, hasSideEffects = 0 in {
    def mr : AVX5128I<opc, MRMDestMem, (outs),
                      (ins _.MemOp:$dst, _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      []>,
             EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded]>;
  }

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                     !strconcat(OpcodeStr,
                       "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
                     []>,
            EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded]>;
}
10534
// Selection patterns for one vector width of a compress instruction.
// Instruction records are looked up as Name#_.ZSuffix#<form>.
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  // Compressing store -> masked memory form.
  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst,
                                  _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#mrk)
               addr:$dst, _.KRCWM:$mask, _.RC:$src)>;

  // Register compress with a pass-through value -> merge-masked form.
  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
               _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;

  // Register compress with an all-zeros pass-through -> zero-masked form.
  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
               _.KRCWM:$mask, _.RC:$src)>;
}
10547
// Compress instruction at all three vector widths for one element type:
// instruction definitions plus their selection patterns.
// Pred guards the instruction (HasAVX512 by default); the 256/128-bit forms
// additionally require VLX.
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  // 512-bit form.
  let Predicates = [Pred] in {
    defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr,
                                          sched>,
             compress_by_vec_width_lowering<VTInfo.info512, NAME>,
             EVEX_V512;
  }

  // 256- and 128-bit forms.
  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr,
                                             sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>,
                EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr,
                                             sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>,
                EVEX_V128;
  }
}
10563
// FIXME: Is there a better scheduler class for VPCOMPRESS?
// Integer compress uses opcode 0x8B, floating-point compress 0x8A; the
// 64-bit element forms add REX_W.
defm VPCOMPRESSD
    : compress_by_elt_width<0x8B, "vpcompressd", WriteVarShuffle256,
                            avx512vl_i32_info>, EVEX;
defm VPCOMPRESSQ
    : compress_by_elt_width<0x8B, "vpcompressq", WriteVarShuffle256,
                            avx512vl_i64_info>, EVEX, REX_W;
defm VCOMPRESSPS
    : compress_by_elt_width<0x8A, "vcompressps", WriteVarShuffle256,
                            avx512vl_f32_info>, EVEX;
defm VCOMPRESSPD
    : compress_by_elt_width<0x8A, "vcompresspd", WriteVarShuffle256,
                            avx512vl_f64_info>, EVEX, REX_W;
10573
// Expand: instruction definitions for one vector width.
//   rr - register source.
//   rm - memory source.
// Both are built through AVX512_maskable with no attached pattern
// (null_frag).
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                               string OpcodeStr,
                               X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                            (outs _.RC:$dst), (ins _.RC:$src1),
                            OpcodeStr, "$src1", "$src1",
                            (null_frag)>,
            AVX5128IBase, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                            (outs _.RC:$dst), (ins _.MemOp:$src1),
                            OpcodeStr, "$src1", "$src1",
                            (null_frag)>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}
10588
// Selection patterns for one vector width of an expand instruction.
// Instruction records are looked up as Name#_.ZSuffix#<form>.
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  // Expanding load with an undef pass-through -> zero-masked memory form.
  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
               _.KRCWM:$mask, addr:$src)>;

  // Expanding load with an all-zeros pass-through -> zero-masked memory form.
  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                     _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
               _.KRCWM:$mask, addr:$src)>;

  // Expanding load with a register pass-through -> merge-masked memory form.
  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                     (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#rmk)
               _.RC:$src0, _.KRCWM:$mask, addr:$src)>;

  // Register expand with a pass-through value -> merge-masked form.
  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
               _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;

  // Register expand with an all-zeros pass-through -> zero-masked form.
  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
               _.KRCWM:$mask, _.RC:$src)>;
}
10611
// Instantiates the expand instructions and their lowering patterns for all
// three vector widths of VTInfo:
//   Z    (info512) under [Pred]
//   Z256 (info256) and Z128 (info128) under [Pred, HasVLX]
// Pred defaults to HasAVX512. NAME is forwarded to the lowering multiclass as
// the instruction-name prefix.
10612multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10613                               X86FoldableSchedWrite sched,
10614                               AVX512VLVectorVTInfo VTInfo,
10615                               Predicate Pred = HasAVX512> {
10616  let Predicates = [Pred] in
10617  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10618           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10619
10620  let Predicates = [Pred, HasVLX] in {
10621    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10622                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10623    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10624                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10625  }
10626}
10627
// Instantiate the expand instructions for each element type. The integer
// forms (VPEXPANDD/Q) use opcode 0x89 and the FP forms (VEXPANDPS/PD) use
// 0x88; the 64-bit element variants additionally carry REX_W. The default
// predicate (HasAVX512) of expand_by_elt_width is used for all four.
10628// FIXME: Is there a better scheduler class for VPEXPAND?
10629defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10630                                      avx512vl_i32_info>, EVEX;
10631defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10632                                      avx512vl_i64_info>, EVEX, REX_W;
10633defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10634                                      avx512vl_f32_info>, EVEX;
10635defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10636                                      avx512vl_f64_info>, EVEX, REX_W;
10637
10638//handle instruction  reg_vec1 = op(reg_vec,imm)
10639//                               op(mem_vec,imm)
10640//                               op(broadcast(eltVt),imm)
10641//all instructions are created with FROUND_CURRENT
// Emits three source forms for vector type `_`, each via
// AVX512_maskable_split:
//   rri  - register source
//   rmi  - full-vector memory source (_.LdFrag)
//   rmbi - broadcast scalar memory source (_.BroadcastLdFrag, EVEX_B)
// The mnemonic is OpcodeStr with _.Suffix appended. OpNode and MaskOpNode are
// passed as two separate pattern arguments.
// NOTE(review): presumably OpNode is used for the unmasked record and
// MaskOpNode for the masked records - confirm against AVX512_maskable_split,
// which is defined outside this chunk.
// All forms are marked as using MXCSR and as possibly raising FP exceptions.
10642multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
10643                                      SDPatternOperator OpNode,
10644                                      SDPatternOperator MaskOpNode,
10645                                      X86FoldableSchedWrite sched,
10646                                      X86VectorVTInfo _> {
10647  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10648  defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
10649                      (ins _.RC:$src1, i32u8imm:$src2),
10650                      OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10651                      (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
10652                      (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
10653                      Sched<[sched]>;
10654  defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10655                    (ins _.MemOp:$src1, i32u8imm:$src2),
10656                    OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10657                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10658                            (i32 timm:$src2)),
10659                    (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10660                                (i32 timm:$src2))>,
10661                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10662  defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10663                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10664                    OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
10665                    "${src1}"#_.BroadcastStr#", $src2",
10666                    (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10667                            (i32 timm:$src2)),
10668                    (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10669                                (i32 timm:$src2))>, EVEX_B,
10670                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10671  }
10672}
10673
10674//handle instruction  reg_vec1 = op(reg_vec2,imm),{sae}
// Register-only {sae} form (rrib) of a packed unary FP operation with an
// immediate. It takes one vector source and one immediate, carries EVEX_B,
// and prints "{sae}" in both assembly syntaxes. Unlike
// avx512_unary_fp_packed_imm, this block sets Uses = [MXCSR] but does not set
// mayRaiseFPException.
10675multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10676                                          SDNode OpNode, X86FoldableSchedWrite sched,
10677                                          X86VectorVTInfo _> {
10678  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10679  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10680                      (ins _.RC:$src1, i32u8imm:$src2),
10681                      OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
10682                      "$src1, {sae}, $src2",
10683                      (OpNode (_.VT _.RC:$src1),
10684                              (i32 timm:$src2))>,
10685                      EVEX_B, Sched<[sched]>;
10686}
10687
// Instantiates a packed unary FP-with-immediate operation for all three
// vector widths of `_`:
//   Z    (info512, sched.ZMM) under [prd]: the regular rri/rmi/rmbi forms plus
//        the {sae} rrib form (OpNodeSAE).
//   Z128 (info128, sched.XMM) and Z256 (info256, sched.YMM) under
//        [prd, HasVLX]: the regular forms only, no {sae} form.
10688multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10689            AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
10690            SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
10691            Predicate prd>{
10692  let Predicates = [prd] in {
10693    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10694                                           sched.ZMM, _.info512>,
10695                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10696                                               sched.ZMM, _.info512>, EVEX_V512;
10697  }
10698  let Predicates = [prd, HasVLX] in {
10699    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10700                                           sched.XMM, _.info128>, EVEX_V128;
10701    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10702                                           sched.YMM, _.info256>, EVEX_V256;
10703  }
10704}
10705
10706//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10707//                               op(reg_vec2,mem_vec,imm)
10708//                               op(reg_vec2,broadcast(eltVt),imm)
10709//all instructions are created with FROUND_CURRENT
// Emits three forms for vector type `_` via AVX512_maskable; the second
// source operand ($src2) varies:
//   rri  - register
//   rmi  - full-vector memory (_.LdFrag)
//   rmbi - broadcast scalar memory (_.BroadcastLdFrag, EVEX_B)
// The same OpNode is used in every form. OpcodeStr is used as-is (no suffix
// appended). All forms are marked as using MXCSR and as possibly raising FP
// exceptions.
10710multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10711                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10712  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10713  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10714                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10715                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10716                      (OpNode (_.VT _.RC:$src1),
10717                              (_.VT _.RC:$src2),
10718                              (i32 timm:$src3))>,
10719                      Sched<[sched]>;
10720  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10721                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10722                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10723                    (OpNode (_.VT _.RC:$src1),
10724                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
10725                            (i32 timm:$src3))>,
10726                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10727  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10728                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10729                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10730                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10731                    (OpNode (_.VT _.RC:$src1),
10732                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10733                            (i32 timm:$src3))>, EVEX_B,
10734                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10735  }
10736}
10737
10738//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10739//                               op(reg_vec2,mem_vec,imm)
// Two-source operation with an 8-bit immediate (ImmT = Imm8, u8imm operand,
// i8 timm in the pattern). The result type (DestInfo) and the source type
// (SrcInfo) are separate parameters and may differ; the OpNode result is
// typed as DestInfo.VT. Only the rri (register) and rmi (full-vector memory)
// forms are defined here - there is no broadcast form.
10740multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10741                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10742                              X86VectorVTInfo SrcInfo>{
10743  let ExeDomain = DestInfo.ExeDomain, ImmT = Imm8 in {
10744  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10745                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10746                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10747                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10748                               (SrcInfo.VT SrcInfo.RC:$src2),
10749                               (i8 timm:$src3)))>,
10750                  Sched<[sched]>;
10751  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10752                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10753                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10754                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10755                             (SrcInfo.VT (bitconvert
10756                                                (SrcInfo.LdFrag addr:$src2))),
10757                             (i8 timm:$src3)))>,
10758                Sched<[sched.Folded, sched.ReadAfterFold]>;
10759  }
10760}
10761
10762//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10763//                               op(reg_vec2,mem_vec,imm)
10764//                               op(reg_vec2,broadcast(eltVt),imm)
// Inherits the rri and rmi forms from avx512_3Op_rm_imm8, using `_` as both
// the destination and the source type, and adds the broadcast-memory form
// rmbi (_.BroadcastLdFrag, EVEX_B).
10765multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10766                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10767  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10768
10769  let ExeDomain = _.ExeDomain, ImmT = Imm8 in
10770  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10771                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10772                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10773                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10774                    (OpNode (_.VT _.RC:$src1),
10775                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10776                            (i8 timm:$src3))>, EVEX_B,
10777                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10778}
10779
10780//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10781//                                      op(reg_vec2,mem_scalar,imm)
// Scalar FP operation with an immediate, built with AVX512_maskable_scalar.
//   rri - both sources in registers
//   rmi - second source from memory, using _.IntScalarMemOp as the operand
//         and _.ScalarIntMemFrags as the load fragment
// Both forms are marked as using MXCSR and as possibly raising FP exceptions.
10782multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10783                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10784  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10785  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10786                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10787                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10788                      (OpNode (_.VT _.RC:$src1),
10789                              (_.VT _.RC:$src2),
10790                              (i32 timm:$src3))>,
10791                      Sched<[sched]>;
10792  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10793                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10794                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10795                    (OpNode (_.VT _.RC:$src1),
10796                            (_.ScalarIntMemFrags addr:$src2),
10797                            (i32 timm:$src3))>,
10798                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10799  }
10800}
10801
10802//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Register-only {sae} form (rrib) of a packed two-source FP operation with an
// immediate. It carries EVEX_B and prints "{sae}" in both assembly syntaxes.
// Unlike avx512_fp_packed_imm, this block sets Uses = [MXCSR] but does not
// set mayRaiseFPException.
10803multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10804                                    SDNode OpNode, X86FoldableSchedWrite sched,
10805                                    X86VectorVTInfo _> {
10806  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10807  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10808                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10809                      OpcodeStr, "$src3, {sae}, $src2, $src1",
10810                      "$src1, $src2, {sae}, $src3",
10811                      (OpNode (_.VT _.RC:$src1),
10812                              (_.VT _.RC:$src2),
10813                              (i32 timm:$src3))>,
10814                      EVEX_B, Sched<[sched]>;
10815}
10816
10817//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Register-only {sae} form (rrib) of a scalar two-source FP operation with an
// immediate, built with AVX512_maskable_scalar. It carries EVEX_B and sets
// Uses = [MXCSR] without mayRaiseFPException.
// NOTE(review): this is the only multiclass in this group that spells the
// defm name as NAME#rrib instead of plain rrib - presumably the resulting
// record names are the same; confirm before normalizing.
10818multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10819                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10820  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10821  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10822                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10823                      OpcodeStr, "$src3, {sae}, $src2, $src1",
10824                      "$src1, $src2, {sae}, $src3",
10825                      (OpNode (_.VT _.RC:$src1),
10826                              (_.VT _.RC:$src2),
10827                              (i32 timm:$src3))>,
10828                      EVEX_B, Sched<[sched]>;
10829}
10830
// Instantiates a packed two-source FP-with-immediate operation for all three
// vector widths of `_`:
//   Z    (info512, sched.ZMM) under [prd]: regular rri/rmi/rmbi forms plus the
//        {sae} rrib form (OpNodeSAE).
//   Z128 (info128, sched.XMM) and Z256 (info256, sched.YMM) under
//        [prd, HasVLX]: regular forms only, no {sae} form.
10831multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10832            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10833            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10834  let Predicates = [prd] in {
10835    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10836                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10837                                  EVEX_V512;
10838
10839  }
10840  let Predicates = [prd, HasVLX] in {
10841    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10842                                  EVEX_V128;
10843    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10844                                  EVEX_V256;
10845  }
10846}
10847
// Instantiates avx512_3Op_rm_imm8 (register and memory forms, no broadcast)
// for all three vector widths, with separate destination and source type
// families. Z is guarded by [Pred]; Z128 and Z256 by [Pred, HasVLX]. Pred
// defaults to HasBWI. Every instantiation is tagged AVX512AIi8Base, EVEX and
// VVVV here, so users of this multiclass do not need to add them.
10848multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10849                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10850                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10851  let Predicates = [Pred] in {
10852    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10853                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX, VVVV;
10854  }
10855  let Predicates = [Pred, HasVLX] in {
10856    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10857                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX, VVVV;
10858    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10859                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX, VVVV;
10860  }
10861}
10862
// Instantiates avx512_3Op_imm8 (register, memory and broadcast forms) for all
// three vector widths of `_`. Z is guarded by [Pred]; Z128 and Z256 by
// [Pred, HasVLX]. Pred defaults to HasAVX512. Only the EVEX_V* width tag is
// attached here; any other encoding classes must be added by the user of this
// multiclass.
10863multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10864                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10865                                  Predicate Pred = HasAVX512> {
10866  let Predicates = [Pred] in {
10867    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10868                                EVEX_V512;
10869  }
10870  let Predicates = [Pred, HasVLX] in {
10871    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10872                                EVEX_V128;
10873    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10874                                EVEX_V256;
10875  }
10876}
10877
// Instantiates a scalar FP-with-immediate operation under [prd]: the regular
// rri/rmi forms (OpNode) plus the {sae} rrib form (OpNodeSAE). Only a single
// "Z" instantiation is produced, and it always uses sched.XMM.
10878multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10879                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10880                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10881  let Predicates = [prd] in {
10882     defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10883              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
10884  }
10885}
10886
// Instantiates a packed unary FP-with-immediate operation for three element
// types:
//   PH - avx512vl_f16_info, opcode opcPs, always predicated on HasFP16 (the
//        `prd` argument is not used for this variant), AVX512PSIi8Base + TA,
//        EVEX_CD8<16, CD8VF>
//   PS - avx512vl_f32_info, opcode opcPs, predicated on `prd`,
//        AVX512AIi8Base, EVEX_CD8<32, CD8VF>
//   PD - avx512vl_f64_info, opcode opcPd, predicated on `prd`,
//        AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W
10887multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10888                    bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
10889                    SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
10890                    X86SchedWriteWidths sched, Predicate prd>{
10891  defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
10892                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
10893                            AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
10894  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10895                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10896                            AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
10897  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10898                            opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10899                            AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, REX_W;
10900}
10901
// Packed unary FP-with-immediate instructions (PH/PS/PD variants each).
// Arguments are: mnemonic stem, PS opcode, PD opcode, unmasked node, masked
// node, SAE node, scheduling class, predicate for the PS/PD variants.
// VREDUCE and VGETMANT use the same opcode for PS and PD and the same node
// for masked and unmasked selection; VRNDSCALE uses distinct opcodes
// (0x08/0x09) and a distinct unmasked node (X86any_VRndScale).
10902defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10903                              X86VReduce, X86VReduce, X86VReduceSAE,
10904                              SchedWriteFRnd, HasDQI>;
10905defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10906                              X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
10907                              SchedWriteFRnd, HasAVX512>;
10908defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10909                              X86VGetMant, X86VGetMant, X86VGetMantSAE,
10910                              SchedWriteFRnd, HasAVX512>;
10911
// VRANGE: packed forms use opcode 0x50 and scalar forms use opcode 0x51. All
// four are predicated on HasDQI and scheduled with SchedWriteFAdd. The
// 64-bit element variants (PD/SD) additionally carry REX_W; the scalar
// variants carry VEX_LIG and use CD8VT1 instead of CD8VF.
10912defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10913                                                0x50, X86VRange, X86VRangeSAE,
10914                                                SchedWriteFAdd, HasDQI>,
10915      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
10916defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
10917                                                0x50, X86VRange, X86VRangeSAE,
10918                                                SchedWriteFAdd, HasDQI>,
10919      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
10920
10921defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
10922      f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10923      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
10924defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
10925      0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10926      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
10927
// Scalar VREDUCE (opcode 0x57) and VGETMANT (opcode 0x27) for f64, f32 and
// f16. The SD/SS variants use AVX512AIi8Base; the SH variants use
// AVX512PSIi8Base + TA and are predicated on HasFP16. The SD variants
// additionally carry REX_W. VREDUCESD/SS require HasDQI, while
// VGETMANTSD/SS require only HasAVX512.
10928defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
10929      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10930      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
10931defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
10932      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10933      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
10934defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
10935      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
10936      AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;
10937
10938defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
10939      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10940      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
10941defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
10942      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10943      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
10944defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
10945      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
10946      AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;
10947
// Shuffle built on the X86Shuf128 node, for one vector width. The pattern
// types the X86Shuf128 result as CastInfo.VT and bitconverts it to _.VT, so
// the instruction's own type (`_`) and the type the node is matched in
// (CastInfo) may differ.
//   rri  - register second source
//   rmi  - full-vector memory second source, loaded with CastInfo.LdFrag
//   rmbi - broadcast scalar memory second source, loaded with
//          _.BroadcastLdFrag (the instruction's own element type), EVEX_B
10948multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
10949                                          X86FoldableSchedWrite sched,
10950                                          X86VectorVTInfo _,
10951                                          X86VectorVTInfo CastInfo> {
10952  let ExeDomain = _.ExeDomain in {
10953  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10954                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10955                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10956                  (_.VT (bitconvert
10957                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
10958                                                  (i8 timm:$src3)))))>,
10959                  Sched<[sched]>;
10960  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10961                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10962                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10963                (_.VT
10964                 (bitconvert
10965                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
10966                                           (CastInfo.LdFrag addr:$src2),
10967                                           (i8 timm:$src3)))))>,
10968                Sched<[sched.Folded, sched.ReadAfterFold]>;
10969  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10970                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10971                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10972                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10973                    (_.VT
10974                     (bitconvert
10975                      (CastInfo.VT
10976                       (X86Shuf128 _.RC:$src1,
10977                                   (_.BroadcastLdFrag addr:$src2),
10978                                   (i8 timm:$src3)))))>, EVEX_B,
10979                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10980  }
10981}
10982
// Instantiates the 128-bit-lane shuffle for the 512-bit (Z, [HasAVX512]) and
// 256-bit (Z256, [HasAVX512, HasVLX]) widths. No 128-bit (Z128) variant is
// defined. The same `sched` class is used for both widths.
10983multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
10984                                   AVX512VLVectorVTInfo _,
10985                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc>{
10986  let Predicates = [HasAVX512] in
10987  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10988                                          _.info512, CastInfo.info512>, EVEX_V512;
10989
10990  let Predicates = [HasAVX512, HasVLX] in
10991  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10992                                             _.info256, CastInfo.info256>, EVEX_V256;
10993}
10994
// 128-bit-lane shuffles. FP variants use opcode 0x23 and integer variants use
// 0x43. The 32x4 variants match X86Shuf128 in the corresponding 64-bit
// element type (f64/i64 cast info) while keeping 32-bit elements for the
// instruction itself; the 64x2 variants use the same type for both and
// additionally carry REX_W. All four use WriteFShuffle256.
10995defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
10996      avx512vl_f32_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
10997defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
10998      avx512vl_f64_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
10999defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
11000      avx512vl_i32_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
11001defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
11002      avx512vl_i64_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
11003
// Defines the X86VAlign-based instruction for one vector type `_`, with an
// 8-bit immediate ($src3):
//   rri  - register second source
//   rmi  - full-vector memory second source (_.LdFrag)
//   rmbi - broadcast scalar memory second source (_.BroadcastLdFrag, EVEX_B)
11004multiclass avx512_valign<bits<8> opc, string OpcodeStr,
11005                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11006  let ExeDomain = _.ExeDomain in {
11007  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11008                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
11009                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11010                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
11011                  Sched<[sched]>;
11012  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11013                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
11014                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11015                (_.VT (X86VAlign _.RC:$src1,
11016                                 (bitconvert (_.LdFrag addr:$src2)),
11017                                 (i8 timm:$src3)))>,
11018                Sched<[sched.Folded, sched.ReadAfterFold]>;
11019
11020  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11021                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11022                   OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
11023                   "$src1, ${src2}"#_.BroadcastStr#", $src3",
11024                   (X86VAlign _.RC:$src1,
11025                              (_.VT (_.BroadcastLdFrag addr:$src2)),
11026                              (i8 timm:$src3))>, EVEX_B,
11027                   Sched<[sched.Folded, sched.ReadAfterFold]>;
11028  }
11029}
11030
// Instantiates avx512_valign for all three vector widths of `_`, always with
// opcode 0x03. Z is guarded by [HasAVX512]; Z128 and Z256 by
// [HasAVX512, HasVLX]. Each instantiation is tagged AVX512AIi8Base, EVEX and
// VVVV; the EVEX_CD8 tag is left to the user of this multiclass.
11031multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
11032                                AVX512VLVectorVTInfo _> {
11033  let Predicates = [HasAVX512] in {
11034    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
11035                                AVX512AIi8Base, EVEX, VVVV, EVEX_V512;
11036  }
11037  let Predicates = [HasAVX512, HasVLX] in {
11038    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
11039                                AVX512AIi8Base, EVEX, VVVV, EVEX_V128;
11040    // We can't really override the 256-bit version so change it back to unset.
    // NOTE(review): nothing is overridden or reset for Z256 in this block, so
    // the comment above looks stale - confirm and remove if so.
11041    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
11042                                AVX512AIi8Base, EVEX, VVVV, EVEX_V256;
11043  }
11044}
11045
// VALIGND / VALIGNQ: the 32-bit and 64-bit element instantiations of
// avx512_valign_common; VALIGNQ additionally carries REX_W.
11046defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
11047                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11048defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
11049                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
11050                                   REX_W;
11051
// VPALIGNR: byte-element instruction (opcode 0x0F) matching X86PAlignr, with
// avx512vl_i8_info as both destination and source type. No predicate is
// passed, so the HasBWI default of avx512_common_3Op_rm_imm8 applies.
11052defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
11053                                         SchedWriteShuffle, avx512vl_i8_info,
11054                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
11055
11056// Fragments to help convert valignq into masked valignd. Or valignq/valignd
11057// into vpalignr.
// Each transform rescales the immediate of the original node and returns it
// as an i8 immediate:
//   ValignqImm32XForm - multiplies by 2 (used when a v*i64 X86VAlign is
//                       selected as a VALIGND instruction)
//   ValignqImm8XForm  - multiplies by 8 (v*i64 X86VAlign selected as VPALIGNR)
//   ValigndImm8XForm  - multiplies by 4 (v*i32 X86VAlign selected as VPALIGNR)
// NOTE(review): no range check is applied to the scaled value here;
// presumably the original immediate is already small enough - confirm.
11058def ValignqImm32XForm : SDNodeXForm<timm, [{
11059  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
11060}]>;
11061def ValignqImm8XForm : SDNodeXForm<timm, [{
11062  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
11063}]>;
11064def ValigndImm8XForm : SDNodeXForm<timm, [{
11065  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
11066}]>;
11067
// Patterns that select a masked align whose mask is expressed in a different
// element type than the align itself. Each pattern matches
//   vselect_mask(mask, bitconvert(OpNode in From.VT), passthrough) : To.VT
// and emits the To-typed masked instruction with the immediate rewritten by
// ImmXForm. Despite its name, OpcodeStr is the instruction record-name
// prefix; the form suffix (rrik, rrikz, rmik, rmikz) is appended to it.
11068multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
11069                                        X86VectorVTInfo From, X86VectorVTInfo To,
11070                                        SDNodeXForm ImmXForm> {
  // Register sources, merge-masking with $src0.
11071  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11072                                 (bitconvert
11073                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11074                                                   timm:$src3))),
11075                                 To.RC:$src0)),
11076            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
11077                                                  To.RC:$src1, To.RC:$src2,
11078                                                  (ImmXForm timm:$src3))>;
11079
  // Register sources, zero-masking.
11080  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11081                                 (bitconvert
11082                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11083                                                   timm:$src3))),
11084                                 To.ImmAllZerosV)),
11085            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
11086                                                   To.RC:$src1, To.RC:$src2,
11087                                                   (ImmXForm timm:$src3))>;
11088
  // Second source loaded from memory (From.LdFrag), merge-masking with $src0.
11089  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11090                                 (bitconvert
11091                                  (From.VT (OpNode From.RC:$src1,
11092                                                   (From.LdFrag addr:$src2),
11093                                           timm:$src3))),
11094                                 To.RC:$src0)),
11095            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
11096                                                  To.RC:$src1, addr:$src2,
11097                                                  (ImmXForm timm:$src3))>;
11098
  // Second source loaded from memory (From.LdFrag), zero-masking.
11099  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11100                                 (bitconvert
11101                                  (From.VT (OpNode From.RC:$src1,
11102                                                   (From.LdFrag addr:$src2),
11103                                           timm:$src3))),
11104                                 To.ImmAllZerosV)),
11105            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
11106                                                   To.RC:$src1, addr:$src2,
11107                                                   (ImmXForm timm:$src3))>;
11108}
11109
// Extends avx512_vpalign_mask_lowering with patterns for a second source that
// is a broadcast load in the To element type (To.BroadcastLdFrag), bitcast
// to From.VT before feeding OpNode. Three patterns are added: the unmasked
// form (rmbi), the merge-masked form (rmbik) and the zero-masked form
// (rmbikz). In each, the immediate is rewritten by ImmXForm.
11110multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
11111                                           X86VectorVTInfo From,
11112                                           X86VectorVTInfo To,
11113                                           SDNodeXForm ImmXForm> :
11114      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  // Unmasked: the node is matched in From.VT, with no vselect_mask wrapper.
11115  def : Pat<(From.VT (OpNode From.RC:$src1,
11116                             (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
11117                             timm:$src3)),
11118            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
11119                                                  (ImmXForm timm:$src3))>;
11120
  // Merge-masking with $src0.
11121  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11122                                 (bitconvert
11123                                  (From.VT (OpNode From.RC:$src1,
11124                                           (bitconvert
11125                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
11126                                           timm:$src3))),
11127                                 To.RC:$src0)),
11128            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
11129                                                   To.RC:$src1, addr:$src2,
11130                                                   (ImmXForm timm:$src3))>;
11131
  // Zero-masking.
11132  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11133                                 (bitconvert
11134                                  (From.VT (OpNode From.RC:$src1,
11135                                           (bitconvert
11136                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
11137                                           timm:$src3))),
11138                                 To.ImmAllZerosV)),
11139            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
11140                                                    To.RC:$src1, addr:$src2,
11141                                                    (ImmXForm timm:$src3))>;
11142}
11143
// 512-bit vectors are lowered to the widest element type available, so the
// only conversion that has to be handled here is valignq -> valignd.
let Predicates = [HasAVX512] in
defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign,
                                       v8i64_info, v16i32_info,
                                       ValignqImm32XForm>;
11150
// 128-bit vectors are lowered to the widest element type available, so the
// only conversion that has to be handled here is valignq -> valignd.
let Predicates = [HasVLX] in
defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign,
                                       v2i64x_info, v4i32x_info,
                                       ValignqImm32XForm>;

// Same reasoning for 256-bit vectors: only valignq -> valignd is needed.
let Predicates = [HasVLX] in
defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign,
                                       v4i64x_info, v8i32x_info,
                                       ValignqImm32XForm>;
11161
let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128-bit VALIGND/VALIGNQ into VPALIGNR. Only the 128-bit
  // VPALIGNRZ128 form is targeted here; no 256-bit patterns are defined.
  // The immediate is rescaled to a byte count by the Valign{q,d}Imm8XForm
  // transform that matches the source element size.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}
11169
// VDBPSADBW (opcode 0x42), instantiated through avx512_common_3Op_rm_imm8
// with the i16 and i8 vector-info families.
defm VDBPSADBW
    : avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                                SchedWritePSADBW,
                                avx512vl_i16_info, avx512vl_i8_info>,
      EVEX_CD8<8, CD8VF>;
11173
// Maskable unary operation on one vector type: a register-source form (rr)
// and a full-vector load form (rm). Both take their execution domain from
// the vector info `_`.
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // Register source.
  let ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                            (outs _.RC:$dst), (ins _.RC:$src1),
                            OpcodeStr, "$src1", "$src1",
                            (_.VT (OpNode (_.VT _.RC:$src1)))>,
            EVEX, AVX5128IBase, Sched<[sched]>;

  // Memory source, loaded through the vector info's LdFrag.
  let ExeDomain = _.ExeDomain in
  defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                            (outs _.RC:$dst), (ins _.MemOp:$src1),
                            OpcodeStr, "$src1", "$src1",
                            (_.VT (OpNode
                                    (_.VT (bitconvert
                                            (_.LdFrag addr:$src1)))))>,
            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[sched.Folded]>;
}
11191
// avx512_unary_rm plus a broadcast-memory form (rmb): the source is a scalar
// memory operand read through BroadcastLdFrag, printed with the vector
// info's BroadcastStr suffix, and encoded with EVEX_B.
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _>
    : avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _,
                             (outs _.RC:$dst), (ins _.ScalarMemOp:$src1),
                             OpcodeStr,
                             !strconcat("${src1}", _.BroadcastStr),
                             !strconcat("${src1}", _.BroadcastStr),
                             (_.VT (OpNode
                                     (_.VT (_.BroadcastLdFrag addr:$src1))))>,
             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded]>;
}
11203
// Instantiates avx512_unary_rm at all three vector widths. The 512-bit form
// (Z) requires `prd`; the 256-bit (Z256) and 128-bit (Z128) forms require
// `prd` and HasVLX.
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                             VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                VTInfo.info128>,
                EVEX_V128;
  }
}
11218
// Instantiates avx512_unary_rmb (register, memory and broadcast forms) at all
// three vector widths. The 512-bit form (Z) requires `prd`; the 256-bit
// (Z256) and 128-bit (Z128) forms require `prd` and HasVLX.
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM,
                              VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM,
                                 VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM,
                                 VTInfo.info128>,
                EVEX_V128;
  }
}
11233
// Dword/qword pair of a unary operation. The Q variant uses opc_q, the "q"
// mnemonic suffix, the i64 vector infos and REX_W; the D variant uses opc_d,
// the "d" suffix and the i32 vector infos. Both include broadcast forms.
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q,
                                 string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, !strconcat(OpcodeStr, "q"), OpNode,
                               sched, avx512vl_i64_info, prd>,
           REX_W;
  defm D : avx512_unary_rmb_vl<opc_d, !strconcat(OpcodeStr, "d"), OpNode,
                               sched, avx512vl_i32_info, prd>;
}
11242
// Byte/word pair of a unary operation. The W variant uses opc_w, the "w"
// mnemonic suffix and the i16 vector infos; the B variant uses opc_b, the
// "b" suffix and the i8 vector infos. Both are WIG and have no broadcast
// form.
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w,
                                 string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, !strconcat(OpcodeStr, "w"), OpNode,
                              sched, avx512vl_i16_info, prd>,
           WIG;
  defm B : avx512_unary_rm_vl<opc_b, !strconcat(OpcodeStr, "b"), OpNode,
                              sched, avx512vl_i8_info, prd>,
           WIG;
}
11251
// All four element sizes of a unary operation: the D/Q variants are
// predicated on HasAVX512 and the B/W variants on HasBWI.
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME
      : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                              HasAVX512>,
        avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                              HasBWI>;
}
11261
// VPABSB/W/D/Q, selected for the generic `abs` node.
// Opcodes: b = 0x1C, w = 0x1D, d = 0x1E, q = 0x1F.
defm VPABS
    : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                             SchedWriteVecALU>;
11264
// VPABS without VLX: implement the 128/256-bit v2i64/v4i64 `abs` with the
// 512-bit VPABSQZrr. The narrow source is inserted into an undefined v8i64
// register and the matching subregister of the result is extracted.
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
  // 256-bit source, via sub_ymm.
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
              (VPABSQZrr
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
                               VR256X:$src, sub_ymm)),
              sub_ymm)>;

  // 128-bit source, via sub_xmm.
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
              (VPABSQZrr
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
                               VR128X:$src, sub_xmm)),
              sub_xmm)>;
}
11278
// Implements the 128/256-bit forms of a unary node with the 512-bit
// instruction InstrStr#"Zrr" when VLX is unavailable. The narrow source is
// inserted into an undefined 512-bit register and the matching subregister
// of the result is extracted.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX, HasEVEX512] in {
    // 256-bit source.
    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(!strconcat(InstrStr, "Zrr"))
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 _.info256.RC:$src1,
                                 _.info256.SubRegIdx)),
                _.info256.SubRegIdx)>;

    // 128-bit source.
    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(!strconcat(InstrStr, "Zrr"))
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 _.info128.RC:$src1,
                                 _.info128.SubRegIdx)),
                _.info128.SubRegIdx)>;
  }
}
11300
// VPLZCNTD/Q, selected for `ctlz`; requires HasCDI.
defm VPLZCNT
    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                            SchedWriteVecIMul, HasCDI>;

// VPCONFLICTD/Q, selected for X86Conflict; requires HasCDI.
// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT
    : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                            SchedWriteVecALU, HasCDI>;

// VPLZCNT: use the 512-bit version to implement 128/256-bit when VLX is not
// available.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
11311
11312//===---------------------------------------------------------------------===//
11313// Counts number of ones - VPOPCNTD and VPOPCNTQ
11314//===---------------------------------------------------------------------===//
11315
// VPOPCNTD/Q, selected for `ctpop`; requires HasVPOPCNTDQ.
// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT
    : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                            SchedWriteVecALU, HasVPOPCNTDQ>;

// Use the 512-bit version to implement 128/256-bit when VLX is not available.
defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info,
                             HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info,
                             HasVPOPCNTDQ>;
11322
11323//===---------------------------------------------------------------------===//
11324// Replicate Single FP - MOVSHDUP and MOVSLDUP
11325//===---------------------------------------------------------------------===//
11326
// Unary f32 shuffle at all vector widths (no broadcast form), predicated on
// HasAVX512 and placed in the TB map with the XS prefix.
multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                 avx512vl_f32_info, HasAVX512>,
              TB, XS;
}
11332
// VMOVSHDUP (0x16) and VMOVSLDUP (0x12).
defm VMOVSHDUP
    : avx512_replicate<0x16, "vmovshdup", X86Movshdup, SchedWriteFShuffle>;
defm VMOVSLDUP
    : avx512_replicate<0x12, "vmovsldup", X86Movsldup, SchedWriteFShuffle>;
11337
11338//===----------------------------------------------------------------------===//
11339// AVX-512 - MOVDDUP
11340//===----------------------------------------------------------------------===//
11341
// 128-bit MOVDDUP. The register form is matched as X86VBroadcast of the
// source vector; the memory form takes a scalar memory operand read through
// BroadcastLdFrag.
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _> {
  // Register source.
  let ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                            (outs _.RC:$dst), (ins _.RC:$src),
                            OpcodeStr, "$src", "$src",
                            (_.VT (X86VBroadcast (_.VT _.RC:$src)))>,
            EVEX, Sched<[sched]>;

  // Scalar memory source.
  let ExeDomain = _.ExeDomain in
  defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                            (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
                            OpcodeStr, "$src", "$src",
                            (_.VT (_.BroadcastLdFrag addr:$src))>,
            EVEX, EVEX_CD8<_.EltSize, CD8VH>, Sched<[sched.Folded]>;
}
11356
// MOVDDUP at all vector widths. The 512-bit and 256-bit forms go through the
// generic avx512_unary_rm with X86Movddup; the 128-bit form uses the
// dedicated avx512_movddup_128 multiclass. The 256/128-bit forms require
// HasAVX512 and HasVLX; the 512-bit form adds no explicit predicate here.
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128>,
                EVEX_V128;
  }
}
11369
// MOVDDUP for the f64 vector infos, with TB, XD and REX_W applied.
multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
                          X86SchedWriteWidths sched> {
  defm NAME : avx512_movddup_common<opc, OpcodeStr, sched,
                                    avx512vl_f64_info>,
              TB, XD, REX_W;
}
11375
// VMOVDDUP (opcode 0x12).
defm VMOVDDUP
    : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;
11377
// Select a v2f64 X86VBroadcast of a scalar f64 to VMOVDDUPZ128, copying the
// scalar from FR64X into VR128X first. Unmasked, masked and zero-masked
// variants are provided.
let Predicates = [HasVLX] in {
  // Unmasked.
  def : Pat<(v2f64 (X86VBroadcast f64:$src)),
            (VMOVDDUPZ128rr
              (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

  // Masked: select between the broadcast and $src0.
  def : Pat<(vselect_mask (v2i1 VK2WM:$mask),
                          (v2f64 (X86VBroadcast f64:$src)),
                          (v2f64 VR128X:$src0)),
            (VMOVDDUPZ128rrk
              VR128X:$src0, VK2WM:$mask,
              (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

  // Zero-masked: select between the broadcast and all zeros.
  def : Pat<(vselect_mask (v2i1 VK2WM:$mask),
                          (v2f64 (X86VBroadcast f64:$src)),
                          immAllZerosV),
            (VMOVDDUPZ128rrkz
              VK2WM:$mask,
              (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
}
11390
11391//===----------------------------------------------------------------------===//
11392// AVX-512 - Unpack Instructions
11393//===----------------------------------------------------------------------===//
11394
// FP unpack high/low. These are built from the FP binop multiclass, so the
// implicit register uses are cleared and mayRaiseFPException is reset to 0
// for both instantiations.
let Uses = []<Register>, mayRaiseFPException = 0 in {
  defm VUNPCKH
      : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
                          SchedWriteFShuffleSizes, 0, 1>;
  defm VUNPCKL
      : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
                          SchedWriteFShuffleSizes>;
}

// Integer unpack low/high. The byte and word variants require HasBWI.
defm VPUNPCKLBW
    : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                           SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW
    : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                           SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD
    : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                           SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD
    : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                           SchedWriteShuffle, HasBWI>;

// The dword and qword variants require HasAVX512.
defm VPUNPCKLDQ
    : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                           SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ
    : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                           SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ
    : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                           SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ
    : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                           SchedWriteShuffle, HasAVX512>;
11419
11420//===----------------------------------------------------------------------===//
11421// AVX-512 - Extract & Insert Integer Instructions
11422//===----------------------------------------------------------------------===//
11423
// Store form (mr) of a byte/word element extract: matches a store of the
// extracted element truncated to the vector's element type.
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode, X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
                     (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(store (_.EltVT
                               (trunc (OpNode (_.VT _.RC:$src1),
                                              timm:$src2))),
                             addr:$dst)]>,
           EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}
11433
// Byte element extract (opcode 0x14), requires HasBWI: a register form that
// writes a GR32orGR64 destination, plus the store form from
// avx512_extract_elt_bw_m.
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set GR32orGR64:$dst,
                             (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
             EVEX, TA, PD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>,
                TA, PD;
  }
}
11446
// Word element extract, requires HasBWI:
//  - rr:     opcode 0xC5 (MRMSrcReg, TB map), carries the selection pattern.
//  - rr_REV: opcode 0x15 (MRMDestReg, TA map), no pattern; codegen-only with
//            ForceDisassemble set.
//  - mr:     store form (opcode 0x15) from avx512_extract_elt_bw_m.
multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set GR32orGR64:$dst,
                             (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
             EVEX, TB, PD, Sched<[WriteVecExtract]>;

    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
      def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                             (ins _.RC:$src1, u8imm:$src2),
                             !strconcat(OpcodeStr,
                                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                             []>,
                   EVEX, TA, PD, Sched<[WriteVecExtract]>;
    }

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>,
                TA, PD;
  }
}
11465
// Dword/qword element extract (opcode 0x16), requires HasDQI. The register
// form writes a GRC destination; the store form writes the extracted element
// straight to memory. Both match the generic `extractelt` node.
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set GRC:$dst,
                             (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
             EVEX, TA, PD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                       (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(store (extractelt (_.VT _.RC:$src1), imm:$src2),
                               addr:$dst)]>,
             EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TA, PD,
             Sched<[WriteVecExtractSt]>;
  }
}
11485
// EVEX element-extract instructions for byte, word, dword and qword elements.
defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>,
                WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>,
                WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>,
                REX_W;
11490
// Memory-source form (rm) of an element insert: the inserted scalar is
// loaded through LdFrag and the index is matched with `immoperator`.
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag,
                               SDPatternOperator immoperator> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                     !strconcat(OpcodeStr,
                                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     [(set _.RC:$dst,
                           (_.VT (OpNode _.RC:$src1,
                                         (LdFrag addr:$src2),
                                         immoperator:$src3)))]>,
           EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VT1>,
           Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
11501
// Byte/word element insert, requires HasBWI: a register form taking the
// scalar from GR32orGR64, plus the memory form from avx512_insert_elt_m with
// a `timm` index.
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
                       (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
                       !strconcat(OpcodeStr,
                                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                       [(set _.RC:$dst,
                             (OpNode _.RC:$src1, GR32orGR64:$src2,
                                     timm:$src3))]>,
             EVEX, VVVV, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
  }
}
11515
// Dword/qword element insert, requires HasDQI: a register form taking the
// scalar from GRC, plus the memory form from avx512_insert_elt_m using the
// vector info's ScalarLdFrag. Both match the generic `insertelt` node with
// an `imm` index.
multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
                       (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
                       !strconcat(OpcodeStr,
                                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                       [(set _.RC:$dst,
                             (_.VT (insertelt _.RC:$src1, GRC:$src2,
                                              imm:$src3)))]>,
             EVEX, VVVV, TA, PD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag, imm>,
                TA, PD;
  }
}
11529}
11530
// EVEX element-insert instructions for byte, word, dword and qword elements.
defm VPINSRBZ
    : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                           extloadi8>,
      TA, PD, WIG;
defm VPINSRWZ
    : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                           extloadi16>,
      TB, PD, WIG;
defm VPINSRDZ
    : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ
    : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>,
      REX_W;
11537
// Without BWI: insert a byte obtained by bitcasting a v8i1 mask using
// VPINSRBrr. The mask is copied from VK8 into GR32.
let Predicates = [HasAVX512, NoBWI] in
def : Pat<(X86pinsrb VR128:$src1,
                     (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
                     timm:$src3),
          (VPINSRBrr VR128:$src1,
                     (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
                     timm:$src3)>;
11545
// With BWI: byte inserts selected to VPINSRBZrr.
let Predicates = [HasBWI] in {
  // Any-extended GR8 source: place it in the low 8 bits of an undefined i32.
  def : Pat<(X86pinsrb VR128:$src1,
                       (i32 (anyext (i8 GR8:$src2))),
                       timm:$src3),
            (VPINSRBZrr VR128:$src1,
                        (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                       GR8:$src2, sub_8bit),
                        timm:$src3)>;

  // Byte obtained by bitcasting a v8i1 mask: copy it from VK8 into GR32.
  def : Pat<(X86pinsrb VR128:$src1,
                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
                       timm:$src3),
            (VPINSRBZrr VR128:$src1,
                        (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
                        timm:$src3)>;
}
11556
// Always select FP16 instructions if available.
// These f16 load/store/bitconvert patterns go through VPINSRW/VPEXTRW and
// carry AddedComplexity = -10, which lowers their priority relative to
// competing patterns (presumably the native FP16 ones; they are not visible
// in this part of the file).
let Predicates = [HasBWI], AddedComplexity = -10 in {
  // f16 load: insert the 16-bit value into element 0 of an undefined v8i16.
  def : Pat<(f16 (load addr:$src)),
            (COPY_TO_REGCLASS
              (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0),
              FR16X)>;

  // f16 store: extract element 0 directly to memory. The EVEX register
  // classes (FR16X/VR128X) are used, matching the other patterns in this
  // block that feed the EVEX-encoded VPEXTRWZ/VPINSRWZ instructions.
  def : Pat<(store f16:$src, addr:$dst),
            (VPEXTRWZmr addr:$dst,
                        (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0)>;

  // f16 -> i16 bitcast: extract element 0 and take the low 16 bits.
  def : Pat<(i16 (bitconvert f16:$src)),
            (EXTRACT_SUBREG
              (VPEXTRWZrr (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0),
              sub_16bit)>;

  // i16 -> f16 bitcast: insert the GR16 value into element 0.
  def : Pat<(f16 (bitconvert i16:$src)),
            (COPY_TO_REGCLASS
              (VPINSRWZrr (v8i16 (IMPLICIT_DEF)),
                          (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit),
                          0),
              FR16X)>;
}
11564
11565//===----------------------------------------------------------------------===//
11566// VSHUFPS - VSHUFPD Operations
11567//===----------------------------------------------------------------------===//
11568
// VSHUFPS/VSHUFPD family (opcode 0xC6, node X86Shufp) for one FP vector-info
// family. The EVEX_CD8 element size is taken from the 512-bit info.
multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP> {
  defm NAME : avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                     SchedWriteFShuffle>,
              EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
              TA, EVEX, VVVV;
}
11575
// Single- and double-precision shuffle instantiations.
defm VSHUFPS : avx512_shufp<"vshufps", avx512vl_f32_info>,
               TB;
defm VSHUFPD : avx512_shufp<"vshufpd", avx512vl_f64_info>,
               TB, PD, REX_W;
11578
11579//===----------------------------------------------------------------------===//
11580// AVX-512 - Byte shift Left/Right
11581//===----------------------------------------------------------------------===//
11582
// Whole-register byte shift by an 8-bit immediate for one vector type: a
// register-source form (ri) and a memory-source form (mi). The ModRM formats
// for the two forms are supplied by the caller.
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  def ri : AVX512<opc, MRMr,
                  (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.RC:$dst,
                        (_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
           Sched<[sched]>;

  def mi : AVX512<opc, MRMm,
                  (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.RC:$dst,
                        (_.VT (OpNode
                                (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                (i8 timm:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
11599
// Instantiates avx512_shift_packed on the byte vector infos at all three
// widths. The 512-bit form requires `prd`; the 256/128-bit forms require
// `prd` and HasVLX.
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched,
                                   Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>,
                EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>,
                EVEX_V128;
  }
}
// VPSLLDQ and VPSRLDQ share opcode 0x73 and differ in their ModRM formats
// (MRM7r/MRM7m versus MRM3r/MRM3m). Both require HasBWI.
defm VPSLLDQ
    : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                              SchedWriteShuffle, HasBWI>,
      AVX512PDIi8Base, EVEX, VVVV, WIG;
defm VPSRLDQ
    : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                              SchedWriteShuffle, HasBWI>,
      AVX512PDIi8Base, EVEX, VVVV, WIG;
11619
// Two-source operation whose result vector type (_dst) differs from its
// source vector type (_src): a commutable register form (rr) and a form with
// the second source loaded from memory (rm).
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr,
                                X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst,
                                X86VectorVTInfo _src> {
  let isCommutable = 1 in {
    def rr : AVX512BI<opc, MRMSrcReg,
                      (outs _dst.RC:$dst),
                      (ins _src.RC:$src1, _src.RC:$src2),
                      OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _dst.RC:$dst,
                            (_dst.VT (OpNode (_src.VT _src.RC:$src1),
                                             (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  }

  def rm : AVX512BI<opc, MRMSrcMem,
                    (outs _dst.RC:$dst),
                    (ins _src.RC:$src1, _src.MemOp:$src2),
                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _dst.RC:$dst,
                          (_dst.VT
                            (OpNode (_src.VT _src.RC:$src1),
                                    (_src.VT (bitconvert
                                               (_src.LdFrag addr:$src2))))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
11640
// Instantiates avx512_psadbw_packed at all three widths, with i64 result
// vectors and i8 source vectors. The 512-bit form requires `prd`; the
// 256/128-bit forms require `prd` and HasVLX.
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr,
                                    X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>,
                EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>,
                EVEX_V128;
  }
}
11654
// VPSADBW (opcode 0xf6), requires HasBWI.
defm VPSADBW
    : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                               SchedWritePSADBW, HasBWI>,
      EVEX, VVVV, WIG;
11657
11658// Transforms to swizzle an immediate to enable better matching when
11659// memory operand isn't in the right place.
def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
  // Rewrite a VPTERNLOG immediate for operand 0 and operand 2 exchanged.
  const uint8_t Imm = N->getZExtValue();
  // Bits 0, 2, 5 and 7 stay in place (mask 0xa5); bits 1<->4 and 3<->6 are
  // exchanged, which is a shift by three positions in either direction.
  const uint8_t NewImm =
      (Imm & 0xa5) | ((Imm & 0x0a) << 3) | ((Imm & 0x50) >> 3);
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5. Bits 0, 1, 6 and 7 (mask 0xc3) are unaffected.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
  // Rewrite a VPTERNLOG immediate for operand 1 and operand 2 exchanged.
  const uint8_t Imm = N->getZExtValue();
  // Bits 0, 3, 4 and 7 stay in place (mask 0x99); bits 1<->2 and 5<->6 are
  // exchanged, which is a shift by one position in either direction.
  const uint8_t NewImm =
      (Imm & 0x99) | ((Imm & 0x22) << 1) | ((Imm & 0x44) >> 1);
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
  // Rewrite a VPTERNLOG immediate for the operands rotated so that the first
  // operand becomes the last.
  const uint8_t Imm = N->getZExtValue();
  // Bits 0 and 7 stay in place (mask 0x81).
  uint8_t NewImm = Imm & 0x81;
  // Remaining bits move 1->2, 2->4, 3->6, 4->1, 5->3, 6->5.
  NewImm |= (Imm & 0x02) << 1;
  NewImm |= (Imm & 0x04) << 2;
  NewImm |= (Imm & 0x08) << 3;
  NewImm |= (Imm & 0x10) >> 3;
  NewImm |= (Imm & 0x20) >> 2;
  NewImm |= (Imm & 0x40) >> 1;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
  // Rewrite a VPTERNLOG immediate for the operands rotated so that the last
  // operand becomes the first.
  const uint8_t Imm = N->getZExtValue();
  // Bits 0 and 7 stay in place (mask 0x81).
  uint8_t NewImm = Imm & 0x81;
  // Remaining bits move 1->4, 2->1, 3->5, 4->2, 5->6, 6->3.
  NewImm |= (Imm & 0x02) << 3;
  NewImm |= (Imm & 0x04) >> 1;
  NewImm |= (Imm & 0x08) << 2;
  NewImm |= (Imm & 0x10) >> 2;
  NewImm |= (Imm & 0x20) << 1;
  NewImm |= (Imm & 0x40) >> 3;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
11719
// VPTERNLOG instruction forms and operand-reordering patterns for one vector
// type.
//
// Defines three instruction forms through AVX512_maskable_3src, all with
// $src1 tied to $dst:
//   rri  - register third source.
//   rmi  - full-vector memory third source (_.MemOp / _.LdFrag).
//   rmbi - broadcast memory third source (_.ScalarMemOp / _.BroadcastLdFrag,
//          EVEX_B).
// It then adds selection patterns for masked nodes whose operands appear in a
// different order than the instruction takes them; each such pattern rewrites
// the immediate with the matching VPTERNLOG*_imm8 transform.
//
// Parameters:
//   opc       - opcode byte forwarded to AVX512_maskable_3src.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - SDNode matched by every pattern here.
//   sched     - scheduling class; the memory forms use sched.Folded and
//               sched.ReadAfterFold.
//   _         - X86VectorVTInfo providing RC, VT, KRCWM, MemOp, LdFrag, ...
//   Name      - instruction name prefix; the extra patterns look up
//               Name#_.ZSuffix#{rrik,rmik,rmikz,rmbik,rmbikz} with !cast.
//               These masked records are presumably produced by the
//               AVX512_maskable_3src expansions below - confirm against that
//               multiclass.
11720multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11721                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
11722                          string Name>{
11723  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  // NOTE(review): the trailing `1, 1` / `1, 0` template arguments are
  // presumably the commutability flags of AVX512_maskable_3src; its
  // definition is outside this chunk - confirm there.
11724  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11725                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11726                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11727                      (OpNode (_.VT _.RC:$src1),
11728                              (_.VT _.RC:$src2),
11729                              (_.VT _.RC:$src3),
11730                              (i8 timm:$src4)), 1, 1>,
11731                      AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
11732  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11733                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11734                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11735                    (OpNode (_.VT _.RC:$src1),
11736                            (_.VT _.RC:$src2),
11737                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
11738                            (i8 timm:$src4)), 1, 0>,
11739                    AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
11740                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11741  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11742                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11743                    OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11744                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
11745                    (OpNode (_.VT _.RC:$src1),
11746                            (_.VT _.RC:$src2),
11747                            (_.VT (_.BroadcastLdFrag addr:$src3)),
11748                            (i8 timm:$src4)), 1, 0>, EVEX_B,
11749                    AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
11750                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11751  }// Constraints = "$src1 = $dst"
11752
11753  // Additional patterns for matching passthru operand in other positions.
  // In every pattern below the vselect_mask passthru value is named $src1 so
  // that it lands in the instruction operand tied to $dst.
  // Passthru is the last OpNode operand: operands 0 and 2 are exchanged.
11754  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11755                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11756                   _.RC:$src1)),
11757            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11758             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  // Passthru is the middle OpNode operand: operands 0 and 1 are exchanged.
11759  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11760                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
11761                   _.RC:$src1)),
11762            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11763             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11764
11765  // Additional patterns for matching zero masking with loads in other
11766  // positions.
  // The load must end up as the instruction's memory operand ($src3), so the
  // node operands are permuted and the immediate adjusted accordingly.
11767  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11768                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11769                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11770                   _.ImmAllZerosV)),
11771            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11772             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11773  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11774                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11775                    _.RC:$src2, (i8 timm:$src4)),
11776                   _.ImmAllZerosV)),
11777            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11778             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11779
11780  // Additional patterns for matching masked loads with different
11781  // operand orders.
  // Merge-masked (passthru = $src1) variants, one per operand order in which
  // the load or the passthru is not already in its instruction position.
11782  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11783                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11784                    _.RC:$src2, (i8 timm:$src4)),
11785                   _.RC:$src1)),
11786            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11787             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11788  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11789                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11790                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11791                   _.RC:$src1)),
11792            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11793             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11794  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11795                   (OpNode _.RC:$src2, _.RC:$src1,
11796                    (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
11797                   _.RC:$src1)),
11798            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11799             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11800  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11801                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11802                    _.RC:$src1, (i8 timm:$src4)),
11803                   _.RC:$src1)),
11804            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11805             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11806  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11807                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11808                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11809                   _.RC:$src1)),
11810            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11811             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11812
11813  // Additional patterns for matching zero masking with broadcasts in other
11814  // positions.
  // Same two operand orders as the zero-masked load patterns above, but with
  // a broadcast load and the rmbikz form.
11815  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11816                   (OpNode (_.BroadcastLdFrag addr:$src3),
11817                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11818                   _.ImmAllZerosV)),
11819            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11820             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11821             (VPTERNLOG321_imm8 timm:$src4))>;
11822  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11823                   (OpNode _.RC:$src1,
11824                    (_.BroadcastLdFrag addr:$src3),
11825                    _.RC:$src2, (i8 timm:$src4)),
11826                   _.ImmAllZerosV)),
11827            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11828             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11829             (VPTERNLOG132_imm8 timm:$src4))>;
11830
11831  // Additional patterns for matching masked broadcasts with different
11832  // operand orders.
  // Same five operand orders as the merge-masked load patterns above, but
  // with a broadcast load and the rmbik form.
11833  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11834                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
11835                    _.RC:$src2, (i8 timm:$src4)),
11836                   _.RC:$src1)),
11837            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11838             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11839  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11840                   (OpNode (_.BroadcastLdFrag addr:$src3),
11841                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11842                   _.RC:$src1)),
11843            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11844             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11845  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11846                   (OpNode _.RC:$src2, _.RC:$src1,
11847                    (_.BroadcastLdFrag addr:$src3),
11848                    (i8 timm:$src4)), _.RC:$src1)),
11849            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11850             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11851  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11852                   (OpNode _.RC:$src2,
11853                    (_.BroadcastLdFrag addr:$src3),
11854                    _.RC:$src1, (i8 timm:$src4)),
11855                   _.RC:$src1)),
11856            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11857             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11858  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11859                   (OpNode (_.BroadcastLdFrag addr:$src3),
11860                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11861                   _.RC:$src1)),
11862            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11863             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11864}
11865
// Instantiates avx512_ternlog (opcode 0x25, node X86vpternlog) at all three
// vector widths of the AVX512VLVectorVTInfo `_`:
//   Z          - 512-bit, requires HasAVX512.
//   Z128, Z256 - 128/256-bit, require HasAVX512 and HasVLX.
// NAME (the defm name) is forwarded as the Name prefix that avx512_ternlog
// uses to look up the masked instruction records.
11866multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11867                                 AVX512VLVectorVTInfo _> {
11868  let Predicates = [HasAVX512] in
11869    defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11870                               _.info512, NAME>, EVEX_V512;
11871  let Predicates = [HasAVX512, HasVLX] in {
11872    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11873                               _.info128, NAME>, EVEX_V128;
11874    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11875                               _.info256, NAME>, EVEX_V256;
11876  }
11877}
11878
// The two VPTERNLOG instruction families: 32-bit element (D) and 64-bit
// element (Q) variants. Both use SchedWriteVecALU; the Q variant adds REX_W.
11879defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11880                                        avx512vl_i32_info>;
11881defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11882                                        avx512vl_i64_info>, REX_W;
11883
11884// Patterns to implement vnot using vpternlog instead of creating all ones
11885// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11886// so that the result is only dependent on src0. But we use the same source
11887// for all operands to prevent a false dependency.
// (Reading the immediate as an 8-entry truth table indexed by the three
// operand bits, 15 = 0b00001111 is 1 exactly for the entries where the first
// operand's bit is 0, i.e. the result is NOT of the first operand.)
11888// TODO: We should maybe have a more generalized algorithm for folding to
11889// vpternlog.
// 512-bit case: every integer element type maps to the same VPTERNLOGQZrri,
// since a bitwise NOT does not depend on the element width.
11890let Predicates = [HasAVX512] in {
11891  def : Pat<(v64i8 (vnot VR512:$src)),
11892            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11893  def : Pat<(v32i16 (vnot VR512:$src)),
11894            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11895  def : Pat<(v16i32 (vnot VR512:$src)),
11896            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11897  def : Pat<(v8i64 (vnot VR512:$src)),
11898            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11899}
11900
// vnot for 128/256-bit vectors when VLX is not available (but 512-bit EVEX
// is): the source is placed in the low subregister of an otherwise undefined
// v8i64 (INSERT_SUBREG into IMPLICIT_DEF), the 512-bit VPTERNLOGQZrri is run
// with immediate 15, and the low sub_xmm / sub_ymm of the result is
// extracted. The upper lanes hold undefined data but are never read back.
11901let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
  // 128-bit element types, widened through sub_xmm.
11902  def : Pat<(v16i8 (vnot VR128X:$src)),
11903            (EXTRACT_SUBREG
11904             (VPTERNLOGQZrri
11905              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11906              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11907              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11908              (i8 15)), sub_xmm)>;
11909  def : Pat<(v8i16 (vnot VR128X:$src)),
11910            (EXTRACT_SUBREG
11911             (VPTERNLOGQZrri
11912              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11913              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11914              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11915              (i8 15)), sub_xmm)>;
11916  def : Pat<(v4i32 (vnot VR128X:$src)),
11917            (EXTRACT_SUBREG
11918             (VPTERNLOGQZrri
11919              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11920              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11921              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11922              (i8 15)), sub_xmm)>;
11923  def : Pat<(v2i64 (vnot VR128X:$src)),
11924            (EXTRACT_SUBREG
11925             (VPTERNLOGQZrri
11926              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11927              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11928              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11929              (i8 15)), sub_xmm)>;
11930
  // 256-bit element types, widened through sub_ymm.
11931  def : Pat<(v32i8 (vnot VR256X:$src)),
11932            (EXTRACT_SUBREG
11933             (VPTERNLOGQZrri
11934              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11935              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11936              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11937              (i8 15)), sub_ymm)>;
11938  def : Pat<(v16i16 (vnot VR256X:$src)),
11939            (EXTRACT_SUBREG
11940             (VPTERNLOGQZrri
11941              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11942              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11943              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11944              (i8 15)), sub_ymm)>;
11945  def : Pat<(v8i32 (vnot VR256X:$src)),
11946            (EXTRACT_SUBREG
11947             (VPTERNLOGQZrri
11948              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11949              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11950              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11951              (i8 15)), sub_ymm)>;
11952  def : Pat<(v4i64 (vnot VR256X:$src)),
11953            (EXTRACT_SUBREG
11954             (VPTERNLOGQZrri
11955              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11956              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11957              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11958              (i8 15)), sub_ymm)>;
11959}
11960
// vnot for 128/256-bit vectors when VLX is available: select the native
// VPTERNLOGQZ128rri / VPTERNLOGQZ256rri with immediate 15 and the source
// repeated in all three operand slots, for every integer element type.
11961let Predicates = [HasVLX] in {
11962  def : Pat<(v16i8 (vnot VR128X:$src)),
11963            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11964  def : Pat<(v8i16 (vnot VR128X:$src)),
11965            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11966  def : Pat<(v4i32 (vnot VR128X:$src)),
11967            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11968  def : Pat<(v2i64 (vnot VR128X:$src)),
11969            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11970
11971  def : Pat<(v32i8 (vnot VR256X:$src)),
11972            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11973  def : Pat<(v16i16 (vnot VR256X:$src)),
11974            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11975  def : Pat<(v8i32 (vnot VR256X:$src)),
11976            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11977  def : Pat<(v4i64 (vnot VR256X:$src)),
11978            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11979}
11980
11981//===----------------------------------------------------------------------===//
11982// AVX-512 - FixupImm
11983//===----------------------------------------------------------------------===//
11984
// Packed VFIXUPIMM forms for one vector type, matching X86VFixupimm.
//
// Defines rri (register table operand), rmi (full-vector memory table operand)
// and rmbi (broadcast memory table operand, EVEX_B). All three tie $src1 to
// $dst, use MXCSR and are marked mayRaiseFPException.
//
// Parameters:
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; _.Suffix is appended.
//   sched     - scheduling class; memory forms use sched.Folded and
//               sched.ReadAfterFold.
//   _         - X86VectorVTInfo of the floating-point data operands.
//   TblVT     - X86VectorVTInfo giving the value type and load fragments of
//               the third (table) operand.
11985multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
11986                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
11987                                  X86VectorVTInfo TblVT>{
11988  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
11989      Uses = [MXCSR], mayRaiseFPException = 1 in {
11990    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11991                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11992                         OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11993                        (X86VFixupimm (_.VT _.RC:$src1),
11994                                      (_.VT _.RC:$src2),
11995                                      (TblVT.VT _.RC:$src3),
11996                                      (i32 timm:$src4))>, Sched<[sched]>;
11997    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11998                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
11999                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12000                      (X86VFixupimm (_.VT _.RC:$src1),
12001                                    (_.VT _.RC:$src2),
12002                                    (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
12003                                    (i32 timm:$src4))>,
12004                      Sched<[sched.Folded, sched.ReadAfterFold]>;
12005    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12006                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12007                    OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
12008                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
12009                      (X86VFixupimm (_.VT _.RC:$src1),
12010                                    (_.VT _.RC:$src2),
12011                                    (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
12012                                    (i32 timm:$src4))>,
12013                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
12014  } // Constraints = "$src1 = $dst"
12015}
12016
// Extends avx512_fixupimm_packed (inherited, so rri/rmi/rmbi are defined too)
// with rrib: a register-only form printed with "{sae}" that matches
// X86VFixupimmSAE and sets EVEX_B. Unlike the inherited forms, rrib lists
// only Uses = [MXCSR]; mayRaiseFPException is not set in this let block.
12017multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
12018                                      X86FoldableSchedWrite sched,
12019                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
12020  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
12021let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
12022  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12023                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12024                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12025                      "$src2, $src3, {sae}, $src4",
12026                      (X86VFixupimmSAE (_.VT _.RC:$src1),
12027                                       (_.VT _.RC:$src2),
12028                                       (TblVT.VT _.RC:$src3),
12029                                       (i32 timm:$src4))>,
12030                      EVEX_B, Sched<[sched]>;
12031  }
12032}
12033
// Scalar VFIXUPIMM forms, all requiring HasAVX512 and tying $src1 to $dst:
//   rri  - register table operand, matches X86VFixupimms (SIMD_EXC).
//   rrib - register table operand printed with "{sae}", matches
//          X86VFixupimmSAEs, EVEX_B, Uses = [MXCSR].
//   rmi  - scalar memory table operand (scalar_to_vector of a scalar load),
//          matches X86VFixupimms (SIMD_EXC).
//
// Parameters:
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; _.Suffix is appended.
//   sched     - scheduling class; only the memory form uses sched.Folded and
//               sched.ReadAfterFold.
//   _         - X86VectorVTInfo of the floating-point data operands.
//   _src3VT   - X86VectorVTInfo giving the value type, register class and
//               scalar load fragment of the third (table) operand.
12034multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
12035                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
12036                                  X86VectorVTInfo _src3VT> {
12037  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
12038      ExeDomain = _.ExeDomain in {
12039    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12040                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12041                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12042                      (X86VFixupimms (_.VT _.RC:$src1),
12043                                     (_.VT _.RC:$src2),
12044                                     (_src3VT.VT _src3VT.RC:$src3),
12045                                     (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
    // rrib is a register-only (MRMSrcReg) form with no folded load, so it is
    // scheduled with the plain `sched` class, like rri above and like rrib in
    // avx512_fixupimm_packed_sae.
12046    let Uses = [MXCSR] in
12047    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12048                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12049                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12050                      "$src2, $src3, {sae}, $src4",
12051                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
12052                                        (_.VT _.RC:$src2),
12053                                        (_src3VT.VT _src3VT.RC:$src3),
12054                                        (i32 timm:$src4))>,
12055                      EVEX_B, Sched<[sched]>;
12056    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
12057                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12058                     OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12059                     (X86VFixupimms (_.VT _.RC:$src1),
12060                                    (_.VT _.RC:$src2),
12061                                    (_src3VT.VT (scalar_to_vector
12062                                              (_src3VT.ScalarLdFrag addr:$src3))),
12063                                    (i32 timm:$src4))>,
12064                     Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
12065  }
12066}
12067
// Instantiates packed "vfixupimm" (opcode 0x54) at all three vector widths:
//   Z          - 512-bit, HasAVX512; uses avx512_fixupimm_packed_sae, so it
//                is the only width that also gets the {sae} rrib form.
//   Z128, Z256 - 128/256-bit, HasAVX512 and HasVLX; use
//                avx512_fixupimm_packed (no {sae} form).
// _Vec supplies the data vector types and _Tbl the table operand types.
12068multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
12069                                      AVX512VLVectorVTInfo _Vec,
12070                                      AVX512VLVectorVTInfo _Tbl> {
12071  let Predicates = [HasAVX512] in
12072    defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
12073                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
12074                                EVEX, VVVV, EVEX_V512;
12075  let Predicates = [HasAVX512, HasVLX] in {
12076    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
12077                            _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
12078                            EVEX, VVVV, EVEX_V128;
12079    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
12080                            _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
12081                            EVEX, VVVV, EVEX_V256;
12082  }
12083}
12084
// VFIXUPIMM instantiations. The scalar SS/SD forms use opcode 0x55 with
// SchedWriteFAdd.Scl, VEX_LIG and a CD8VT1 displacement scale; their table
// operand is typed as an integer vector (v4i32x_info / v2i64x_info). The
// packed PS/PD forms pair each floating-point VT info with the integer VT
// info of the same element width and use CD8VF. The 64-bit element variants
// (SD, PD) add REX_W.
12085defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12086                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
12087                          AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
12088defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12089                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
12090                          AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
12091defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
12092                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12093defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
12094                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
12095
12096// Patterns used to select SSE scalar fp arithmetic instructions from
12097// either:
12098//
12099// (1) a scalar fp operation followed by a blend
12100//
12101// The effect is that the backend no longer emits unnecessary vector
12102// insert instructions immediately after SSE scalar fp instructions
12103// like addss or mulss.
12104//
12105// For example, given the following code:
12106//   __m128 foo(__m128 A, __m128 B) {
12107//     A[0] += B[0];
12108//     return A;
12109//   }
12110//
12111// Previously we generated:
12112//   addss %xmm0, %xmm1
12113//   movss %xmm1, %xmm0
12114//
12115// We now generate:
12116//   addss %xmm1, %xmm0
12117//
12118// (2) a vector packed single/double fp operation followed by a vector insert
12119//
12120// The effect is that the backend converts the packed fp instruction
12121// followed by a vector insert into a single SSE scalar fp instruction.
12122//
12123// For example, given the following code:
12124//   __m128 foo(__m128 A, __m128 B) {
12125//     __m128 C = A + B;
12126//     return (__m128) {C[0], A[1], A[2], A[3]};
12127//   }
12128//
12129// Previously we generated:
12130//   addps %xmm0, %xmm1
12131//   movss %xmm1, %xmm0
12132//
12133// We now generate:
12134//   addss %xmm1, %xmm0
12135
12136// TODO: Some canonicalization in lowering would simplify the number of
12137// patterns we have to try to match.
// Selection patterns (all under HasAVX512) that fold "extract element 0,
// scalar op, insert back via MoveNode" into a single "V"#OpcPrefix#"Z*_Int"
// instruction. Three pairs are defined, each with a register and a
// scalar-load second operand:
//   unmasked     -> Zrr_Int   / Zrm_Int
//   merge-masked -> Zrr_Intk  / Zrm_Intk   (passthru is the scalar $src0)
//   zero-masked  -> Zrr_Intkz / Zrm_Intkz  (select against ZeroFP)
//
// Parameters:
//   Op        - operator matched in the unmasked patterns.
//   MaskedOp  - SDNode matched under X86selects_mask in the masked patterns.
//   OpcPrefix - instruction name stem, e.g. "ADDSS".
//   MoveNode  - the insert-low-element node (X86Movss / X86Movsd / X86Movsh
//               at the instantiation sites in this file).
//   _         - X86VectorVTInfo of the 128-bit vector type.
//   ZeroFP    - PatLeaf for the floating-point zero of the element type.
12138multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
12139                                          string OpcPrefix, SDNode MoveNode,
12140                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
12141  let Predicates = [HasAVX512] in {
12142    // extracted scalar math op with insert via MoveNode
12143    def : Pat<(MoveNode
12144               (_.VT VR128X:$dst),
12145               (_.VT (scalar_to_vector
12146                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12147                          _.FRC:$src)))),
12148              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
12149               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
12150    def : Pat<(MoveNode
12151               (_.VT VR128X:$dst),
12152               (_.VT (scalar_to_vector
12153                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12154                          (_.ScalarLdFrag addr:$src))))),
12155              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
12156
12157    // extracted merge-masked scalar math op with insert via MoveNode; the
    // scalar passthru $src0 is copied into VR128X for the tied operand.
12158    def : Pat<(MoveNode (_.VT VR128X:$src1),
12159               (scalar_to_vector
12160                (X86selects_mask VK1WM:$mask,
12161                            (MaskedOp (_.EltVT
12162                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12163                                      _.FRC:$src2),
12164                            _.FRC:$src0))),
12165              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
12166               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12167               VK1WM:$mask, _.VT:$src1,
12168               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12169    def : Pat<(MoveNode (_.VT VR128X:$src1),
12170               (scalar_to_vector
12171                (X86selects_mask VK1WM:$mask,
12172                            (MaskedOp (_.EltVT
12173                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12174                                      (_.ScalarLdFrag addr:$src2)),
12175                            _.FRC:$src0))),
12176              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
12177               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12178               VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12179
12180    // extracted zero-masked scalar math op with insert via MoveNode
12181    def : Pat<(MoveNode (_.VT VR128X:$src1),
12182               (scalar_to_vector
12183                (X86selects_mask VK1WM:$mask,
12184                            (MaskedOp (_.EltVT
12185                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12186                                      _.FRC:$src2), (_.EltVT ZeroFP)))),
12187      (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
12188          VK1WM:$mask, _.VT:$src1,
12189          (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12190    def : Pat<(MoveNode (_.VT VR128X:$src1),
12191               (scalar_to_vector
12192                (X86selects_mask VK1WM:$mask,
12193                            (MaskedOp (_.EltVT
12194                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12195                                      (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
12196      (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12197  }
12198}
12199
// Instantiate the scalar math fold patterns for add/sub/mul/div on f32
// (SS, X86Movss), f64 (SD, X86Movsd) and f16 (SH, X86Movsh). The unmasked
// patterns match the any_* operators; the masked patterns match the plain
// fadd/fsub/fmul/fdiv nodes.
12200defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
12201defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
12202defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
12203defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
12204
12205defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
12206defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
12207defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
12208defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
12209
12210defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
12211defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
12212defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
12213defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;
12214
// Selection pattern (under HasAVX512) that folds "extract element 0 of $src,
// apply the unary OpNode, insert into $dst via Move" into the single
// "V"#OpcPrefix#"Zr_Int" instruction with operands ($dst, $src).
//   OpNode    - unary operator to match.
//   OpcPrefix - instruction name stem, e.g. "SQRTSS".
//   Move      - the insert-low-element node.
//   _         - X86VectorVTInfo of the vector type.
12215multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
12216                                             SDNode Move, X86VectorVTInfo _> {
12217  let Predicates = [HasAVX512] in {
12218    def : Pat<(_.VT (Move _.VT:$dst,
12219                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
12220              (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
12221  }
12222}
12223
// Instantiate the unary fold pattern for scalar square root on f32, f64
// and f16.
12224defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
12225defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
12226defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;
12227
12228//===----------------------------------------------------------------------===//
12229// AES instructions
12230//===----------------------------------------------------------------------===//
12231
// EVEX-encoded VAES instruction at three vector widths, each built from
// AESI_binop_rm_int:
//   Z128, Z256 - require HasVLX and HasVAES.
//   Z          - requires HasAVX512 and HasVAES.
//
// Parameters:
//   Op        - opcode byte.
//   OpStr     - assembly mnemonic.
//   IntPrefix - name of the 128-bit intrinsic record; "_256" and "_512" are
//               appended to obtain the wider intrinsics.
// NOTE(review): the literal `0` passed to AESI_binop_rm_int is presumably a
// flag of that multiclass (defined outside this chunk) - confirm there.
12232multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
12233  let Predicates = [HasVLX, HasVAES] in {
12234    defm Z128 : AESI_binop_rm_int<Op, OpStr,
12235                                  !cast<Intrinsic>(IntPrefix),
12236                                  loadv2i64, 0, VR128X, i128mem>,
12237                  EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V128, WIG;
12238    defm Z256 : AESI_binop_rm_int<Op, OpStr,
12239                                  !cast<Intrinsic>(IntPrefix#"_256"),
12240                                  loadv4i64, 0, VR256X, i256mem>,
12241                  EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V256, WIG;
12242    }
12243    let Predicates = [HasAVX512, HasVAES] in
12244    defm Z    : AESI_binop_rm_int<Op, OpStr,
12245                                  !cast<Intrinsic>(IntPrefix#"_512"),
12246                                  loadv8i64, 0, VR512, i512mem>,
12247                  EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V512, WIG;
12248}
12249
// The four VAES operations (opcodes 0xDC-0xDF), each bound to the
// corresponding int_x86_aesni_* intrinsic family.
12250defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
12251defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
12252defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
12253defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
12254
12255//===----------------------------------------------------------------------===//
12256// PCLMUL instructions - Carry less multiplication
12257//===----------------------------------------------------------------------===//
12258
// EVEX-encoded VPCLMULQDQ at three vector widths, each instantiating the
// vpclmulqdq multiclass with the matching register class, memory operand,
// load fragment and intrinsic. The 512-bit form requires HasAVX512 and
// HasVPCLMULQDQ; the 128/256-bit forms require HasVLX and HasVPCLMULQDQ.
12259let Predicates = [HasAVX512, HasVPCLMULQDQ] in
12260defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
12261                              EVEX, VVVV, EVEX_V512, EVEX_CD8<64, CD8VF>, WIG;
12262
12263let Predicates = [HasVLX, HasVPCLMULQDQ] in {
12264defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
12265                              EVEX, VVVV, EVEX_V128, EVEX_CD8<64, CD8VF>, WIG;
12266
12267defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
12268                                int_x86_pclmulqdq_256>, EVEX, VVVV, EVEX_V256,
12269                                EVEX_CD8<64, CD8VF>, WIG;
12270}
12271
12272// Aliases
// One vpclmulqdq_aliases instantiation per EVEX VPCLMULQDQ width defined
// above, keyed by the instruction name prefix. (The alias mnemonics
// themselves come from vpclmulqdq_aliases, defined outside this chunk.)
12273defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
12274defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
12275defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
12276
12277//===----------------------------------------------------------------------===//
12278// VBMI2
12279//===----------------------------------------------------------------------===//
12280
// VBMI2 variable-shift instruction with a register (r) and a full-vector
// memory (m) third source, both built with AVX512_maskable_3src and with
// $src1 tied to $dst.
//
// Parameters:
//   Op     - opcode byte.
//   OpStr  - assembly mnemonic.
//   OpNode - three-operand SDNode matched as (OpNode $src1, $src2, $src3).
//   sched  - scheduling class; the memory form uses sched.Folded and
//            sched.ReadAfterFold.
//   VTI    - X86VectorVTInfo of the vector type.
12281multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
12282                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12283  let Constraints = "$src1 = $dst",
12284      ExeDomain   = VTI.ExeDomain in {
12285    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12286                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12287                "$src3, $src2", "$src2, $src3",
12288                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
12289                T8, PD, EVEX, VVVV, Sched<[sched]>;
12290    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12291                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12292                "$src3, $src2", "$src2, $src3",
12293                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12294                        (VTI.VT (VTI.LdFrag addr:$src3))))>,
12295                T8, PD, EVEX, VVVV,
12296                Sched<[sched.Folded, sched.ReadAfterFold]>;
12297  }
12298}
12299
// Extends VBMI2_shift_var_rm (r and m forms) with a broadcast-from-memory
// form (mb). Parameters are the same as VBMI2_shift_var_rm.
// In the mb form $src3 is a scalar memory operand loaded through
// VTI.BroadcastLdFrag, the assembly strings append VTI.BroadcastStr to the
// memory operand, and the encoding adds EVEX_B.
12300multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12301                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
12302         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
12303  let Constraints = "$src1 = $dst",
12304      ExeDomain   = VTI.ExeDomain in
12305  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12306              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
12307              "${src3}"#VTI.BroadcastStr#", $src2",
12308              "$src2, ${src3}"#VTI.BroadcastStr,
12309              (OpNode VTI.RC:$src1, VTI.RC:$src2,
12310               (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12311              T8, PD, EVEX, VVVV, EVEX_B,
12312              Sched<[sched.Folded, sched.ReadAfterFold]>;
12313}
12314
// Instantiates VBMI2_shift_var_rm (no broadcast form) at all three vector
// widths of VTI, selecting the matching per-width scheduling class.
//   Z    - 512-bit, requires HasVBMI2.
//   Z256 - 256-bit, requires HasVBMI2 and HasVLX.
//   Z128 - 128-bit, requires HasVBMI2 and HasVLX.
12315multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
12316                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12317  let Predicates = [HasVBMI2] in
12318  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12319                                   EVEX_V512;
12320  let Predicates = [HasVBMI2, HasVLX] in {
12321    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12322                                   EVEX_V256;
12323    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12324                                   EVEX_V128;
12325  }
12326}
12327
// Same width fan-out as VBMI2_shift_var_rm_common, but built on
// VBMI2_shift_var_rmb so each width also gets the broadcast (mb) form.
//   Z    - 512-bit, requires HasVBMI2.
//   Z256 - 256-bit, requires HasVBMI2 and HasVLX.
//   Z128 - 128-bit, requires HasVBMI2 and HasVLX.
12328multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
12329                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12330  let Predicates = [HasVBMI2] in
12331  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12332                                    EVEX_V512;
12333  let Predicates = [HasVBMI2, HasVLX] in {
12334    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12335                                    EVEX_V256;
12336    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12337                                    EVEX_V128;
12338  }
12339}
// Word/dword/qword element variants of a variable concat-and-shift.
//   wOp    - opcode byte for the word (W) variant.
//   dqOp   - opcode byte shared by the dword (D) and qword (Q) variants.
//   Prefix - mnemonic stem; "w", "d" or "q" is appended per variant.
// W uses the rm-only helper (no broadcast form); D and Q use the rmb helper.
// W and Q add REX_W; each variant's EVEX_CD8 element size matches its
// element width (16, 32, 64).
12340multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
12341                           SDNode OpNode, X86SchedWriteWidths sched> {
12342  defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
12343             avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>;
12344  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
12345             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12346  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
12347             avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
12348}
12349
// Word/dword/qword element variants of the imm8-based concat-and-shift.
//   wOp    - opcode byte for the word (W) variant.
//   dqOp   - opcode byte shared by the dword (D) and qword (Q) variants.
//   Prefix - mnemonic stem; "w", "d" or "q" is appended per variant.
// W is built from avx512_common_3Op_rm_imm8; D and Q are built from
// avx512_common_3Op_imm8 and explicitly add AVX512AIi8Base, EVEX and VVVV.
// Both helpers are defined outside this chunk and take HasVBMI2 as predicate.
// Note the two helpers order their template arguments differently.
12350multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
12351                           SDNode OpNode, X86SchedWriteWidths sched> {
12352  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
12353             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
12354             REX_W, EVEX_CD8<16, CD8VF>;
12355  defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
12356             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
12357  defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
12358             sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
12359}
12360
12361// Concat & Shift
// Arguments are <word opcode, dword/qword opcode, mnemonic stem, DAG node,
// scheduling class>. The V-suffixed instructions come from VBMI2_shift_var,
// the others from the imm8-based VBMI2_shift_imm.
12362defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
12363defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
12364defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
12365defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
12366
12367// Compress
// Byte and word variants share opcode 0x63; the word variant adds REX_W.
12368defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
12369                                         avx512vl_i8_info, HasVBMI2>, EVEX;
12370defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
12371                                          avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
12372// Expand
// Byte and word variants share opcode 0x62; the word variant adds REX_W.
12373defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
12374                                      avx512vl_i8_info, HasVBMI2>, EVEX;
12375defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
12376                                      avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
12377
12378//===----------------------------------------------------------------------===//
12379// VNNI
12380//===----------------------------------------------------------------------===//
12381
// Register (r), full-vector memory (m) and broadcast memory (mb) forms of a
// VNNI instruction for one vector type. $src1 is tied to $dst for every form.
//   Op           - opcode byte.
//   OpStr        - assembly mnemonic.
//   OpNode       - DAG node matched as (OpNode $src1, $src2, $src3).
//   sched        - scheduling class; memory forms use sched.Folded followed
//                  by two sched.ReadAfterFold entries.
//   VTI          - vector type info (RC, VT, MemOp, ScalarMemOp, LdFrag,
//                  BroadcastLdFrag, BroadcastStr, ExeDomain).
//   IsCommutable - forwarded (twice) to AVX512_maskable_3src for the register
//                  form only; the memory forms do not pass it.
12382let Constraints = "$src1 = $dst" in
12383multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12384                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12385                    bit IsCommutable> {
12386  let ExeDomain = VTI.ExeDomain in {
  // Register form.
12387  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12388                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12389                                   "$src3, $src2", "$src2, $src3",
12390                                   (VTI.VT (OpNode VTI.RC:$src1,
12391                                            VTI.RC:$src2, VTI.RC:$src3)),
12392                                   IsCommutable, IsCommutable>,
12393                                   EVEX, VVVV, T8, PD, Sched<[sched]>;
  // Memory form: $src3 is a full-width vector load.
12394  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12395                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12396                                   "$src3, $src2", "$src2, $src3",
12397                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12398                                            (VTI.VT (VTI.LdFrag addr:$src3))))>,
12399                                   EVEX, VVVV, EVEX_CD8<32, CD8VF>, T8, PD,
12400                                   Sched<[sched.Folded, sched.ReadAfterFold,
12401                                          sched.ReadAfterFold]>;
  // Broadcast form: $src3 is a scalar memory operand loaded through
  // VTI.BroadcastLdFrag; the encoding adds EVEX_B.
12402  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12403                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
12404                                   OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
12405                                   "$src2, ${src3}"#VTI.BroadcastStr,
12406                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
12407                                    (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12408                                   EVEX, VVVV, EVEX_CD8<32, CD8VF>, EVEX_B,
12409                                   T8, PD, Sched<[sched.Folded, sched.ReadAfterFold,
12410                                                sched.ReadAfterFold]>;
12411  }
12412}
12413
// Instantiates VNNI_rmb at all three vector widths, always with 32-bit
// integer element types (v16i32, v8i32, v4i32).
//   Z    - 512-bit, requires HasVNNI.
//   Z256 - 256-bit, requires HasVNNI and HasVLX.
//   Z128 - 128-bit, requires HasVNNI and HasVLX.
12414multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
12415                       X86SchedWriteWidths sched, bit IsCommutable> {
12416  let Predicates = [HasVNNI] in
12417  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
12418                           IsCommutable>, EVEX_V512;
12419  let Predicates = [HasVNNI, HasVLX] in {
12420    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
12421                           IsCommutable>, EVEX_V256;
12422    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
12423                           IsCommutable>, EVEX_V128;
12424  }
12425}
12426
12427// FIXME: Is there a better scheduler class for VPDP?
// The final template argument is IsCommutable: 0 for the two VPDPBUSD*
// instructions and 1 for the two VPDPWSSD* instructions.
12428defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
12429defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
12430defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
12431defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
12432
12433// Patterns to match VPDPWSSD from existing instructions/intrinsics.
// Each pattern selects (add $src1, (X86vpmaddwd_su $src2, $src3)) to the
// matching-width VPDPWSSD, with $src1 as the accumulator operand. The "m"
// variants fold a load of $src3. X86vpmaddwd_su is defined outside this
// chunk; the _su suffix presumably denotes a single-use fragment -- confirm.
12434let Predicates = [HasVNNI] in {
12435  def : Pat<(v16i32 (add VR512:$src1,
12436                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12437            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12438  def : Pat<(v16i32 (add VR512:$src1,
12439                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12440            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
12441}
// 256-bit and 128-bit equivalents additionally require HasVLX.
12442let Predicates = [HasVNNI,HasVLX] in {
12443  def : Pat<(v8i32 (add VR256X:$src1,
12444                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12445            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12446  def : Pat<(v8i32 (add VR256X:$src1,
12447                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12448            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12449  def : Pat<(v4i32 (add VR128X:$src1,
12450                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12451            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12452  def : Pat<(v4i32 (add VR128X:$src1,
12453                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12454            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12455}
12456
12457//===----------------------------------------------------------------------===//
12458// Bit Algorithms
12459//===----------------------------------------------------------------------===//
12460
12461// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// Byte and word population count, both matching the generic ctpop node under
// HasBITALG. They share opcode 0x54; the word variant adds REX_W.
12462defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12463                                   avx512vl_i8_info, HasBITALG>;
12464defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12465                                   avx512vl_i16_info, HasBITALG>, REX_W;
12466
// Additional ctpop lowering patterns keyed by instruction-name prefix
// (avx512_unary_lowering is defined outside this chunk).
12467defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12468defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
12469
// Register (rr) and memory (rm) forms of vpshufbitqmb (opcode 0x8F) for one
// vector type. The result is written to a mask register (VTI.KRC).
//   sched - scheduling class; the memory form uses sched.Folded and
//           sched.ReadAfterFold.
//   VTI   - vector type info supplying RC, KRC, VT, MemOp and LdFrag.
// Each form hands AVX512_maskable_cmp two patterns, one on X86Vpshufbitqmb
// and one on X86Vpshufbitqmb_su; presumably the second is used for the masked
// variant -- confirm against AVX512_maskable_cmp, which is outside this chunk.
12470multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12471  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
12472                                (ins VTI.RC:$src1, VTI.RC:$src2),
12473                                "vpshufbitqmb",
12474                                "$src2, $src1", "$src1, $src2",
12475                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12476                                (VTI.VT VTI.RC:$src2)),
12477                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12478                                (VTI.VT VTI.RC:$src2))>, EVEX, VVVV, T8, PD,
12479                                Sched<[sched]>;
  // Memory form: $src2 is a full-width vector load.
12480  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
12481                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
12482                                "vpshufbitqmb",
12483                                "$src2, $src1", "$src1, $src2",
12484                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12485                                (VTI.VT (VTI.LdFrag addr:$src2))),
12486                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12487                                (VTI.VT (VTI.LdFrag addr:$src2)))>,
12488                                EVEX, VVVV, EVEX_CD8<8, CD8VF>, T8, PD,
12489                                Sched<[sched.Folded, sched.ReadAfterFold]>;
12490}
12491
// Instantiates VPSHUFBITQMB_rm at all three vector widths of VTI.
//   Z    - 512-bit, requires HasBITALG.
//   Z256 - 256-bit, requires HasBITALG and HasVLX.
//   Z128 - 128-bit, requires HasBITALG and HasVLX.
12492multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12493  let Predicates = [HasBITALG] in
12494  defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
12495  let Predicates = [HasBITALG, HasVLX] in {
12496    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
12497    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
12498  }
12499}
12500
12501// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
// Sole instantiation: byte-element vectors with SchedWriteVecIMul.
12502defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12503
12504//===----------------------------------------------------------------------===//
12505// GFNI
12506//===----------------------------------------------------------------------===//
12507
// Instantiates avx512_binop_rm for a GFNI byte multiply at all three vector
// widths, always on byte-element types (v64i8, v32i8, v16i8). The trailing
// template argument 1 is passed to every width; presumably it is the
// commutable flag of avx512_binop_rm -- confirm against its definition.
//   Z    - 512-bit, requires HasGFNI and HasAVX512.
//   Z256 - 256-bit, requires HasGFNI and HasVLX.
//   Z128 - 128-bit, requires HasGFNI and HasVLX.
12508multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12509                                   X86SchedWriteWidths sched> {
12510  let Predicates = [HasGFNI, HasAVX512] in
12511  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12512                                EVEX_V512;
12513  let Predicates = [HasGFNI, HasVLX] in {
12514    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12515                                EVEX_V256;
12516    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12517                                EVEX_V128;
12518  }
12519}
12520
// EVEX vgf2p8mulb: opcode 0xCF, T8 class, EVEX_CD8<8, CD8VF>.
12521defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12522                                          SchedWriteVecALU>,
12523                                          EVEX_CD8<8, CD8VF>, T8;
12524
// Extends avx512_3Op_rm_imm8 (instantiated with VTI as both of its type
// arguments) with a broadcast-memory + imm8 form (rmbi) for the GFNI affine
// instructions.
//   VTI     - type info for the destination and $src1.
//   BcstVTI - type info describing the broadcast operand; it supplies the
//             ScalarMemOp, BroadcastStr and the VT of the broadcast.
// The rmbi pattern always uses a 64-bit broadcast load (X86VBroadcastld64)
// typed as BcstVTI.VT and bitconverts it before passing it to OpNode.
12525multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12526                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12527                                      X86VectorVTInfo BcstVTI>
12528           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12529  let ExeDomain = VTI.ExeDomain in
12530  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12531                (ins VTI.RC:$src1, BcstVTI.ScalarMemOp:$src2, u8imm:$src3),
12532                OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
12533                "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
12534                (OpNode (VTI.VT VTI.RC:$src1),
12535                 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12536                 (i8 timm:$src3))>, EVEX_B,
12537                 Sched<[sched.Folded, sched.ReadAfterFold]>;
12538}
12539
// Instantiates GF2P8AFFINE_avx512_rmb_imm at all three vector widths, pairing
// each byte-element type with the i64-element type of the same width for the
// broadcast operand.
//   Z    - 512-bit (v64i8 / v8i64), requires HasGFNI and HasAVX512.
//   Z256 - 256-bit (v32i8 / v4i64), requires HasGFNI and HasVLX.
//   Z128 - 128-bit (v16i8 / v2i64), requires HasGFNI and HasVLX.
12540multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12541                                     X86SchedWriteWidths sched> {
12542  let Predicates = [HasGFNI, HasAVX512] in
12543  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12544                                           v64i8_info, v8i64_info>, EVEX_V512;
12545  let Predicates = [HasGFNI, HasVLX] in {
12546    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12547                                           v32i8x_info, v4i64x_info>, EVEX_V256;
12548    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12549                                           v16i8x_info, v2i64x_info>, EVEX_V128;
12550  }
12551}
12552
// The two GFNI affine instructions differ only in opcode (0xCF vs 0xCE),
// mnemonic and DAG node; both use REX_W, AVX512AIi8Base and
// EVEX_CD8<8, CD8VF>.
12553defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12554                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
12555                         EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
12556defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12557                         X86GF2P8affineqb, SchedWriteVecIMul>,
12558                         EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
12559
12560
12561//===----------------------------------------------------------------------===//
12562// AVX5124FMAPS
12563//===----------------------------------------------------------------------===//
12564
// AVX5124FMAPS instructions. All four are memory-source only and carry an
// empty pattern list, so nothing here selects them from DAG nodes. Because
// there is no pattern, the properties are stated explicitly: no side effects,
// may load, uses MXCSR, may raise FP exceptions. $src1 is tied to $dst and
// the memory operand is f128mem for every variant.
12565let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12566    Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
// Packed forms: 512-bit registers, EVEX_V512, EVEX_CD8<32, CD8VQ>.
12567defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12568                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12569                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
12570                    []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12571                    Sched<[SchedWriteFMA.ZMM.Folded]>;
12572
12573defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12574                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12575                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12576                     []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12577                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12578
// Scalar forms: 128-bit registers, VEX_LIG, EVEX_CD8<32, CD8VF>.
12579defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12580                    (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
12581                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
12582                    []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>,
12583                    Sched<[SchedWriteFMA.Scl.Folded]>;
12584
12585defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12586                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12587                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12588                     []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>,
12589                     Sched<[SchedWriteFMA.Scl.Folded]>;
12590}
12591
12592//===----------------------------------------------------------------------===//
12593// AVX5124VNNIW
12594//===----------------------------------------------------------------------===//
12595
// AVX5124VNNIW instructions. Both are memory-source only with an empty
// pattern list, so hasSideEffects and mayLoad are stated explicitly. $src1 is
// tied to $dst. The integer execution domain is used, but the memory operand
// (f128mem) and scheduling class (SchedWriteFMA.ZMM.Folded) are the same as
// in the AVX5124FMAPS packed definitions.
12596let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12597    Constraints = "$src1 = $dst" in {
12598defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12599                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12600                     "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12601                    []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12602                    Sched<[SchedWriteFMA.ZMM.Folded]>;
12603
12604defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12605                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12606                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12607                     []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12608                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12609}
12610
// Pattern-less pseudo instructions that store/load a VK16PAIR register
// to/from memory. With no pattern to infer from, mayStore/mayLoad are set
// explicitly. Presumably expanded into real instructions elsewhere in the
// backend -- the expansion is not visible in this file section.
12611let hasSideEffects = 0 in {
12612  let mayStore = 1, SchedRW = [WriteFStoreX] in
12613  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12614  let mayLoad = 1, SchedRW = [WriteFLoadX] in
12615  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12616}
12617
12618//===----------------------------------------------------------------------===//
12619// VP2INTERSECT
12620//===----------------------------------------------------------------------===//
12621
// Register (rr), memory (rm) and broadcast memory (rmb) forms of vp2intersect
// (opcode 0x68) for one vector type. The mnemonic is "vp2intersect" followed
// by _.Suffix, and the destination register class is _.KRPC.
//   sched - scheduling class; memory forms use sched.Folded and
//           sched.ReadAfterFold.
//   _     - vector type info (RC, KRPC, VT, MemOp, ScalarMemOp, LdFrag,
//           BroadcastLdFrag, BroadcastStr, Suffix, EltSize).
// Unlike the AVX512_maskable-based definitions nearby, these are plain "def"s
// of class I, so only the three records named here are created.
12622multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12623  def rr : I<0x68, MRMSrcReg,
12624                  (outs _.KRPC:$dst),
12625                  (ins _.RC:$src1, _.RC:$src2),
12626                  !strconcat("vp2intersect", _.Suffix,
12627                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12628                  [(set _.KRPC:$dst, (X86vp2intersect
12629                            _.RC:$src1, (_.VT _.RC:$src2)))]>,
12630                  EVEX, VVVV, T8, XD, Sched<[sched]>;
12631
  // Memory form: $src2 is a full-width load, bitconverted to _.VT.
12632  def rm : I<0x68, MRMSrcMem,
12633                  (outs _.KRPC:$dst),
12634                  (ins  _.RC:$src1, _.MemOp:$src2),
12635                  !strconcat("vp2intersect", _.Suffix,
12636                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12637                  [(set _.KRPC:$dst, (X86vp2intersect
12638                            _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12639                  EVEX, VVVV, T8, XD, EVEX_CD8<_.EltSize, CD8VF>,
12640                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12641
  // Broadcast form: $src2 is a scalar memory operand; _.BroadcastStr is
  // appended to it in both assembly syntaxes and the encoding adds EVEX_B.
12642  def rmb : I<0x68, MRMSrcMem,
12643                  (outs _.KRPC:$dst),
12644                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
12645                  !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12646                             ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12647                  [(set _.KRPC:$dst, (X86vp2intersect
12648                             _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12649                  EVEX, VVVV, T8, XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12650                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12651}
12652
// Instantiates avx512_vp2intersect_modes at all three vector widths.
//   Z    - 512-bit, requires HasAVX512 and HasVP2INTERSECT.
//   Z256 - 256-bit, additionally requires HasVLX.
//   Z128 - 128-bit, additionally requires HasVLX.
12653multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12654  let Predicates  = [HasAVX512, HasVP2INTERSECT] in
12655    defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12656
12657  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12658    defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12659    defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
12660  }
12661}
12662
// Dword and qword element instantiations, both in the packed-integer
// execution domain; the qword variant adds REX_W.
12663let ExeDomain = SSEPackedInt in {
12664defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12665defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, REX_W;
12666}
12667
// Instantiates avx512_binop_rm2 at all three vector widths for a binary
// operation whose source and destination vector types differ.
//   opc, OpcodeStr - opcode byte and mnemonic.
//   sched          - per-width scheduling classes.
//   _SrcVTInfo     - source type info; passed as both the first and the third
//                    type argument of avx512_binop_rm2.
//   _DstVTInfo     - destination type info (second type argument).
//   OpNode         - DAG node to match.
//   prd            - feature predicate; HasVLX is added for 256/128-bit.
//   IsCommutable   - forwarded to avx512_binop_rm2 (default 0).
// EVEX_CD8<32, CD8VF> is fixed here for every width. Record names are formed
// explicitly as NAME#Z, NAME#Z256 and NAME#Z128.
12668multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12669                             X86SchedWriteWidths sched,
12670                             AVX512VLVectorVTInfo _SrcVTInfo,
12671                             AVX512VLVectorVTInfo _DstVTInfo,
12672                             SDNode OpNode, Predicate prd,
12673                             bit IsCommutable = 0> {
12674  let Predicates = [prd] in
12675    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12676                                   _SrcVTInfo.info512, _DstVTInfo.info512,
12677                                   _SrcVTInfo.info512, IsCommutable>,
12678                                   EVEX_V512, EVEX_CD8<32, CD8VF>;
12679  let Predicates = [HasVLX, prd] in {
12680    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12681                                      _SrcVTInfo.info256, _DstVTInfo.info256,
12682                                      _SrcVTInfo.info256, IsCommutable>,
12683                                     EVEX_V256, EVEX_CD8<32, CD8VF>;
12684    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12685                                      _SrcVTInfo.info128, _DstVTInfo.info128,
12686                                      _SrcVTInfo.info128, IsCommutable>,
12687                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
12688  }
12689}
12690
// vcvtne2ps2bf16: f32 sources, bf16 destination, opcode 0x72, not commutable,
// predicated on HasBF16. SchedWriteCvtPD2PS is a stand-in scheduling class
// (see the FIXME on that line).
12691let ExeDomain = SSEPackedSingle in
12692defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12693                                        SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12694                                        avx512vl_f32_info, avx512vl_bf16_info,
12695                                        X86cvtneps2bf16, HasBF16, 0>, T8, XD;
12696
12697// Truncate Float to BFloat16
// Instantiates avx512_vcvt_fp for the f32 -> bf16 conversion at three source
// widths and adds explicit "x"/"y"-suffixed assembler aliases.
//   opc, OpcodeStr - opcode byte and mnemonic.
//   sched          - per-width scheduling classes.
// Every instantiation clears Uses and sets mayRaiseFPException = 0.
//   Z    - v16f32 -> v16bf16, requires HasBF16.
//   Z128 - v4f32  -> v8bf16, requires HasBF16 and HasVLX. Both DAG nodes are
//          null_frag, so this instantiation is given no selection patterns;
//          it also passes "{1to4}", "{x}", f128mem and VK4WM explicitly.
//   Z256 - v8f32  -> v8bf16, requires HasBF16 and HasVLX; passes "{1to8}"
//          and "{y}".
12698multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12699                             X86SchedWriteWidths sched> {
12700  let ExeDomain = SSEPackedSingle in {
12701  let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12702    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16bf16x_info, v16f32_info,
12703                            X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12704  }
12705  let Predicates = [HasBF16, HasVLX] in {
12706    let Uses = []<Register>, mayRaiseFPException = 0 in {
12707    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v4f32x_info,
12708                               null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12709                               VK4WM>, EVEX_V128;
12710    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v8f32x_info,
12711                               X86cvtneps2bf16, X86cvtneps2bf16,
12712                               sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12713    }
12714  } // Predicates = [HasBF16, HasVLX]
12715  } // ExeDomain = SSEPackedSingle
12716
  // Aliases spelling the mnemonic with an explicit "x" (128-bit source) or
  // "y" (256-bit source) suffix. Both widths write a VR128X destination. The
  // memory-source aliases are restricted to the "intel" assembler variant.
12717  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12718                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12719                  VR128X:$src), 0>;
12720  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12721                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12722                  f128mem:$src), 0, "intel">;
12723  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12724                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12725                  VR256X:$src), 0>;
12726  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12727                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12728                  f256mem:$src), 0, "intel">;
12729}
12730
// vcvtneps2bf16: opcode 0x72 (the same byte as VCVTNE2PS2BF16, but with the
// XS class instead of XD), EVEX_CD8<32, CD8VF>.
12731defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12732                                       SchedWriteCvtPD2PS>, T8, XS,
12733                                       EVEX_CD8<32, CD8VF>;
12734
// Selection patterns that need both HasBF16 and HasVLX: the hand-written
// 128-bit VCVTNEPS2BF16 patterns, intrinsic patterns, bf16 broadcasts, and
// X86vfpround for the 256-bit source.
12735let Predicates = [HasBF16, HasVLX] in {
12736  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12737  // patterns have been disabled with null_frag.
  // Register source: unmasked, merge-masked (rrk, passthrough $src0) and
  // zero-masked (rrkz, passthrough ImmAllZerosV).
12738  def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12739            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12740  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8bf16 VR128X:$src0),
12741                              VK4WM:$mask),
12742            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12743  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8bf16x_info.ImmAllZerosV,
12744                              VK4WM:$mask),
12745            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
12746
  // Full-vector load source: the same three variants.
12747  def : Pat<(v8bf16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12748            (VCVTNEPS2BF16Z128rm addr:$src)>;
12749  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8bf16 VR128X:$src0),
12750                              VK4WM:$mask),
12751            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12752  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8bf16x_info.ImmAllZerosV,
12753                              VK4WM:$mask),
12754            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
12755
  // 32-bit broadcast-load source: the same three variants.
12756  def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32
12757                                     (X86VBroadcastld32 addr:$src)))),
12758            (VCVTNEPS2BF16Z128rmb addr:$src)>;
12759  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12760                              (v8bf16 VR128X:$src0), VK4WM:$mask),
12761            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12762  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12763                              v8bf16x_info.ImmAllZerosV, VK4WM:$mask),
12764            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
12765
  // Intrinsic forms, 128-bit source.
12766  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (v4f32 VR128X:$src))),
12767            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12768  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src))),
12769            (VCVTNEPS2BF16Z128rm addr:$src)>;
12770
  // Intrinsic forms, 256-bit source.
12771  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (v8f32 VR256X:$src))),
12772            (VCVTNEPS2BF16Z256rr VR256X:$src)>;
12773  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))),
12774            (VCVTNEPS2BF16Z256rm addr:$src)>;
12775
  // bf16 vector broadcasts are selected to the word-broadcast instructions.
12776  def : Pat<(v8bf16 (X86VBroadcastld16 addr:$src)),
12777            (VPBROADCASTWZ128rm addr:$src)>;
12778  def : Pat<(v16bf16 (X86VBroadcastld16 addr:$src)),
12779            (VPBROADCASTWZ256rm addr:$src)>;
12780
12781  def : Pat<(v8bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12782            (VPBROADCASTWZ128rr VR128X:$src)>;
12783  def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12784            (VPBROADCASTWZ256rr VR128X:$src)>;
12785
  // X86vfpround from v8f32 to v8bf16 is selected to the 256-bit-source
  // conversion.
12786  def : Pat<(v8bf16 (X86vfpround (v8f32 VR256X:$src))),
12787            (VCVTNEPS2BF16Z256rr VR256X:$src)>;
12788  def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))),
12789            (VCVTNEPS2BF16Z256rm addr:$src)>;
12790
12791  // TODO: No scalar broadcast because legal scalar bf16 is not supported yet.
12792}
12793
// 512-bit counterparts of the BF16 broadcast and X86vfpround patterns; these
// need only HasBF16.
12794let Predicates = [HasBF16] in {
12795  def : Pat<(v32bf16 (X86VBroadcastld16 addr:$src)),
12796            (VPBROADCASTWZrm addr:$src)>;
12797
12798  def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12799            (VPBROADCASTWZrr VR128X:$src)>;
12800
12801  def : Pat<(v16bf16 (X86vfpround (v16f32 VR512:$src))),
12802            (VCVTNEPS2BF16Zrr VR512:$src)>;
12803  def : Pat<(v16bf16 (X86vfpround (loadv16f32 addr:$src))),
12804            (VCVTNEPS2BF16Zrm addr:$src)>;
12805  // TODO: No scalar broadcast because legal scalar bf16 is not supported yet.
12806}
12807
// Register (r), memory (m) and broadcast memory (mb) forms of a bf16
// dot-product-accumulate instruction. $src1 is tied to $dst.
//   opc, OpcodeStr - opcode byte and mnemonic.
//   OpNode         - DAG node matched as (OpNode $src1, $src2, $src3).
//   sched          - scheduling class; memory forms use sched.Folded and
//                    sched.ReadAfterFold.
//   _              - type info for the destination/accumulator.
//   src_v          - type info for the $src2/$src3 operands.
12808let Constraints = "$src1 = $dst" in {
12809multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12810                              X86FoldableSchedWrite sched,
12811                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
12812  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12813                           (ins src_v.RC:$src2, src_v.RC:$src3),
12814                           OpcodeStr, "$src3, $src2", "$src2, $src3",
12815                           (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
12816                           EVEX, VVVV, Sched<[sched]>;
12817
12818  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12819                               (ins src_v.RC:$src2, src_v.MemOp:$src3),
12820                               OpcodeStr, "$src3, $src2", "$src2, $src3",
12821                               (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12822                               (src_v.LdFrag addr:$src3)))>, EVEX, VVVV,
12823                               Sched<[sched.Folded, sched.ReadAfterFold]>;
12824
  // The broadcast form has no selection pattern (null_frag), so mayLoad and
  // hasSideEffects are set explicitly. Its memory operand is f32mem and the
  // broadcast string comes from the destination type info (_), not src_v.
12825  let mayLoad = 1, hasSideEffects = 0 in
12826  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12827                  (ins src_v.RC:$src2, f32mem:$src3),
12828                  OpcodeStr,
12829                  !strconcat("${src3}", _.BroadcastStr,", $src2"),
12830                  !strconcat("$src2, ${src3}", _.BroadcastStr),
12831                  (null_frag)>,
12832                  EVEX_B, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
12833
12834}
12835} // Constraints = "$src1 = $dst"
12836
// Instantiates avx512_dpbf16ps_rm at all three vector widths.
//   _     - destination type info (all widths).
//   src_v - source type info (all widths).
//   prd   - feature predicate; HasVLX is added for the 256/128-bit forms.
12837multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12838                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
12839                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
12840  let Predicates = [prd] in {
12841    defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
12842                                   src_v.info512>, EVEX_V512;
12843  }
12844  let Predicates = [HasVLX, prd] in {
12845    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
12846                                   src_v.info256>, EVEX_V256;
12847    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
12848                                   src_v.info128>, EVEX_V128;
12849  }
12850}
12851
// vdpbf16ps: f32 destination/accumulator, bf16 sources, opcode 0x52,
// predicated on HasBF16, scheduled as SchedWriteFMA.
12852let ExeDomain = SSEPackedSingle in
12853defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
12854                                       avx512vl_f32_info, avx512vl_bf16_info,
12855                                       HasBF16>, T8, XS, EVEX_CD8<32, CD8VF>;
12856
12857//===----------------------------------------------------------------------===//
12858// AVX512FP16
12859//===----------------------------------------------------------------------===//
12860
// VMOVW: 16-bit moves between a GPR or memory and element 0 of an XMM
// register, together with the selection patterns built on them. Everything in
// this block is predicated on HasFP16.
12861let Predicates = [HasFP16] in {
12862// Move word (r/m16) to packed word.
// The register form takes a GR32 operand and has an empty pattern list; it is
// only selected through the explicit Pats below.
12863def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
12864                      "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
12865def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
12866                      "vmovw\t{$src, $dst|$dst, $src}",
12867                      [(set VR128X:$dst,
12868                        (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
12869                      T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;
12870
// GR16 sources are first placed in the sub_16bit part of an undefined 32-bit
// register (INSERT_SUBREG on IMPLICIT_DEF) because VMOVW2SHrr takes a GR32.
12871def : Pat<(f16 (bitconvert GR16:$src)),
12872          (f16 (COPY_TO_REGCLASS
12873                (VMOVW2SHrr
12874                 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
12875                FR16X))>;
12876def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
12877          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
// A GR32 masked with 0xffff selects VMOVW2SHrr on the unmasked register: the
// `and` does not appear in the output of these patterns.
12878def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
12879          (VMOVW2SHrr GR32:$src)>;
12880// FIXME: We should really find a way to improve these patterns.
// 256/512-bit variants of the pattern above: the 128-bit result is wrapped with
// SUBREG_TO_REG as the sub_xmm part of the wider value.
12881def : Pat<(v8i32 (X86vzmovl
12882                  (insert_subvector undef,
12883                                    (v4i32 (scalar_to_vector
12884                                            (and GR32:$src, 0xffff))),
12885                                    (iPTR 0)))),
12886          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12887def : Pat<(v16i32 (X86vzmovl
12888                   (insert_subvector undef,
12889                                     (v4i32 (scalar_to_vector
12890                                             (and GR32:$src, 0xffff))),
12891                                     (iPTR 0)))),
12892          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12893
12894def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
12895          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
12896
12897// The 128-bit vmovw instruction writes zeros in the high 128-bit part.
12898def : Pat<(v8i16 (X86vzload16 addr:$src)),
12899          (VMOVWrm addr:$src)>;
12900def : Pat<(v16i16 (X86vzload16 addr:$src)),
12901          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
12902
12903// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
12904def : Pat<(v32i16 (X86vzload16 addr:$src)),
12905          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
12906
// 16-bit loads that were extended to i32 before being inserted into a vector
// are selected to the memory form of vmovw as well.
12907def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
12908          (VMOVWrm addr:$src)>;
12909def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
12910          (VMOVWrm addr:$src)>;
12911def : Pat<(v8i32 (X86vzmovl
12912                  (insert_subvector undef,
12913                                    (v4i32 (scalar_to_vector
12914                                            (i32 (zextloadi16 addr:$src)))),
12915                                    (iPTR 0)))),
12916          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12917def : Pat<(v16i32 (X86vzmovl
12918                   (insert_subvector undef,
12919                                     (v4i32 (scalar_to_vector
12920                                             (i32 (zextloadi16 addr:$src)))),
12921                                     (iPTR 0)))),
12922          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12923
12924// Move word from xmm register to r/m16
// The register form produces a GR32 and has an empty pattern list; the Pats
// below extract its sub_16bit part when an i16 result is needed.
12925def VMOVSH2Wrr  : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
12926                       "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveToGpr]>;
12927def VMOVWmr  : AVX512<0x7E, MRMDestMem, (outs),
12928                       (ins i16mem:$dst, VR128X:$src),
12929                       "vmovw\t{$src, $dst|$dst, $src}",
12930                       [(store (i16 (extractelt (v8i16 VR128X:$src),
12931                                     (iPTR 0))), addr:$dst)]>,
12932                       T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;
12933
12934def : Pat<(i16 (bitconvert FR16X:$src)),
12935          (i16 (EXTRACT_SUBREG
12936                (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
12937                sub_16bit))>;
12938def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
12939          (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;
12940
12941// Allow "vmovw" to use GR64
// Same opcodes as the GR32 forms above with REX_W added; neither definition has
// a selection pattern.
12942let hasSideEffects = 0 in {
12943  def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
12944                     "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
12945  def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
12946                     "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>;
12947} // hasSideEffects = 0
12948} // Predicates = [HasFP16]
12949
12950// Convert 16-bit float to i16/u16
// The conversion direction is determined entirely by the _Dst/_Src VT infos, so
// this multiclass is also instantiated below for the i16/u16 -> half direction.
//   Z         - avx512_vcvt_fp forms plus the avx512_vcvt_fp_rc forms (the only
//               user of OpNodeRnd); requires HasFP16.
//   Z128/Z256 - avx512_vcvt_fp forms only; require HasFP16 and HasVLX.
// NOTE(review): avx512_vcvt_fp_rc is defined elsewhere in this file; it is
// presumably the embedded-rounding variant - confirm there.
12951multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12952                          SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12953                          AVX512VLVectorVTInfo _Dst,
12954                          AVX512VLVectorVTInfo _Src,
12955                          X86SchedWriteWidths sched> {
12956  let Predicates = [HasFP16] in {
12957    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
12958                            OpNode, MaskOpNode, sched.ZMM>,
12959             avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
12960                               OpNodeRnd, sched.ZMM>, EVEX_V512;
12961  }
12962  let Predicates = [HasFP16, HasVLX] in {
12963    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
12964                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
12965    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
12966                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
12967  }
12968}
12969
12970// Convert 16-bit float to i16/u16 with truncation
// Same layout as avx512_cvtph2w, except that the 512-bit form is paired with
// avx512_vcvt_fp_sae instead of avx512_vcvt_fp_rc:
//   Z         - avx512_vcvt_fp forms plus the avx512_vcvt_fp_sae forms (the only
//               user of OpNodeRnd); requires HasFP16.
//   Z128/Z256 - avx512_vcvt_fp forms only; require HasFP16 and HasVLX.
12971multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12972                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12973                           AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
12974                           X86SchedWriteWidths sched> {
12975  let Predicates = [HasFP16] in {
12976    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
12977                            OpNode, MaskOpNode, sched.ZMM>,
12978             avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
12979                               OpNodeRnd, sched.ZMM>, EVEX_V512;
12980  }
12981  let Predicates = [HasFP16, HasVLX] in {
12982    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
12983                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
12984    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
12985                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
12986  }
12987}
12988
// Packed half <-> 16-bit integer conversions. Every instantiation below uses
// T_MAP5, EVEX_CD8<16, CD8VF> and SchedWriteCvtPD2DQ.
//   - Opcode 0x7D is shared by VCVTPH2UW (no prefix class), VCVTUW2PH (XD),
//     VCVTPH2W (PD) and VCVTW2PH (XS).
//   - Opcode 0x7C is shared by VCVTTPH2W (PD) and VCVTTPH2UW (no prefix class).
//   - The integer-to-half forms (VCVTUW2PH, VCVTW2PH) reuse avx512_cvtph2w with
//     the f16 infos as _Dst and the i16 infos as _Src.
12989defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
12990                                X86cvtp2UIntRnd, avx512vl_i16_info,
12991                                avx512vl_f16_info, SchedWriteCvtPD2DQ>,
12992                                T_MAP5, EVEX_CD8<16, CD8VF>;
12993defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
12994                                X86VUintToFpRnd, avx512vl_f16_info,
12995                                avx512vl_i16_info, SchedWriteCvtPD2DQ>,
12996                                T_MAP5, XD, EVEX_CD8<16, CD8VF>;
12997defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
12998                                X86cvttp2si, X86cvttp2siSAE,
12999                                avx512vl_i16_info, avx512vl_f16_info,
13000                                SchedWriteCvtPD2DQ>, T_MAP5, PD, EVEX_CD8<16, CD8VF>;
13001defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
13002                                X86cvttp2ui, X86cvttp2uiSAE,
13003                                avx512vl_i16_info, avx512vl_f16_info,
13004                                SchedWriteCvtPD2DQ>, T_MAP5, EVEX_CD8<16, CD8VF>;
13005defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
13006                                X86cvtp2IntRnd, avx512vl_i16_info,
13007                                avx512vl_f16_info, SchedWriteCvtPD2DQ>,
13008                                T_MAP5, PD, EVEX_CD8<16, CD8VF>;
13009defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
13010                                X86VSintToFpRnd, avx512vl_f16_info,
13011                                avx512vl_i16_info, SchedWriteCvtPD2DQ>,
13012                                T_MAP5, XS, EVEX_CD8<16, CD8VF>;
13013
13014// Convert Half to Signed/Unsigned Doubleword
// The VT infos are fixed here rather than passed in:
//   Z    - v16i32 from v16f16x, plus the avx512_vcvt_fp_rc forms (the only user
//          of OpNodeRnd); requires HasFP16.
//   Z128 - v4i32x from v8f16x; requires HasFP16 and HasVLX.
//   Z256 - v8i32x from v8f16x; requires HasFP16 and HasVLX.
13015multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13016                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13017                           X86SchedWriteWidths sched> {
13018  let Predicates = [HasFP16] in {
13019    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
13020                            MaskOpNode, sched.ZMM>,
13021             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
13022                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13023  }
13024  let Predicates = [HasFP16, HasVLX] in {
    // The result has only 4 elements while the source info is v8f16x_info, so
    // the broadcast string ("{1to4}") and the memory operand (f64mem) are given
    // explicitly instead of being taken from the source info.
13025    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
13026                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
13027    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
13028                               MaskOpNode, sched.YMM>, EVEX_V256;
13029  }
13030}
13031
13032// Convert Half to Signed/Unsigned Doubleword with truncation
// Same layout as avx512_cvtph2dq, except that the 512-bit form is paired with
// avx512_vcvt_fp_sae instead of avx512_vcvt_fp_rc:
//   Z    - v16i32 from v16f16x, plus the avx512_vcvt_fp_sae forms (the only
//          user of OpNodeRnd); requires HasFP16.
//   Z128 - v4i32x from v8f16x; requires HasFP16 and HasVLX.
//   Z256 - v8i32x from v8f16x; requires HasFP16 and HasVLX.
13033multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13034                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13035                            X86SchedWriteWidths sched> {
13036  let Predicates = [HasFP16] in {
13037    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
13038                            MaskOpNode, sched.ZMM>,
13039             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
13040                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13041  }
13042  let Predicates = [HasFP16, HasVLX] in {
    // The result has only 4 elements while the source info is v8f16x_info, so
    // the broadcast string ("{1to4}") and the memory operand (f64mem) are given
    // explicitly instead of being taken from the source info.
13043    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
13044                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
13045    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
13046                               MaskOpNode, sched.YMM>, EVEX_V256;
13047  }
13048}
13049
13050
// Packed half -> doubleword conversions. Every instantiation below uses T_MAP5,
// EVEX_CD8<16, CD8VH> and SchedWriteCvtPS2DQ.
//   VCVTPH2DQ   - opcode 0x5B, PD
//   VCVTPH2UDQ  - opcode 0x79, no prefix class
//   VCVTTPH2DQ  - opcode 0x5B, XS
//   VCVTTPH2UDQ - opcode 0x78, no prefix class
13051defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
13052                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
13053                                 EVEX_CD8<16, CD8VH>;
13054defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
13055                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5,
13056                                 EVEX_CD8<16, CD8VH>;
13057
13058defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
13059                                X86cvttp2si, X86cvttp2siSAE,
13060                                SchedWriteCvtPS2DQ>, T_MAP5, XS,
13061                                EVEX_CD8<16, CD8VH>;
13062
13063defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
13064                                 X86cvttp2ui, X86cvttp2uiSAE,
13065                                 SchedWriteCvtPS2DQ>, T_MAP5,
13066                                 EVEX_CD8<16, CD8VH>;
13067
13068// Convert Half to Signed/Unsigned Quadword
// All three widths use v8f16x_info as the source info:
//   Z    - v8i64 result, plus the avx512_vcvt_fp_rc forms (the only user of
//          OpNodeRnd); requires HasFP16.
//   Z128 - v2i64x result, "{1to2}" broadcast string, f32mem operand.
//   Z256 - v4i64x result, "{1to4}" broadcast string, f64mem operand.
// Z128 and Z256 require HasFP16 and HasVLX.
13069multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13070                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13071                           X86SchedWriteWidths sched> {
13072  let Predicates = [HasFP16] in {
13073    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13074                            MaskOpNode, sched.ZMM>,
13075             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
13076                               OpNodeRnd, sched.ZMM>, EVEX_V512;
13077  }
13078  let Predicates = [HasFP16, HasVLX] in {
13079    // Explicitly specified broadcast string, since we take only 2 elements
13080    // from v8f16x_info source
13081    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13082                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
13083                               EVEX_V128;
13084    // Explicitly specified broadcast string, since we take only 4 elements
13085    // from v8f16x_info source
13086    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13087                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
13088                               EVEX_V256;
13089  }
13090}
13091
13092// Convert Half to Signed/Unsigned Quadword with truncation
// Same layout as avx512_cvtph2qq, except that the 512-bit form is paired with
// avx512_vcvt_fp_sae instead of avx512_vcvt_fp_rc. All three widths use
// v8f16x_info as the source info:
//   Z    - v8i64 result, plus the avx512_vcvt_fp_sae forms (the only user of
//          OpNodeRnd); requires HasFP16.
//   Z128 - v2i64x result, "{1to2}" broadcast string, f32mem operand.
//   Z256 - v4i64x result, "{1to4}" broadcast string, f64mem operand.
// Z128 and Z256 require HasFP16 and HasVLX.
13093multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13094                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13095                            X86SchedWriteWidths sched> {
13096  let Predicates = [HasFP16] in {
13097    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13098                            MaskOpNode, sched.ZMM>,
13099             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
13100                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13101  }
13102  let Predicates = [HasFP16, HasVLX] in {
13103    // Explicitly specified broadcast string, since we take only 2 elements
13104    // from v8f16x_info source
13105    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13106                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
13107    // Explicitly specified broadcast string, since we take only 4 elements
13108    // from v8f16x_info source
13109    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13110                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
13111  }
13112}
13113
// Packed half -> quadword conversions. Every instantiation below uses T_MAP5,
// PD, EVEX_CD8<16, CD8VQ> and SchedWriteCvtPS2DQ; only the opcode and the
// selection nodes differ:
//   VCVTPH2QQ   - opcode 0x7B      VCVTPH2UQQ  - opcode 0x79
//   VCVTTPH2QQ  - opcode 0x7A      VCVTTPH2UQQ - opcode 0x78
13114defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
13115                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
13116                                 EVEX_CD8<16, CD8VQ>;
13117
13118defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
13119                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
13120                                 EVEX_CD8<16, CD8VQ>;
13121
13122defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
13123                                 X86cvttp2si, X86cvttp2siSAE,
13124                                 SchedWriteCvtPS2DQ>, T_MAP5, PD,
13125                                 EVEX_CD8<16, CD8VQ>;
13126
13127defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
13128                                 X86cvttp2ui, X86cvttp2uiSAE,
13129                                 SchedWriteCvtPS2DQ>, T_MAP5, PD,
13130                                 EVEX_CD8<16, CD8VQ>;
13131
13132// Convert Signed/Unsigned Quadword to Half
// All three widths produce v8f16x_info. OpNode and MaskOpNode are only used by
// the 512-bit form; the 128/256-bit forms are instantiated with null_frag, so
// they get no selection patterns from this multiclass.
13133multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13134                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13135                           X86SchedWriteWidths sched> {
13136  // We need "x"/"y"/"z" suffixes to distinguish between the 128-, 256- and
13137  // 512-bit memory forms of these instructions in the asm parser: they all have
13138  // the same dest type - 'v8f16x_info'. We also specify the broadcast string
13139  // explicitly for the same reason.
13140  let Predicates = [HasFP16] in {
13141    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
13142                            MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
13143             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
13144                               OpNodeRnd, sched.ZMM>, EVEX_V512;
13145  }
13146  let Predicates = [HasFP16, HasVLX] in {
    // The mask register classes (VK2WM / VK4WM) are passed explicitly here and
    // match the source element counts (2 and 4).
13147    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
13148                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
13149                               i128mem, VK2WM>, EVEX_V128;
13150    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
13151                               null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
13152                               i256mem, VK4WM>, EVEX_V256;
13153  }
13154
  // AT&T-only aliases ("att" variant) that accept the mnemonic with an explicit
  // "x" suffix for the register and broadcast-memory forms of the 128-bit
  // instruction (unmasked, merge-masked and zero-masked). The third InstAlias
  // argument is 0 for every alias in this multiclass.
  // NOTE(review): that argument is InstAlias's emit priority, so 0 should make
  // these aliases parse-only - confirm against Target.td.
13155  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
13156                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
13157                  VR128X:$src), 0, "att">;
13158  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
13159                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
13160                  VK2WM:$mask, VR128X:$src), 0, "att">;
13161  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
13162                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
13163                  VK2WM:$mask, VR128X:$src), 0, "att">;
13164  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
13165                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
13166                  i64mem:$src), 0, "att">;
13167  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
13168                  "$dst {${mask}}, ${src}{1to2}}",
13169                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
13170                  VK2WM:$mask, i64mem:$src), 0, "att">;
13171  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
13172                  "$dst {${mask}} {z}, ${src}{1to2}}",
13173                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
13174                  VK2WM:$mask, i64mem:$src), 0, "att">;
13175
  // Same set of aliases with a "y" suffix for the 256-bit instruction
  // (VR256X source, VK4WM mask, {1to4} broadcast).
13176  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
13177                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
13178                  VR256X:$src), 0, "att">;
13179  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
13180                  "$dst {${mask}}, $src}",
13181                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
13182                  VK4WM:$mask, VR256X:$src), 0, "att">;
13183  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
13184                  "$dst {${mask}} {z}, $src}",
13185                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
13186                  VK4WM:$mask, VR256X:$src), 0, "att">;
13187  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
13188                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
13189                  i64mem:$src), 0, "att">;
13190  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
13191                  "$dst {${mask}}, ${src}{1to4}}",
13192                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
13193                  VK4WM:$mask, i64mem:$src), 0, "att">;
13194  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
13195                  "$dst {${mask}} {z}, ${src}{1to4}}",
13196                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
13197                  VK4WM:$mask, i64mem:$src), 0, "att">;
13198
  // Same set of aliases with a "z" suffix for the 512-bit instruction
  // (VR512 source, VK8WM mask, {1to8} broadcast).
13199  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
13200                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
13201                  VR512:$src), 0, "att">;
13202  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
13203                  "$dst {${mask}}, $src}",
13204                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
13205                  VK8WM:$mask, VR512:$src), 0, "att">;
13206  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
13207                  "$dst {${mask}} {z}, $src}",
13208                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
13209                  VK8WM:$mask, VR512:$src), 0, "att">;
13210  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
13211                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
13212                  i64mem:$src), 0, "att">;
13213  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
13214                  "$dst {${mask}}, ${src}{1to8}}",
13215                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
13216                  VK8WM:$mask, i64mem:$src), 0, "att">;
13217  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
13218                  "$dst {${mask}} {z}, ${src}{1to8}}",
13219                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
13220                  VK8WM:$mask, i64mem:$src), 0, "att">;
13221}
13222
// Packed quadword -> half conversions. Both use REX_W, T_MAP5,
// EVEX_CD8<64, CD8VF> and SchedWriteCvtDQ2PS; the unsigned form uses opcode
// 0x7A and adds XD, the signed form uses opcode 0x5B with no prefix class.
// The any_*_to_fp / *_to_fp operators given here only reach the 512-bit form:
// avx512_cvtqq2ph instantiates the 128/256-bit forms with null_frag.
13223defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
13224                            X86VSintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5,
13225                            EVEX_CD8<64, CD8VF>;
13226
13227defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
13228                            X86VUintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5, XD,
13229                            EVEX_CD8<64, CD8VF>;
13230
13231// Convert half to signed/unsigned int 32/64
// All eight instantiations read f16x_info, are gated on HasFP16 and use T_MAP5,
// XS, EVEX_CD8<16, CD8VT1> and WriteCvtSS2I. The 32-bit forms use i32x_info
// with an "{l}" suffix string; the 64-bit forms use i64x_info with "{q}" and
// add REX_W.
// Note that the mnemonic is passed without the leading "v" to
// avx512_cvt_s_int_round ("cvtsh2si") but with it to avx512_cvt_s_all
// ("vcvttsh2si").
// NOTE(review): presumably avx512_cvt_s_int_round prepends the "v" itself -
// confirm at its definition.
13232defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
13233                                   X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
13234                                   T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13235defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
13236                                   X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
13237                                   T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
13238defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
13239                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
13240                                   T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13241defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
13242                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
13243                                   T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
13244
// Truncating variants: opcode 0x2C (signed) / 0x78 (unsigned), selected from
// any_fp_to_sint / any_fp_to_uint and the X86cvtts2* nodes.
13245defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
13246                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13247                        "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13248defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
13249                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13250                        "{q}", HasFP16>, REX_W, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13251defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
13252                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13253                        "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13254defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
13255                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13256                        "{q}", HasFP16>, T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
13257
// Scalar signed/unsigned integer -> half conversions (VCVTSI2SH / VCVTUSI2SH)
// and their selection patterns. Everything in this block requires HasFP16.
13258let Predicates = [HasFP16] in {
  // Signed forms use opcode 0x2A, unsigned forms opcode 0x7B. The 32-bit forms
  // take GR32/i32mem with an "l" suffix and EVEX_CD8<32, CD8VT1>; the 64-bit
  // forms take GR64/i64mem with a "q" suffix, REX_W and EVEX_CD8<64, CD8VT1>.
13259  defm VCVTSI2SHZ  : avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
13260                                   v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
13261                                   T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
13262  defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
13263                                   v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
13264                                   T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
13265  defm VCVTUSI2SHZ   : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
13266                                    v8f16x_info, i32mem, loadi32,
13267                                    "cvtusi2sh","l">, T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
13268  defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
13269                                    v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
13270                                    T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
  // AT&T-only aliases: a suffix-less vcvtsi2sh / vcvtusi2sh with a memory
  // source is mapped to the 32-bit (i32mem) instruction.
13271  def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13272              (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13273
13274  def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13275              (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13276
13277
  // Plain scalar conversions. The first instruction operand is supplied as an
  // IMPLICIT_DEF f16 in all of these patterns.
13278  def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
13279            (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13280  def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
13281            (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13282
13283  def : Pat<(f16 (any_sint_to_fp GR32:$src)),
13284            (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13285  def : Pat<(f16 (any_sint_to_fp GR64:$src)),
13286            (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13287
13288  def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
13289            (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13290  def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
13291            (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13292
13293  def : Pat<(f16 (any_uint_to_fp GR32:$src)),
13294            (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13295  def : Pat<(f16 (any_uint_to_fp GR64:$src)),
13296            (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13297
13298  // Patterns used for matching vcvtsi2sh/vcvtusi2sh intrinsic sequences from
13299  // clang which produce unnecessary vmovsh instructions.
  // Each one folds X86Movsh(dst, scalar_to_vector(int-to-fp(src))) into the
  // corresponding *_Int instruction with $dst as its first operand.
13300  def : Pat<(v8f16 (X86Movsh
13301                     (v8f16 VR128X:$dst),
13302                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
13303            (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13304
13305  def : Pat<(v8f16 (X86Movsh
13306                     (v8f16 VR128X:$dst),
13307                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
13308            (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13309
13310  def : Pat<(v8f16 (X86Movsh
13311                     (v8f16 VR128X:$dst),
13312                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
13313            (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13314
13315  def : Pat<(v8f16 (X86Movsh
13316                     (v8f16 VR128X:$dst),
13317                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
13318            (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13319
13320  def : Pat<(v8f16 (X86Movsh
13321                     (v8f16 VR128X:$dst),
13322                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
13323            (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13324
13325  def : Pat<(v8f16 (X86Movsh
13326                     (v8f16 VR128X:$dst),
13327                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
13328            (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13329
13330  def : Pat<(v8f16 (X86Movsh
13331                     (v8f16 VR128X:$dst),
13332                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
13333            (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13334
13335  def : Pat<(v8f16 (X86Movsh
13336                     (v8f16 VR128X:$dst),
13337                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
13338            (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13339} // Predicates = [HasFP16]
13340
13341let Predicates = [HasFP16, HasVLX] in {
13342  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
13343  // patterns have been disabled with null_frag.
13344  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
13345            (VCVTQQ2PHZ256rr VR256X:$src)>;
13346  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13347                           VK4WM:$mask),
13348            (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13349  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13350                           VK4WM:$mask),
13351            (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13352
13353  def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
13354            (VCVTQQ2PHZ256rm addr:$src)>;
13355  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13356                           VK4WM:$mask),
13357            (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13358  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13359                           VK4WM:$mask),
13360            (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13361
13362  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13363            (VCVTQQ2PHZ256rmb addr:$src)>;
13364  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13365                           (v8f16 VR128X:$src0), VK4WM:$mask),
13366            (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13367  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13368                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13369            (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13370
13371  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
13372            (VCVTQQ2PHZ128rr VR128X:$src)>;
13373  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13374                           VK2WM:$mask),
13375            (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13376  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13377                           VK2WM:$mask),
13378            (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13379
13380  def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
13381            (VCVTQQ2PHZ128rm addr:$src)>;
13382  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13383                           VK2WM:$mask),
13384            (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13385  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13386                           VK2WM:$mask),
13387            (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13388
13389  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13390            (VCVTQQ2PHZ128rmb addr:$src)>;
13391  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13392                           (v8f16 VR128X:$src0), VK2WM:$mask),
13393            (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13394  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13395                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13396            (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13397
13398  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
13399  // patterns have been disabled with null_frag.
  //
  // Every group below consists of three patterns:
  //   * X86any_VUintToFP producing v8f16 selects the plain instruction;
  //   * X86VMUintToFP with a (v8f16 VR128X:$src0) operand and a mask selects
  //     the `k` instruction, which receives $src0, $mask and the source;
  //   * X86VMUintToFP with v8f16x_info.ImmAllZerosV and a mask selects the
  //     `kz` instruction, which receives only $mask and the source.
  // The groups cover a register source (rr), a loadv4i64/loadv2i64 source (rm)
  // and an X86VBroadcastld64 source (rmb). The Z256 instructions take v4i64
  // with a VK4WM mask, the Z128 instructions take v2i64 with a VK2WM mask.
13400  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
13401            (VCVTUQQ2PHZ256rr VR256X:$src)>;
13402  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13403                           VK4WM:$mask),
13404            (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13405  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13406                           VK4WM:$mask),
13407            (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13408
13409  def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
13410            (VCVTUQQ2PHZ256rm addr:$src)>;
13411  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13412                           VK4WM:$mask),
13413            (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13414  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13415                           VK4WM:$mask),
13416            (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13417
13418  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13419            (VCVTUQQ2PHZ256rmb addr:$src)>;
13420  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13421                           (v8f16 VR128X:$src0), VK4WM:$mask),
13422            (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13423  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13424                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13425            (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13426
  // Same three source kinds for the Z128 instructions (v2i64 source, VK2WM
  // mask).
13427  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
13428            (VCVTUQQ2PHZ128rr VR128X:$src)>;
13429  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13430                           VK2WM:$mask),
13431            (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13432  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13433                           VK2WM:$mask),
13434            (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13435
13436  def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
13437            (VCVTUQQ2PHZ128rm addr:$src)>;
13438  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13439                           VK2WM:$mask),
13440            (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13441  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13442                           VK2WM:$mask),
13443            (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13444
13445  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13446            (VCVTUQQ2PHZ128rmb addr:$src)>;
13447  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13448                           (v8f16 VR128X:$src0), VK2WM:$mask),
13449            (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13450  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13451                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13452            (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13453}
13454
// avx512_cfmaop_rm - defines three forms of a three-source operation, each
// through AVX512_maskable_3src:
//   r  - $src3 is a register (_.RC), format MRMSrcReg;
//   m  - $src3 is memory (_.MemOp), loaded with _.LdFrag, format MRMSrcMem;
//   mb - $src3 is scalar memory (_.ScalarMemOp), loaded with
//        _.BroadcastLdFrag, format MRMSrcMem and additionally tagged EVEX_B.
// Template arguments:
//   opc          - 8-bit opcode forwarded to AVX512_maskable_3src.
//   OpcodeStr    - mnemonic string forwarded to AVX512_maskable_3src.
//   OpNode       - node used in the selection pattern; it is applied as
//                  (OpNode $src2, $src3, $src1), i.e. $src1 comes last.
//   _            - X86VectorVTInfo supplying RC, VT, MemOp, ScalarMemOp,
//                  LdFrag, BroadcastLdFrag and BroadcastStr.
//   IsCommutable - forwarded only to the `r` form; `m` and `mb` do not pass it.
// The enclosing `let` applies the constraint string
// "@earlyclobber $dst, $src1 = $dst" to everything defined in the multiclass.
13455let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13456  multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
13457    defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13458            (ins _.RC:$src2, _.RC:$src3),
13459            OpcodeStr, "$src3, $src2", "$src2, $src3",
13460            (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX, VVVV;
13461
13462    defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13463            (ins _.RC:$src2, _.MemOp:$src3),
13464            OpcodeStr, "$src3, $src2", "$src2, $src3",
13465            (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX, VVVV;
13466
    // Broadcast form: both assembly operand strings append _.BroadcastStr to
    // ${src3}.
13467    defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13468            (ins _.RC:$src2, _.ScalarMemOp:$src3),
13469            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
13470            (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX, VVVV;
13471  }
13472} // Constraints = "@earlyclobber $dst, $src1 = $dst"
13473
// avx512_cfmaop_round - defines the `rb` register form of a three-source
// operation that takes an extra AVX512RC:$rc operand.
//   opc, OpcodeStr - forwarded to AVX512_maskable_3src.
//   OpNode         - node used in the pattern; applied as
//                    (OpNode $src2, $src3, $src1, (i32 timm:$rc)).
//   _              - X86VectorVTInfo supplying RC and VT.
// The def carries the constraint "@earlyclobber $dst, $src1 = $dst" and is
// tagged EVEX_B and EVEX_RC. $rc is printed first in the AT&T operand string
// and last in the Intel operand string.
13474multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
13475                                 X86VectorVTInfo _> {
13476  let Constraints = "@earlyclobber $dst, $src1 = $dst" in
13477  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13478          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
13479          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
13480          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
13481          EVEX, VVVV, EVEX_B, EVEX_RC;
13482}
13483
13484
// avx512_cfmaop_common - instantiates avx512_cfmaop_rm at three vector widths.
//   Z    - v16f32_info, EVEX_V512, Sched<[WriteFMAZ]>, predicate HasFP16.
//          This is the only width that also gets the avx512_cfmaop_round `rb`
//          form, which uses OpNodeRnd.
//   Z256 - v8f32x_info, EVEX_V256, Sched<[WriteFMAY]>, predicates HasVLX and
//          HasFP16.
//   Z128 - v4f32x_info, EVEX_V128, Sched<[WriteFMAX]>, predicates HasVLX and
//          HasFP16.
// OpNode and IsCommutable are forwarded unchanged to avx512_cfmaop_rm.
// NOTE(review): f32 vector infos are used under the FP16 predicate; presumably
// each 32-bit element holds a pair of half-precision values - TODO confirm.
13485multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
13486  let Predicates = [HasFP16] in {
13487    defm Z    : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
13488                avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
13489                      EVEX_V512, Sched<[WriteFMAZ]>;
13490  }
13491  let Predicates = [HasVLX, HasFP16] in {
13492    defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
13493    defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
13494  }
13495}
13496
// avx512_cfmulop_common - instantiates avx512_fp_packed at three vector
// widths.
//   Z    - v16f32_info with WriteFMAZ, EVEX_V512, predicate HasFP16. This is
//          the only width that also gets avx512_fp_round_packed, which uses
//          OpNodeRnd.
//   Z256 - v8f32x_info with WriteFMAY, EVEX_V256, predicates HasVLX and
//          HasFP16.
//   Z128 - v4f32x_info with WriteFMAX, EVEX_V128, predicates HasVLX and
//          HasFP16.
// IsCommutable is passed twice in a row to avx512_fp_packed. The arguments
// that follow ("", "@earlyclobber $dst" and 0) are positional; their meaning
// is defined by avx512_fp_packed / avx512_fp_round_packed, which are not
// visible in this part of the file. NOTE(review): presumably a name suffix, a
// clobber constraint and a flag - confirm against those multiclasses.
13497multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13498                                 SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
13499  let Predicates = [HasFP16] in {
13500    defm Z    : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
13501                                 WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
13502                avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
13503                                       "", "@earlyclobber $dst">, EVEX_V512;
13504  }
13505  let Predicates = [HasVLX, HasFP16] in {
13506    defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
13507                                 WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
13508    defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
13509                                 WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
13510  }
13511}
13512
13513
// Packed instantiations. All four are declared with Uses = [MXCSR], live in
// T_MAP6 and use EVEX_CD8<32, CD8VF>.
//   VFMADDCPH / VFCMADDCPH share opcode 0x56 (via avx512_cfmaop_common).
//   VFMULCPH  / VFCMULCPH  share opcode 0xD6 (via avx512_cfmulop_common).
// Within each pair the first uses the XS prefix and is instantiated with
// IsCommutable = 1; the second uses the XD prefix with IsCommutable = 0.
// For the two multiply forms the same node is passed as both OpNode and
// MaskOpNode.
13514let Uses = [MXCSR] in {
13515  defm VFMADDCPH  : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
13516                                    T_MAP6, XS, EVEX_CD8<32, CD8VF>;
13517  defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
13518                                    T_MAP6, XD, EVEX_CD8<32, CD8VF>;
13519
13520  defm VFMULCPH  : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
13521                                         x86vfmulcRnd, 1>, T_MAP6, XS, EVEX_CD8<32, CD8VF>;
13522  defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
13523                                         x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6, XD, EVEX_CD8<32, CD8VF>;
13524}
13525
13526
// avx512_cfmaop_sh_common - defines three forms of a three-source operation on
// VR128X / v4f32x_info through AVX512_maskable_3src, all under HasFP16 and
// the constraint "@earlyclobber $dst, $src1 = $dst":
//   r  - register $src3; pattern (OpNode $src2, $src3, $src1);
//        Sched<[WriteFMAX]>. The only form that receives IsCommutable.
//   m  - ssmem $src3 loaded with sse_load_f32; same operand order;
//        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>.
//   rb - register $src3 plus AVX512RC:$rc; pattern
//        (OpNodeRnd $src2, $src3, $src1, (i32 timm:$rc)); tagged EVEX_B and
//        EVEX_RC; Sched<[WriteFMAX]>.
// opc and OpcodeStr are forwarded unchanged to every form.
13527multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
13528                                   bit IsCommutable> {
13529  let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13530    defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13531                        (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
13532                        "$src3, $src2", "$src2, $src3",
13533                        (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
13534                        Sched<[WriteFMAX]>;
13535    defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
13536                        (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
13537                        "$src3, $src2", "$src2, $src3",
13538                        (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
13539                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13540    defm rb : AVX512_maskable_3src<opc,  MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13541                        (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
13542                        "$rc, $src3, $src2", "$src2, $src3, $rc",
13543                        (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
13544                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13545  }
13546}
13547
// avx512_cfmbinop_sh_common - defines three forms of a two-source operation on
// VR128X through AVX512_maskable with f32x_info, all under HasFP16:
//   rr  - register $src2; pattern (OpNode $src1, $src2); IsCommutable is
//         passed for all three commutability arguments; Sched<[WriteFMAX]>.
//   rm  - ssmem $src2 loaded with sse_load_f32; commutability arguments are
//         0, 0, 0; Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>.
//   rrb - register $src2 plus AVX512RC:$rc; pattern
//         (OpNodeRnd $src1, $src2, (i32 timm:$rc)); commutability arguments
//         are 0, 0, 0; tagged EVEX_B and EVEX_RC; Sched<[WriteFMAX]>.
// Every form passes X86selects followed by "@earlyclobber $dst" as the last
// two AVX512_maskable arguments. NOTE(review): presumably the masking select
// node and the clobber constraint - confirm against AVX512_maskable.
13548multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13549                                     SDNode OpNodeRnd, bit IsCommutable> {
13550  let Predicates = [HasFP16] in {
13551    defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13552                        (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
13553                        "$src2, $src1", "$src1, $src2",
13554                        (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
13555                        IsCommutable, IsCommutable, IsCommutable,
13556                        X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
13557    defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
13558                        (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
13559                        "$src2, $src1", "$src1, $src2",
13560                        (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
13561                        0, 0, 0, X86selects, "@earlyclobber $dst">,
13562                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13563    defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13564                        (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
13565                        "$rc, $src2, $src1", "$src1, $src2, $rc",
13566                        (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
13567                        0, 0, 0, X86selects, "@earlyclobber $dst">,
13568                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13569  }
13570}
13571
// Scalar instantiations. All four are declared with Uses = [MXCSR], live in
// T_MAP6 and use EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX and VVVV.
//   VFMADDCSHZ / VFCMADDCSHZ share opcode 0x57 (via avx512_cfmaop_sh_common).
//   VFMULCSHZ  / VFCMULCSHZ  share opcode 0xD7 (via avx512_cfmbinop_sh_common).
// Within each pair the first uses the XS prefix and is instantiated with
// IsCommutable = 1; the second uses the XD prefix with IsCommutable = 0.
// NOTE(review): VEX_LIG is attached only to the two multiply forms, not to
// the two multiply-add forms - confirm this difference is intentional.
13572let Uses = [MXCSR] in {
13573  defm VFMADDCSHZ  : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
13574                                    T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV;
13575  defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
13576                                    T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV;
13577
13578  defm VFMULCSHZ  : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
13579                                    T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV;
13580  defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
13581                                    T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV;
13582}
13583