//===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 SSE instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//

/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, X86MemOperand x86memop,
                           Domain d, X86FoldableSchedWrite sched,
                           bit Is2Addr = 1> {
let isCodeGenOnly = 1 in {
  let isCommutable = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], d>,
       Sched<[sched]>;
  }
  def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], d>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

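// A hypothetical instantiation of sse12_fp_scalar, shown only to illustrate
// how the multiclass is meant to be used (the real arithmetic definitions
// appear later in this file and are wrapped in further multiclasses):
//   defm ADDSS : sse12_fp_scalar<0x58, "addss", fadd, FR32, f32mem,
//                                SSEPackedSingle, WriteFAdd>, XS;
// This would expand to an ADDSSrr (register) form and an ADDSSrm form that
// folds the load and uses the sched.Folded/ReadAfterFold scheduling info.
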
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr,
                               SDPatternOperator OpNode, RegisterClass RC,
                               ValueType VT, string asm, Operand memopr,
                               ComplexPattern mem_cpat, Domain d,
                               X86FoldableSchedWrite sched, bit Is2Addr = 1> {
let hasSideEffects = 0 in {
  def rr_Int : SI_Int<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (VT (OpNode RC:$src1, RC:$src2)))], d>,
       Sched<[sched]>;
  let mayLoad = 1 in
  def rm_Int : SI_Int<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (VT (OpNode RC:$src1, mem_cpat:$src2)))], d>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

/// sse12_fp_packed - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, ValueType vt,
                           X86MemOperand x86memop, PatFrag mem_frag,
                           Domain d, X86FoldableSchedWrite sched,
                           bit Is2Addr = 1> {
  let isCommutable = 1 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>,
       Sched<[sched]>;
  let mayLoad = 1 in
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
          d>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}

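// Similarly, a hypothetical use of sse12_fp_packed (illustrative only):
//   defm ADDPS : sse12_fp_packed<0x58, "addps", fadd, VR128, v4f32, f128mem,
//                                memopv4f32, SSEPackedSingle, WriteFAdd>, PS;
// would yield a whole-vector ADDPSrr and a load-folding ADDPSrm definition.
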
/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed logical instructions class
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
                                      string OpcodeStr, X86MemOperand x86memop,
                                      X86FoldableSchedWrite sched,
                                      list<dag> pat_rr, list<dag> pat_rm,
                                      bit Is2Addr = 1> {
  let isCommutable = 1, hasSideEffects = 0 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rr, d>,
       Sched<[sched]>;
  let hasSideEffects = 0, mayLoad = 1 in
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rm, d>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}


// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
  def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
                   [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoAVX512]>;
  def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
                   [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2, NoAVX512]>;
}
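// For example (illustrative only): a FsFLD0SS that was allocated to %xmm0 is
// rewritten by ExpandPostRAPseudos to "xorps %xmm0, %xmm0" (or the AVX
// "vxorps" form), a dependency-breaking zero idiom that needs no constant.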

//===----------------------------------------------------------------------===//
// AVX & SSE - Zero/One Vectors
//===----------------------------------------------------------------------===//

// Alias instruction that maps zero vector to pxor / xorp* for sse.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
               [(set VR128:$dst, (v4f32 immAllZerosV))]>;
}

let Predicates = [NoAVX512] in
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
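// Illustrative example: a v4i32 all-zeros vector therefore also selects V_SET0;
// after expansion to "xorps" the ExecutionDomainFix pass may rewrite it to
// "pxor" when the surrounding code executes in the integer domain.
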

// The same as above, but for AVX. The 256-bit AVX1 ISA doesn't support PI,
// and doesn't need it, because on Sandy Bridge the register is set to zero
// at the rename stage without using any execution unit, so the AVX_SET0
// pseudo below can be used for vector int instructions without penalty.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                 [(set VR256:$dst, (v8i32 immAllZerosV))]>;
}

// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
  def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                       [(set VR128:$dst, (v4i32 immAllOnesV))]>;
  let Predicates = [HasAVX1Only, OptForMinSize] in {
  def AVX1_SETALLONES: I<0, Pseudo, (outs VR256:$dst), (ins), "",
                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
  }
  let Predicates = [HasAVX2] in
  def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
}
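// Illustrative note: the all-ones pseudos are normally expanded to a
// compare-equal idiom, e.g. V_SETALLONES to "pcmpeqd %xmmN, %xmmN", which
// sets every bit of the register without loading a constant from memory.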

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move FP Scalar Instructions
//
// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
// register copies because it's a partial register update; register-to-register
// movss/movsd is not modeled as an INSERT_SUBREG because INSERT_SUBREG requires
// that the insert be implementable in terms of a copy, and, as just mentioned,
// we don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//

multiclass sse12_move_rr<SDNode OpNode, ValueType vt,
                         X86MemOperand x86memop, string base_opc,
                         string asm_opr, Domain d, string Name> {
  let isCommutable = 1 in
  def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
              (ins VR128:$src1, VR128:$src2),
              !strconcat(base_opc, asm_opr),
              [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], d>,
              Sched<[SchedWriteFShuffle.XMM]>;

  // For the disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                  (ins VR128:$src1, VR128:$src2),
                  !strconcat(base_opc, asm_opr), []>,
                  Sched<[SchedWriteFShuffle.XMM]>, FoldGenData<Name#rr>;
}

multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
                      X86MemOperand x86memop, string OpcodeStr,
                      Domain d, string Name, Predicate pred> {
  // AVX
  let Predicates = [UseAVX, OptForSize] in
  defm V#NAME : sse12_move_rr<OpNode, vt, x86memop, OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d,
                              "V"#Name>,
                              VEX_4V, VEX_LIG, VEX_WIG;

  def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(store RC:$src, addr:$dst)], d>,
                     VEX, VEX_LIG, Sched<[WriteFStore]>, VEX_WIG;
  // SSE1 & 2
  let Constraints = "$src1 = $dst" in {
    let Predicates = [pred, NoSSE41_Or_OptForSize] in
    defm NAME : sse12_move_rr<OpNode, vt, x86memop, OpcodeStr,
                              "\t{$src2, $dst|$dst, $src2}", d, Name>;
  }

  def NAME#mr   : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(store RC:$src, addr:$dst)], d>,
                     Sched<[WriteFStore]>;

  def : InstAlias<"v"#OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>("V"#NAME#"rr_REV")
                   VR128:$dst, VR128:$src1, VR128:$src2), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src2, $dst|$dst, $src2}",
                  (!cast<Instruction>(NAME#"rr_REV")
                   VR128:$dst, VR128:$src2), 0>;
}

// Loading from memory automatically zeroing upper bits.
multiclass sse12_move_rm<RegisterClass RC, ValueType vt, X86MemOperand x86memop,
                         PatFrag mem_pat, PatFrag vzloadfrag, string OpcodeStr,
                         Domain d> {
  def V#NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
                     VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
  def NAME#rm   : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
                     Sched<[WriteFLoad]>;

  // _alt version uses FR32/FR64 register class.
  let isCodeGenOnly = 1 in {
  def V#NAME#rm_alt : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                         [(set RC:$dst, (mem_pat addr:$src))], d>,
                         VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
  def NAME#rm_alt   : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                         [(set RC:$dst, (mem_pat addr:$src))], d>,
                         Sched<[WriteFLoad]>;
  }
}

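// Illustrative example of the semantics modeled above: the memory form
//   movss (%rax), %xmm0
// writes the loaded f32 to xmm0[31:0] and zeroes xmm0[127:32], which is why
// it can implement the zero-extending vzloadfrag patterns, while the
// register-to-register form only merges into the low element.
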
defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
                        SSEPackedSingle, "MOVSS", UseSSE1>, XS;
defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
                        SSEPackedDouble, "MOVSD", UseSSE2>, XD;

let canFoldAsLoad = 1, isReMaterializable = 1 in {
  defm MOVSS : sse12_move_rm<FR32, v4f32, f32mem, loadf32, X86vzload32, "movss",
                             SSEPackedSingle>, XS;
  defm MOVSD : sse12_move_rm<FR64, v2f64, f64mem, loadf64, X86vzload64, "movsd",
                             SSEPackedDouble>, XD;
}

// Patterns
let Predicates = [UseAVX] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
}

let Predicates = [UseAVX, OptForSize] in {
  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
  // MOVSS to the lower bits.
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (VMOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (VMOVSSrr (v4i32 (V_SET0)), VR128:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSrr (v4f32 (V_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSrr (v4i32 (V_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)))), sub_xmm)>;
}

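// Illustrative codegen for the X86vzmovl patterns above (register numbers are
// examples only): when optimizing for size, the zero-extend of a low scalar
// becomes roughly
//   vxorps %xmm1, %xmm1, %xmm1
//   vmovss %xmm0, %xmm1, %xmm0    # keep bits 31:0, zero bits 127:32
// rather than a longer insert/blend sequence.
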
let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVSS to the lower bits.
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
          (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
          (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
}

let Predicates = [UseSSE2] in
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
          (MOVSDrm addr:$src)>;

let Predicates = [UseSSE1] in
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
          (MOVSSrm addr:$src)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
//===----------------------------------------------------------------------===//

multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
                            X86MemOperand x86memop, PatFrag ld_frag,
                            string asm, Domain d,
                            X86SchedWriteMoveLS sched> {
let hasSideEffects = 0, isMoveReg = 1 in
  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>,
           Sched<[sched.RR]>;
let canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                   [(set RC:$dst, (ld_frag addr:$src))], d>,
           Sched<[sched.RM]>;
}

let Predicates = [HasAVX, NoVLX] in {
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
                                SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                                PS, VEX, VEX_WIG;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
                                SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                                PD, VEX, VEX_WIG;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
                                SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                                PS, VEX, VEX_WIG;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
                                SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                                PD, VEX, VEX_WIG;

defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, "movaps",
                                 SSEPackedSingle, SchedWriteFMoveLS.YMM>,
                                 PS, VEX, VEX_L, VEX_WIG;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd",
                                 SSEPackedDouble, SchedWriteFMoveLS.YMM>,
                                 PD, VEX, VEX_L, VEX_WIG;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups",
                                 SSEPackedSingle, SchedWriteFMoveLS.YMM>,
                                 PS, VEX, VEX_L, VEX_WIG;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd",
                                 SSEPackedDouble, SchedWriteFMoveLS.YMM>,
                                 PD, VEX, VEX_L, VEX_WIG;
}

let Predicates = [UseSSE1] in {
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
                               SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                               PS;
defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
                               SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                               PS;
}
let Predicates = [UseSSE2] in {
defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
                               SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                               PD;
defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
                               SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                               PD;
}

let Predicates = [HasAVX, NoVLX]  in {
let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>,
                   VEX, VEX_WIG;
def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>,
                   VEX, VEX_WIG;
def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)]>,
                   VEX, VEX_WIG;
def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)]>,
                   VEX, VEX_WIG;
} // SchedRW

let SchedRW = [SchedWriteFMoveLS.YMM.MR] in {
def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v8f32 VR256:$src), addr:$dst)]>,
                   VEX, VEX_L, VEX_WIG;
def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f64 VR256:$src), addr:$dst)]>,
                   VEX, VEX_L, VEX_WIG;
def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v8f32 VR256:$src), addr:$dst)]>,
                   VEX, VEX_L, VEX_WIG;
def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v4f64 VR256:$src), addr:$dst)]>,
                   VEX, VEX_L, VEX_WIG;
} // SchedRW
} // Predicate

// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    isMoveReg = 1 in {
let SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
  def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
                          (ins VR128:$src),
                          "movaps\t{$src, $dst|$dst, $src}", []>,
                          VEX, VEX_WIG, FoldGenData<"VMOVAPSrr">;
  def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movapd\t{$src, $dst|$dst, $src}", []>,
                           VEX, VEX_WIG, FoldGenData<"VMOVAPDrr">;
  def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movups\t{$src, $dst|$dst, $src}", []>,
                           VEX, VEX_WIG, FoldGenData<"VMOVUPSrr">;
  def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movupd\t{$src, $dst|$dst, $src}", []>,
                           VEX, VEX_WIG, FoldGenData<"VMOVUPDrr">;
} // SchedRW

let SchedRW = [SchedWriteFMoveLS.YMM.RR] in {
  def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movaps\t{$src, $dst|$dst, $src}", []>,
                            VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVAPSYrr">;
  def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movapd\t{$src, $dst|$dst, $src}", []>,
                            VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVAPDYrr">;
  def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movups\t{$src, $dst|$dst, $src}", []>,
                            VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPSYrr">;
  def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movupd\t{$src, $dst|$dst, $src}", []>,
                            VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPDYrr">;
} // SchedRW
} // Predicate

// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
                (VMOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}",
                (VMOVAPDrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}",
                (VMOVUPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}",
                (VMOVUPDrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
                (VMOVAPSYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}",
                (VMOVAPDYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}",
                (VMOVUPSYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}",
                (VMOVUPDYrr_REV VR256:$dst, VR256:$src), 0>;

let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)]>;
def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)]>;
} // SchedRW

// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    isMoveReg = 1, SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
  def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movaps\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVAPSrr">;
  def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movapd\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVAPDrr">;
  def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movups\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVUPSrr">;
  def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movupd\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVUPDrr">;
}

// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"movaps.s\t{$src, $dst|$dst, $src}",
                (MOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movapd.s\t{$src, $dst|$dst, $src}",
                (MOVAPDrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movups.s\t{$src, $dst|$dst, $src}",
                (MOVUPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movupd.s\t{$src, $dst|$dst, $src}",
                (MOVUPDrr_REV VR128:$dst, VR128:$src), 0>;

let Predicates = [HasAVX, NoVLX] in {
  // 256-bit load/store need to use floating point load/store in case we don't
  // have AVX2. Execution domain fixing will convert to integer if AVX2 is
  // available and changing the domain is beneficial.
  def : Pat<(alignedloadv4i64 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(alignedloadv8i32 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(alignedloadv16i16 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(alignedloadv32i8 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(loadv4i64 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(loadv8i32 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(loadv16i16 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(loadv32i8 addr:$src),
            (VMOVUPSYrm addr:$src)>;

  def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore (v16i16 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore (v32i8 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v4i64 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v8i32 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
}

// Use movaps / movups for SSE integer load / store (one byte shorter).
// The instructions selected below are then converted to MOVDQA/MOVDQU
// during the SSE domain pass.
let Predicates = [UseSSE1] in {
  def : Pat<(alignedloadv2i64 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(alignedloadv4i32 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(alignedloadv8i16 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(alignedloadv16i8 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src),
            (MOVUPSrm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),
            (MOVUPSrm addr:$src)>;
  def : Pat<(loadv8i16 addr:$src),
            (MOVUPSrm addr:$src)>;
  def : Pat<(loadv16i8 addr:$src),
            (MOVUPSrm addr:$src)>;

  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
}
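// Size illustration for the comment above: the legacy SSE encodings are
// "movaps" = 0F 28 /r versus "movdqa" = 66 0F 6F /r, so selecting
// MOVAPS/MOVUPS here saves one prefix byte; the domain-fixing pass can still
// rewrite them to MOVDQA/MOVDQU when keeping the integer domain is worthwhile.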

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//

multiclass sse12_mov_hilo_packed_base<bits<8> opc, SDNode pdnode,
                                      string base_opc, string asm_opr> {
  // No pattern, as these need to be special-cased between high and low.
  let hasSideEffects = 0, mayLoad = 1 in
  def PSrm : PI<opc, MRMSrcMem,
                (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                !strconcat(base_opc, "s", asm_opr),
                [], SSEPackedSingle>, PS,
                Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;

  def PDrm : PI<opc, MRMSrcMem,
         (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
         !strconcat(base_opc, "d", asm_opr),
     [(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
                              (scalar_to_vector (loadf64 addr:$src2)))))],
              SSEPackedDouble>, PD,
     Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

multiclass sse12_mov_hilo_packed<bits<8> opc, SDPatternOperator pdnode,
                                 string base_opc> {
  let Predicates = [UseAVX] in
    defm V#NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc,
                                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
                                    VEX_4V, VEX_WIG;

  let Constraints = "$src1 = $dst" in
    defm NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc,
                                    "\t{$src2, $dst|$dst, $src2}">;
}

defm MOVL : sse12_mov_hilo_packed<0x12, X86Movsd, "movlp">;

let SchedRW = [WriteFStore] in {
let Predicates = [UseAVX] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlps\t{$src, $dst|$dst, $src}",
                     []>,
                     VEX, VEX_WIG;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (extractelt (v2f64 VR128:$src),
                                   (iPTR 0))), addr:$dst)]>,
                     VEX, VEX_WIG;
}// UseAVX
let mayStore = 1, hasSideEffects = 0 in
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlps\t{$src, $dst|$dst, $src}",
                   []>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (extractelt (v2f64 VR128:$src),
                                 (iPTR 0))), addr:$dst)]>;
} // SchedRW

let Predicates = [UseSSE1] in {
  // This pattern helps select MOVLPS on SSE1 only targets. With SSE2 we'll
  // end up with a movsd or blend instead of shufp.
  // No need for aligned load, we're only loading 64-bits.
  def : Pat<(X86Shufp (v4f32 (nonvolatile_load addr:$src2)), VR128:$src1,
                      (i8 -28)),
            (MOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Shufp (v4f32 (X86vzload64 addr:$src2)), VR128:$src1, (i8 -28)),
            (MOVLPSrm VR128:$src1, addr:$src2)>;

  def : Pat<(v4f32 (X86vzload64 addr:$src)),
            (MOVLPSrm (v4f32 (V_SET0)), addr:$src)>;
  def : Pat<(X86vextractstore64 (v4f32 VR128:$src), addr:$dst),
            (MOVLPSmr addr:$dst, VR128:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Hi packed FP Instructions
//===----------------------------------------------------------------------===//

defm MOVH : sse12_mov_hilo_packed<0x16, X86Unpckl, "movhp">;

let SchedRW = [WriteFStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
let Predicates = [UseAVX] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhps\t{$src, $dst|$dst, $src}",
                   []>, VEX, VEX_WIG;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (extractelt
                                 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                 (iPTR 0))), addr:$dst)]>, VEX, VEX_WIG;
} // UseAVX
let mayStore = 1, hasSideEffects = 0 in
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhps\t{$src, $dst|$dst, $src}",
                   []>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (extractelt
                                 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                 (iPTR 0))), addr:$dst)]>;
} // SchedRW

let Predicates = [UseAVX] in {
  // Also handle an i64 load because that may get selected as a faster way to
  // load the data.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                      (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
            (VMOVHPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
            (VMOVHPDrm VR128:$src1, addr:$src2)>;

  def : Pat<(store (f64 (extractelt
                          (v2f64 (X86VPermilpi VR128:$src, (i8 1))),
                          (iPTR 0))), addr:$dst),
            (VMOVHPDmr addr:$dst, VR128:$src)>;

  // MOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;
}

let Predicates = [UseSSE1] in {
  // This pattern helps select MOVHPS on SSE1 only targets. With SSE2 we'll
  // end up with a movsd or blend instead of shufp.
  // No need for aligned load, we're only loading 64-bits.
  def : Pat<(X86Movlhps VR128:$src1, (v4f32 (nonvolatile_load addr:$src2))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128:$src1, (v4f32 (X86vzload64 addr:$src2))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;

  def : Pat<(X86vextractstore64 (v4f32 (X86Movhlps VR128:$src, VR128:$src)),
                                addr:$dst),
            (MOVHPSmr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  // MOVHPD patterns

  // Also handle an i64 load because that may get selected as a faster way to
  // load the data.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                      (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
            (MOVHPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
            (MOVHPDrm VR128:$src1, addr:$src2)>;

  def : Pat<(store (f64 (extractelt
                          (v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))),
                          (iPTR 0))), addr:$dst),
            (MOVHPDmr addr:$dst, VR128:$src)>;

  // MOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
}

let Predicates = [UseSSE2, NoSSE41_Or_OptForSize] in {
  // Use MOVLPD to load into the low bits from a full vector unless we can use
  // BLENDPD.
  def : Pat<(X86Movsd VR128:$src1, (v2f64 (nonvolatile_load addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

let Predicates = [UseAVX] in {
  def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
                                       (ins VR128:$src1, VR128:$src2),
                      "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
                      VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG;
  let isCommutable = 1 in
  def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
                                       (ins VR128:$src1, VR128:$src2),
                      "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
                      VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG,
                      NotMemoryFoldable;
}
let Constraints = "$src1 = $dst" in {
  def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
                                       (ins VR128:$src1, VR128:$src2),
                      "movlhps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
                      Sched<[SchedWriteFShuffle.XMM]>;
  let isCommutable = 1 in
  def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
                                       (ins VR128:$src1, VR128:$src2),
                      "movhlps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
                      Sched<[SchedWriteFShuffle.XMM]>, NotMemoryFoldable;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Conversion Instructions
806*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
807*0b57cec5SDimitry Andric
808*0b57cec5SDimitry Andricmulticlass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
809*0b57cec5SDimitry Andric                     SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
810*0b57cec5SDimitry Andric                     string asm, string mem, X86FoldableSchedWrite sched,
811*0b57cec5SDimitry Andric                     SchedRead Int2Fpu = ReadDefault> {
812*0b57cec5SDimitry Andric  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
813*0b57cec5SDimitry Andric              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
814*0b57cec5SDimitry Andric              [(set DstRC:$dst, (OpNode SrcRC:$src))]>,
815*0b57cec5SDimitry Andric              Sched<[sched, Int2Fpu]>;
816*0b57cec5SDimitry Andric  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
817*0b57cec5SDimitry Andric              mem#"\t{$src, $dst|$dst, $src}",
818*0b57cec5SDimitry Andric              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
819*0b57cec5SDimitry Andric              Sched<[sched.Folded]>;
820*0b57cec5SDimitry Andric}
821*0b57cec5SDimitry Andric
822*0b57cec5SDimitry Andricmulticlass sse12_cvt_p<bits<8> opc, RegisterClass RC, X86MemOperand x86memop,
823*0b57cec5SDimitry Andric                       ValueType DstTy, ValueType SrcTy, PatFrag ld_frag,
824*0b57cec5SDimitry Andric                       string asm, Domain d, X86FoldableSchedWrite sched> {
825*0b57cec5SDimitry Andriclet hasSideEffects = 0 in {
826*0b57cec5SDimitry Andric  def rr : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), asm,
827*0b57cec5SDimitry Andric             [(set RC:$dst, (DstTy (sint_to_fp (SrcTy RC:$src))))], d>,
828*0b57cec5SDimitry Andric             Sched<[sched]>;
829*0b57cec5SDimitry Andric  let mayLoad = 1 in
830*0b57cec5SDimitry Andric  def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), asm,
831*0b57cec5SDimitry Andric             [(set RC:$dst, (DstTy (sint_to_fp
832*0b57cec5SDimitry Andric                                    (SrcTy (ld_frag addr:$src)))))], d>,
833*0b57cec5SDimitry Andric             Sched<[sched.Folded]>;
834*0b57cec5SDimitry Andric}
835*0b57cec5SDimitry Andric}
836*0b57cec5SDimitry Andric
837*0b57cec5SDimitry Andricmulticlass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
838*0b57cec5SDimitry Andric                          X86MemOperand x86memop, string asm, string mem,
839*0b57cec5SDimitry Andric                          X86FoldableSchedWrite sched> {
840*0b57cec5SDimitry Andriclet hasSideEffects = 0, Predicates = [UseAVX] in {
841*0b57cec5SDimitry Andric  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
842*0b57cec5SDimitry Andric              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
843*0b57cec5SDimitry Andric              Sched<[sched, ReadDefault, ReadInt2Fpu]>;
844*0b57cec5SDimitry Andric  let mayLoad = 1 in
845*0b57cec5SDimitry Andric  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
846*0b57cec5SDimitry Andric              (ins DstRC:$src1, x86memop:$src),
847*0b57cec5SDimitry Andric              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
848*0b57cec5SDimitry Andric           Sched<[sched.Folded, sched.ReadAfterFold]>;
849*0b57cec5SDimitry Andric} // hasSideEffects = 0
850*0b57cec5SDimitry Andric}
851*0b57cec5SDimitry Andric
852*0b57cec5SDimitry Andriclet isCodeGenOnly = 1, Predicates = [UseAVX] in {
853*0b57cec5SDimitry Andricdefm VCVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
854*0b57cec5SDimitry Andric                                "cvttss2si", "cvttss2si",
855*0b57cec5SDimitry Andric                                WriteCvtSS2I>,
856*0b57cec5SDimitry Andric                                XS, VEX, VEX_LIG;
857*0b57cec5SDimitry Andricdefm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
858*0b57cec5SDimitry Andric                                "cvttss2si", "cvttss2si",
859*0b57cec5SDimitry Andric                                WriteCvtSS2I>,
860*0b57cec5SDimitry Andric                                XS, VEX, VEX_W, VEX_LIG;
861*0b57cec5SDimitry Andricdefm VCVTTSD2SI   : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
862*0b57cec5SDimitry Andric                                "cvttsd2si", "cvttsd2si",
863*0b57cec5SDimitry Andric                                WriteCvtSD2I>,
864*0b57cec5SDimitry Andric                                XD, VEX, VEX_LIG;
865*0b57cec5SDimitry Andricdefm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
866*0b57cec5SDimitry Andric                                "cvttsd2si", "cvttsd2si",
867*0b57cec5SDimitry Andric                                WriteCvtSD2I>,
868*0b57cec5SDimitry Andric                                XD, VEX, VEX_W, VEX_LIG;
869*0b57cec5SDimitry Andric}
870*0b57cec5SDimitry Andric
871*0b57cec5SDimitry Andric// The assembler can recognize rr 64-bit instructions by seeing an rxx
872*0b57cec5SDimitry Andric// register, but the same isn't true when using only memory operands.
873*0b57cec5SDimitry Andric// Provide other assembly "l" and "q" forms to address this explicitly
874*0b57cec5SDimitry Andric// where appropriate.
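// Illustrative example (AT&T syntax): with a register operand the source
// width is implied by the register,
//   cvtsi2ss %eax, %xmm0      // 32-bit source
//   cvtsi2ss %rax, %xmm0      // 64-bit source
// but with a memory operand it is not, so the suffixed mnemonics are needed
// to disambiguate (shown here for the SSE form; the VEX form behaves the
// same way):
//   cvtsi2ssl (%rdi), %xmm0   // 32-bit load
//   cvtsi2ssq (%rdi), %xmm0   // 64-bit load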
875*0b57cec5SDimitry Andriclet isCodeGenOnly = 1 in {
876*0b57cec5SDimitry Andricdefm VCVTSI2SS   : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss", "l",
877*0b57cec5SDimitry Andric                                  WriteCvtI2SS>, XS, VEX_4V, VEX_LIG;
878*0b57cec5SDimitry Andricdefm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss", "q",
879*0b57cec5SDimitry Andric                                  WriteCvtI2SS>, XS, VEX_4V, VEX_W, VEX_LIG;
880*0b57cec5SDimitry Andricdefm VCVTSI2SD   : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd", "l",
881*0b57cec5SDimitry Andric                                  WriteCvtI2SD>, XD, VEX_4V, VEX_LIG;
882*0b57cec5SDimitry Andricdefm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q",
883*0b57cec5SDimitry Andric                                  WriteCvtI2SD>, XD, VEX_4V, VEX_W, VEX_LIG;
884*0b57cec5SDimitry Andric} // isCodeGenOnly = 1
885*0b57cec5SDimitry Andric
886*0b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
887*0b57cec5SDimitry Andric  def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
888*0b57cec5SDimitry Andric            (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
889*0b57cec5SDimitry Andric  def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
890*0b57cec5SDimitry Andric            (VCVTSI642SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
891*0b57cec5SDimitry Andric  def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
892*0b57cec5SDimitry Andric            (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
893*0b57cec5SDimitry Andric  def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
894*0b57cec5SDimitry Andric            (VCVTSI642SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
895*0b57cec5SDimitry Andric
896*0b57cec5SDimitry Andric  def : Pat<(f32 (sint_to_fp GR32:$src)),
897*0b57cec5SDimitry Andric            (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
898*0b57cec5SDimitry Andric  def : Pat<(f32 (sint_to_fp GR64:$src)),
899*0b57cec5SDimitry Andric            (VCVTSI642SSrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
900*0b57cec5SDimitry Andric  def : Pat<(f64 (sint_to_fp GR32:$src)),
901*0b57cec5SDimitry Andric            (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
902*0b57cec5SDimitry Andric  def : Pat<(f64 (sint_to_fp GR64:$src)),
903*0b57cec5SDimitry Andric            (VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
904*0b57cec5SDimitry Andric}
905*0b57cec5SDimitry Andric
906*0b57cec5SDimitry Andriclet isCodeGenOnly = 1 in {
907*0b57cec5SDimitry Andricdefm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
908*0b57cec5SDimitry Andric                      "cvttss2si", "cvttss2si",
909*0b57cec5SDimitry Andric                      WriteCvtSS2I>, XS;
910*0b57cec5SDimitry Andricdefm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
911*0b57cec5SDimitry Andric                      "cvttss2si", "cvttss2si",
912*0b57cec5SDimitry Andric                      WriteCvtSS2I>, XS, REX_W;
913*0b57cec5SDimitry Andricdefm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
914*0b57cec5SDimitry Andric                      "cvttsd2si", "cvttsd2si",
915*0b57cec5SDimitry Andric                      WriteCvtSD2I>, XD;
916*0b57cec5SDimitry Andricdefm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
917*0b57cec5SDimitry Andric                      "cvttsd2si", "cvttsd2si",
918*0b57cec5SDimitry Andric                      WriteCvtSD2I>, XD, REX_W;
919*0b57cec5SDimitry Andricdefm CVTSI2SS  : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
920*0b57cec5SDimitry Andric                      "cvtsi2ss", "cvtsi2ss{l}",
921*0b57cec5SDimitry Andric                      WriteCvtI2SS, ReadInt2Fpu>, XS;
922*0b57cec5SDimitry Andricdefm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
923*0b57cec5SDimitry Andric                      "cvtsi2ss", "cvtsi2ss{q}",
924*0b57cec5SDimitry Andric                      WriteCvtI2SS, ReadInt2Fpu>, XS, REX_W;
925*0b57cec5SDimitry Andricdefm CVTSI2SD  : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
926*0b57cec5SDimitry Andric                      "cvtsi2sd", "cvtsi2sd{l}",
927*0b57cec5SDimitry Andric                      WriteCvtI2SD, ReadInt2Fpu>, XD;
928*0b57cec5SDimitry Andricdefm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
929*0b57cec5SDimitry Andric                      "cvtsi2sd", "cvtsi2sd{q}",
930*0b57cec5SDimitry Andric                      WriteCvtI2SD, ReadInt2Fpu>, XD, REX_W;
931*0b57cec5SDimitry Andric} // isCodeGenOnly = 1
932*0b57cec5SDimitry Andric
933*0b57cec5SDimitry Andric// Conversion instruction intrinsics - match intrinsics that expect MM
934*0b57cec5SDimitry Andric// and/or XMM operand(s).
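// Illustrative example (assuming clang's emmintrin.h implementation):
//   int f(__m128d x) { return _mm_cvtsd_si32(x); }
// keeps the whole vector operand live, so it is matched by the *_Int forms
// defined below (e.g. (V)CVTSD2SIrr_Int) rather than by the isCodeGenOnly
// scalar-register forms above.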
935*0b57cec5SDimitry Andric
936*0b57cec5SDimitry Andricmulticlass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
937*0b57cec5SDimitry Andric                          ValueType DstVT, ValueType SrcVT, SDNode OpNode,
938*0b57cec5SDimitry Andric                          Operand memop, ComplexPattern mem_cpat, string asm,
939*0b57cec5SDimitry Andric                          X86FoldableSchedWrite sched> {
940*0b57cec5SDimitry Andric  def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
941*0b57cec5SDimitry Andric                  !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
942*0b57cec5SDimitry Andric                  [(set DstRC:$dst, (DstVT (OpNode (SrcVT SrcRC:$src))))]>,
943*0b57cec5SDimitry Andric               Sched<[sched]>;
944*0b57cec5SDimitry Andric  def rm_Int : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
945*0b57cec5SDimitry Andric                  !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
946*0b57cec5SDimitry Andric                  [(set DstRC:$dst, (DstVT (OpNode (SrcVT mem_cpat:$src))))]>,
947*0b57cec5SDimitry Andric               Sched<[sched.Folded]>;
948*0b57cec5SDimitry Andric}
949*0b57cec5SDimitry Andric
950*0b57cec5SDimitry Andricmulticlass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
951*0b57cec5SDimitry Andric                    RegisterClass DstRC, X86MemOperand x86memop,
952*0b57cec5SDimitry Andric                    string asm, string mem, X86FoldableSchedWrite sched,
953*0b57cec5SDimitry Andric                    bit Is2Addr = 1> {
954*0b57cec5SDimitry Andriclet hasSideEffects = 0 in {
955*0b57cec5SDimitry Andric  def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
956*0b57cec5SDimitry Andric                  !if(Is2Addr,
957*0b57cec5SDimitry Andric                      !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
958*0b57cec5SDimitry Andric                      !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
959*0b57cec5SDimitry Andric                  []>, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
960*0b57cec5SDimitry Andric  let mayLoad = 1 in
961*0b57cec5SDimitry Andric  def rm_Int : SI<opc, MRMSrcMem, (outs DstRC:$dst),
962*0b57cec5SDimitry Andric                  (ins DstRC:$src1, x86memop:$src2),
963*0b57cec5SDimitry Andric                  !if(Is2Addr,
964*0b57cec5SDimitry Andric                      asm#"{"#mem#"}\t{$src2, $dst|$dst, $src2}",
965*0b57cec5SDimitry Andric                      asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
966*0b57cec5SDimitry Andric                  []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
967*0b57cec5SDimitry Andric}
968*0b57cec5SDimitry Andric}
969*0b57cec5SDimitry Andric
970*0b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
971*0b57cec5SDimitry Andricdefm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64,
972*0b57cec5SDimitry Andric                  X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
973*0b57cec5SDimitry Andric                  WriteCvtSD2I>, XD, VEX, VEX_LIG;
974*0b57cec5SDimitry Andricdefm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64,
975*0b57cec5SDimitry Andric                    X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
976*0b57cec5SDimitry Andric                    WriteCvtSD2I>, XD, VEX, VEX_W, VEX_LIG;
977*0b57cec5SDimitry Andric}
978*0b57cec5SDimitry Andricdefm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si,
979*0b57cec5SDimitry Andric                 sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD;
980*0b57cec5SDimitry Andricdefm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si,
981*0b57cec5SDimitry Andric                   sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD, REX_W;
982*0b57cec5SDimitry Andric
983*0b57cec5SDimitry Andric
984*0b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
985*0b57cec5SDimitry Andricdefm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
986*0b57cec5SDimitry Andric          i32mem, "cvtsi2ss", "l", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG;
987*0b57cec5SDimitry Andricdefm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
988*0b57cec5SDimitry Andric          i64mem, "cvtsi2ss", "q", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, VEX_W;
989*0b57cec5SDimitry Andricdefm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
990*0b57cec5SDimitry Andric          i32mem, "cvtsi2sd", "l", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG;
991*0b57cec5SDimitry Andricdefm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
992*0b57cec5SDimitry Andric          i64mem, "cvtsi2sd", "q", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG, VEX_W;
993*0b57cec5SDimitry Andric}
994*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
995*0b57cec5SDimitry Andric  defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
996*0b57cec5SDimitry Andric                        i32mem, "cvtsi2ss", "l", WriteCvtI2SS>, XS;
997*0b57cec5SDimitry Andric  defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
998*0b57cec5SDimitry Andric                        i64mem, "cvtsi2ss", "q", WriteCvtI2SS>, XS, REX_W;
999*0b57cec5SDimitry Andric  defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
1000*0b57cec5SDimitry Andric                        i32mem, "cvtsi2sd", "l", WriteCvtI2SD>, XD;
1001*0b57cec5SDimitry Andric  defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
1002*0b57cec5SDimitry Andric                        i64mem, "cvtsi2sd", "q", WriteCvtI2SD>, XD, REX_W;
1003*0b57cec5SDimitry Andric}
1004*0b57cec5SDimitry Andric
1005*0b57cec5SDimitry Andricdef : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1006*0b57cec5SDimitry Andric               (VCVTSI2SSrr_Int VR128:$dst, VR128:$src1, GR32:$src2), 0, "att">;
1007*0b57cec5SDimitry Andricdef : InstAlias<"vcvtsi2ss{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1008*0b57cec5SDimitry Andric               (VCVTSI642SSrr_Int VR128:$dst, VR128:$src1, GR64:$src2), 0, "att">;
1009*0b57cec5SDimitry Andricdef : InstAlias<"vcvtsi2sd{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1010*0b57cec5SDimitry Andric               (VCVTSI2SDrr_Int VR128:$dst, VR128:$src1, GR32:$src2), 0, "att">;
1011*0b57cec5SDimitry Andricdef : InstAlias<"vcvtsi2sd{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1012*0b57cec5SDimitry Andric               (VCVTSI642SDrr_Int VR128:$dst, VR128:$src1, GR64:$src2), 0, "att">;
1013*0b57cec5SDimitry Andric
1014*0b57cec5SDimitry Andricdef : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
1015*0b57cec5SDimitry Andric              (VCVTSI2SSrm_Int VR128:$dst, VR128:$src1, i32mem:$src), 0, "att">;
1016*0b57cec5SDimitry Andricdef : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
1017*0b57cec5SDimitry Andric              (VCVTSI2SDrm_Int VR128:$dst, VR128:$src1, i32mem:$src), 0, "att">;
1018*0b57cec5SDimitry Andric
1019*0b57cec5SDimitry Andricdef : InstAlias<"cvtsi2ss{l}\t{$src, $dst|$dst, $src}",
1020*0b57cec5SDimitry Andric                (CVTSI2SSrr_Int VR128:$dst, GR32:$src), 0, "att">;
1021*0b57cec5SDimitry Andricdef : InstAlias<"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
1022*0b57cec5SDimitry Andric                (CVTSI642SSrr_Int VR128:$dst, GR64:$src), 0, "att">;
1023*0b57cec5SDimitry Andricdef : InstAlias<"cvtsi2sd{l}\t{$src, $dst|$dst, $src}",
1024*0b57cec5SDimitry Andric                (CVTSI2SDrr_Int VR128:$dst, GR32:$src), 0, "att">;
1025*0b57cec5SDimitry Andricdef : InstAlias<"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
1026*0b57cec5SDimitry Andric                (CVTSI642SDrr_Int VR128:$dst, GR64:$src), 0, "att">;
1027*0b57cec5SDimitry Andric
1028*0b57cec5SDimitry Andricdef : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
1029*0b57cec5SDimitry Andric                (CVTSI2SSrm_Int VR128:$dst, i32mem:$src), 0, "att">;
1030*0b57cec5SDimitry Andricdef : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
1031*0b57cec5SDimitry Andric                (CVTSI2SDrm_Int VR128:$dst, i32mem:$src), 0, "att">;
1032*0b57cec5SDimitry Andric
1033*0b57cec5SDimitry Andric/// SSE 1 Only
1034*0b57cec5SDimitry Andric
1035*0b57cec5SDimitry Andric// Aliases for intrinsics
1036*0b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
1037*0b57cec5SDimitry Andricdefm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
1038*0b57cec5SDimitry Andric                                ssmem, sse_load_f32, "cvttss2si",
1039*0b57cec5SDimitry Andric                                WriteCvtSS2I>, XS, VEX, VEX_LIG;
1040*0b57cec5SDimitry Andricdefm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
1041*0b57cec5SDimitry Andric                               X86cvtts2Int, ssmem, sse_load_f32,
1042*0b57cec5SDimitry Andric                               "cvttss2si", WriteCvtSS2I>,
1043*0b57cec5SDimitry Andric                               XS, VEX, VEX_LIG, VEX_W;
1044*0b57cec5SDimitry Andricdefm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
1045*0b57cec5SDimitry Andric                                sdmem, sse_load_f64, "cvttsd2si",
1046*0b57cec5SDimitry Andric                                WriteCvtSS2I>, XD, VEX, VEX_LIG;
1047*0b57cec5SDimitry Andricdefm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
1048*0b57cec5SDimitry Andric                              X86cvtts2Int, sdmem, sse_load_f64,
1049*0b57cec5SDimitry Andric                              "cvttsd2si", WriteCvtSS2I>,
1050*0b57cec5SDimitry Andric                              XD, VEX, VEX_LIG, VEX_W;
1051*0b57cec5SDimitry Andric}
1052*0b57cec5SDimitry Andricdefm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
1053*0b57cec5SDimitry Andric                                    ssmem, sse_load_f32, "cvttss2si",
1054*0b57cec5SDimitry Andric                                    WriteCvtSS2I>, XS;
1055*0b57cec5SDimitry Andricdefm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
1056*0b57cec5SDimitry Andric                                   X86cvtts2Int, ssmem, sse_load_f32,
1057*0b57cec5SDimitry Andric                                   "cvttss2si", WriteCvtSS2I>, XS, REX_W;
1058*0b57cec5SDimitry Andricdefm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
1059*0b57cec5SDimitry Andric                                    sdmem, sse_load_f64, "cvttsd2si",
1060*0b57cec5SDimitry Andric                                    WriteCvtSD2I>, XD;
1061*0b57cec5SDimitry Andricdefm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
1062*0b57cec5SDimitry Andric                                  X86cvtts2Int, sdmem, sse_load_f64,
1063*0b57cec5SDimitry Andric                                  "cvttsd2si", WriteCvtSD2I>, XD, REX_W;
1064*0b57cec5SDimitry Andric
1065*0b57cec5SDimitry Andricdef : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
1066*0b57cec5SDimitry Andric                (VCVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
1067*0b57cec5SDimitry Andricdef : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
1068*0b57cec5SDimitry Andric                (VCVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">;
1069*0b57cec5SDimitry Andricdef : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
1070*0b57cec5SDimitry Andric                (VCVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
1071*0b57cec5SDimitry Andricdef : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
1072*0b57cec5SDimitry Andric                (VCVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">;
1073*0b57cec5SDimitry Andricdef : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
1074*0b57cec5SDimitry Andric                (VCVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
1075*0b57cec5SDimitry Andricdef : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
1076*0b57cec5SDimitry Andric                (VCVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">;
1077*0b57cec5SDimitry Andricdef : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
1078*0b57cec5SDimitry Andric                (VCVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
1079*0b57cec5SDimitry Andricdef : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
1080*0b57cec5SDimitry Andric                (VCVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">;
1081*0b57cec5SDimitry Andric
1082*0b57cec5SDimitry Andricdef : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
1083*0b57cec5SDimitry Andric                (CVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
1084*0b57cec5SDimitry Andricdef : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
1085*0b57cec5SDimitry Andric                (CVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">;
1086*0b57cec5SDimitry Andricdef : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
1087*0b57cec5SDimitry Andric                (CVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
1088*0b57cec5SDimitry Andricdef : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
1089*0b57cec5SDimitry Andric                (CVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">;
1090*0b57cec5SDimitry Andricdef : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
1091*0b57cec5SDimitry Andric                (CVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
1092*0b57cec5SDimitry Andricdef : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
1093*0b57cec5SDimitry Andric                (CVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">;
1094*0b57cec5SDimitry Andricdef : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
1095*0b57cec5SDimitry Andric                (CVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
1096*0b57cec5SDimitry Andricdef : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
1097*0b57cec5SDimitry Andric                (CVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">;
1098*0b57cec5SDimitry Andric
1099*0b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
1100*0b57cec5SDimitry Andricdefm VCVTSS2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
1101*0b57cec5SDimitry Andric                                  ssmem, sse_load_f32, "cvtss2si",
1102*0b57cec5SDimitry Andric                                  WriteCvtSS2I>, XS, VEX, VEX_LIG;
1103*0b57cec5SDimitry Andricdefm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
1104*0b57cec5SDimitry Andric                                  ssmem, sse_load_f32, "cvtss2si",
1105*0b57cec5SDimitry Andric                                  WriteCvtSS2I>, XS, VEX, VEX_W, VEX_LIG;
1106*0b57cec5SDimitry Andric}
1107*0b57cec5SDimitry Andricdefm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
1108*0b57cec5SDimitry Andric                               ssmem, sse_load_f32, "cvtss2si",
1109*0b57cec5SDimitry Andric                               WriteCvtSS2I>, XS;
1110*0b57cec5SDimitry Andricdefm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
1111*0b57cec5SDimitry Andric                                 ssmem, sse_load_f32, "cvtss2si",
1112*0b57cec5SDimitry Andric                                 WriteCvtSS2I>, XS, REX_W;
1113*0b57cec5SDimitry Andric
1114*0b57cec5SDimitry Andricdefm VCVTDQ2PS   : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load,
1115*0b57cec5SDimitry Andric                               "vcvtdq2ps\t{$src, $dst|$dst, $src}",
1116*0b57cec5SDimitry Andric                               SSEPackedSingle, WriteCvtI2PS>,
1117*0b57cec5SDimitry Andric                               PS, VEX, Requires<[HasAVX, NoVLX]>, VEX_WIG;
1118*0b57cec5SDimitry Andricdefm VCVTDQ2PSY  : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, load,
1119*0b57cec5SDimitry Andric                               "vcvtdq2ps\t{$src, $dst|$dst, $src}",
1120*0b57cec5SDimitry Andric                               SSEPackedSingle, WriteCvtI2PSY>,
1121*0b57cec5SDimitry Andric                               PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, VEX_WIG;
1122*0b57cec5SDimitry Andric
1123*0b57cec5SDimitry Andricdefm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memop,
1124*0b57cec5SDimitry Andric                            "cvtdq2ps\t{$src, $dst|$dst, $src}",
1125*0b57cec5SDimitry Andric                            SSEPackedSingle, WriteCvtI2PS>,
1126*0b57cec5SDimitry Andric                            PS, Requires<[UseSSE2]>;
1127*0b57cec5SDimitry Andric
1128*0b57cec5SDimitry Andric// AVX aliases
1129*0b57cec5SDimitry Andricdef : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
1130*0b57cec5SDimitry Andric                (VCVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
1131*0b57cec5SDimitry Andricdef : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
1132*0b57cec5SDimitry Andric                (VCVTSS2SIrm_Int GR32:$dst, ssmem:$src), 0, "att">;
1133*0b57cec5SDimitry Andricdef : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
1134*0b57cec5SDimitry Andric                (VCVTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
1135*0b57cec5SDimitry Andricdef : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
1136*0b57cec5SDimitry Andric                (VCVTSD2SIrm_Int GR32:$dst, sdmem:$src), 0, "att">;
1137*0b57cec5SDimitry Andricdef : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
1138*0b57cec5SDimitry Andric                (VCVTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
1139*0b57cec5SDimitry Andricdef : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
1140*0b57cec5SDimitry Andric                (VCVTSS2SI64rm_Int GR64:$dst, ssmem:$src), 0, "att">;
1141*0b57cec5SDimitry Andricdef : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
1142*0b57cec5SDimitry Andric                (VCVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
1143*0b57cec5SDimitry Andricdef : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
1144*0b57cec5SDimitry Andric                (VCVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0, "att">;
1145*0b57cec5SDimitry Andric
1146*0b57cec5SDimitry Andric// SSE aliases
1147*0b57cec5SDimitry Andricdef : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
1148*0b57cec5SDimitry Andric                (CVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
1149*0b57cec5SDimitry Andricdef : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
1150*0b57cec5SDimitry Andric                (CVTSS2SIrm_Int GR32:$dst, ssmem:$src), 0, "att">;
1151*0b57cec5SDimitry Andricdef : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
1152*0b57cec5SDimitry Andric                (CVTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
1153*0b57cec5SDimitry Andricdef : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
1154*0b57cec5SDimitry Andric                (CVTSD2SIrm_Int GR32:$dst, sdmem:$src), 0, "att">;
1155*0b57cec5SDimitry Andricdef : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
1156*0b57cec5SDimitry Andric                (CVTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
1157*0b57cec5SDimitry Andricdef : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
1158*0b57cec5SDimitry Andric                (CVTSS2SI64rm_Int GR64:$dst, ssmem:$src), 0, "att">;
1159*0b57cec5SDimitry Andricdef : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
1160*0b57cec5SDimitry Andric                (CVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
1161*0b57cec5SDimitry Andricdef : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
1162*0b57cec5SDimitry Andric                (CVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0, "att">;
1163*0b57cec5SDimitry Andric
1164*0b57cec5SDimitry Andric/// SSE 2 Only
1165*0b57cec5SDimitry Andric
1166*0b57cec5SDimitry Andric// Convert scalar double to scalar single
1167*0b57cec5SDimitry Andriclet isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX] in {
1168*0b57cec5SDimitry Andricdef VCVTSD2SSrr  : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
1169*0b57cec5SDimitry Andric                        (ins FR32:$src1, FR64:$src2),
1170*0b57cec5SDimitry Andric                        "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
1171*0b57cec5SDimitry Andric                        VEX_4V, VEX_LIG, VEX_WIG,
1172*0b57cec5SDimitry Andric                        Sched<[WriteCvtSD2SS]>;
1173*0b57cec5SDimitry Andriclet mayLoad = 1 in
1174*0b57cec5SDimitry Andricdef VCVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst),
1175*0b57cec5SDimitry Andric                     (ins FR32:$src1, f64mem:$src2),
1176*0b57cec5SDimitry Andric                     "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
1177*0b57cec5SDimitry Andric                     XD, VEX_4V, VEX_LIG, VEX_WIG,
1178*0b57cec5SDimitry Andric                     Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
1179*0b57cec5SDimitry Andric}
1180*0b57cec5SDimitry Andric
1181*0b57cec5SDimitry Andricdef : Pat<(f32 (fpround FR64:$src)),
1182*0b57cec5SDimitry Andric            (VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>,
1183*0b57cec5SDimitry Andric          Requires<[UseAVX]>;
1184*0b57cec5SDimitry Andric
1185*0b57cec5SDimitry Andriclet isCodeGenOnly = 1 in {
1186*0b57cec5SDimitry Andricdef CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
1187*0b57cec5SDimitry Andric                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
1188*0b57cec5SDimitry Andric                      [(set FR32:$dst, (fpround FR64:$src))]>,
1189*0b57cec5SDimitry Andric                      Sched<[WriteCvtSD2SS]>;
1190*0b57cec5SDimitry Andricdef CVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
1191*0b57cec5SDimitry Andric                    "cvtsd2ss\t{$src, $dst|$dst, $src}",
1192*0b57cec5SDimitry Andric                    [(set FR32:$dst, (fpround (loadf64 addr:$src)))]>,
1193*0b57cec5SDimitry Andric                    XD, Requires<[UseSSE2, OptForSize]>,
1194*0b57cec5SDimitry Andric                    Sched<[WriteCvtSD2SS.Folded]>;
1195*0b57cec5SDimitry Andric}
1196*0b57cec5SDimitry Andric
1197*0b57cec5SDimitry Andricdef VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
1198*0b57cec5SDimitry Andric                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
1199*0b57cec5SDimitry Andric                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1200*0b57cec5SDimitry Andric                       [(set VR128:$dst,
1201*0b57cec5SDimitry Andric                         (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
1202*0b57cec5SDimitry Andric                       XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>,
1203*0b57cec5SDimitry Andric                       Sched<[WriteCvtSD2SS]>;
1204*0b57cec5SDimitry Andricdef VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
1205*0b57cec5SDimitry Andric                       (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
1206*0b57cec5SDimitry Andric                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1207*0b57cec5SDimitry Andric                       [(set VR128:$dst,
1208*0b57cec5SDimitry Andric                         (v4f32 (X86frounds VR128:$src1, sse_load_f64:$src2)))]>,
1209*0b57cec5SDimitry Andric                       XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>,
1210*0b57cec5SDimitry Andric                       Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
1211*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
1212*0b57cec5SDimitry Andricdef CVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
1213*0b57cec5SDimitry Andric                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
1214*0b57cec5SDimitry Andric                       "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
1215*0b57cec5SDimitry Andric                       [(set VR128:$dst,
1216*0b57cec5SDimitry Andric                         (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
1217*0b57cec5SDimitry Andric                       XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>;
1218*0b57cec5SDimitry Andricdef CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
1219*0b57cec5SDimitry Andric                       (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
1220*0b57cec5SDimitry Andric                       "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
1221*0b57cec5SDimitry Andric                       [(set VR128:$dst,
1222*0b57cec5SDimitry Andric                         (v4f32 (X86frounds VR128:$src1,sse_load_f64:$src2)))]>,
1223*0b57cec5SDimitry Andric                       XD, Requires<[UseSSE2]>,
1224*0b57cec5SDimitry Andric                       Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
1225*0b57cec5SDimitry Andric}
1226*0b57cec5SDimitry Andric
1227*0b57cec5SDimitry Andric// Convert scalar single to scalar double
1228*0b57cec5SDimitry Andric// SSE2 instructions with XS prefix
1229*0b57cec5SDimitry Andriclet isCodeGenOnly = 1, hasSideEffects = 0 in {
1230*0b57cec5SDimitry Andricdef VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
1231*0b57cec5SDimitry Andric                    (ins FR64:$src1, FR32:$src2),
1232*0b57cec5SDimitry Andric                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
1233*0b57cec5SDimitry Andric                    XS, VEX_4V, VEX_LIG, VEX_WIG,
1234*0b57cec5SDimitry Andric                    Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>;
1235*0b57cec5SDimitry Andriclet mayLoad = 1 in
1236*0b57cec5SDimitry Andricdef VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
1237*0b57cec5SDimitry Andric                    (ins FR64:$src1, f32mem:$src2),
1238*0b57cec5SDimitry Andric                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
1239*0b57cec5SDimitry Andric                    XS, VEX_4V, VEX_LIG, VEX_WIG,
1240*0b57cec5SDimitry Andric                    Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>,
1241*0b57cec5SDimitry Andric                    Requires<[UseAVX, OptForSize]>;
1242*0b57cec5SDimitry Andric} // isCodeGenOnly = 1, hasSideEffects = 0
1243*0b57cec5SDimitry Andric
1244*0b57cec5SDimitry Andricdef : Pat<(f64 (fpextend FR32:$src)),
1245*0b57cec5SDimitry Andric    (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>, Requires<[UseAVX]>;
1246*0b57cec5SDimitry Andricdef : Pat<(fpextend (loadf32 addr:$src)),
1247*0b57cec5SDimitry Andric    (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX, OptForSize]>;
1248*0b57cec5SDimitry Andric
1249*0b57cec5SDimitry Andriclet isCodeGenOnly = 1 in {
1250*0b57cec5SDimitry Andricdef CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
1251*0b57cec5SDimitry Andric                   "cvtss2sd\t{$src, $dst|$dst, $src}",
1252*0b57cec5SDimitry Andric                   [(set FR64:$dst, (fpextend FR32:$src))]>,
1253*0b57cec5SDimitry Andric                   XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>;
1254*0b57cec5SDimitry Andricdef CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
1255*0b57cec5SDimitry Andric                   "cvtss2sd\t{$src, $dst|$dst, $src}",
1256*0b57cec5SDimitry Andric                   [(set FR64:$dst, (fpextend (loadf32 addr:$src)))]>,
1257*0b57cec5SDimitry Andric                   XS, Requires<[UseSSE2, OptForSize]>,
1258*0b57cec5SDimitry Andric                   Sched<[WriteCvtSS2SD.Folded]>;
1259*0b57cec5SDimitry Andric} // isCodeGenOnly = 1
1260*0b57cec5SDimitry Andric
1261*0b57cec5SDimitry Andriclet hasSideEffects = 0 in {
1262*0b57cec5SDimitry Andricdef VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
1263*0b57cec5SDimitry Andric                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
1264*0b57cec5SDimitry Andric                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1265*0b57cec5SDimitry Andric                    []>, XS, VEX_4V, VEX_LIG, VEX_WIG,
1266*0b57cec5SDimitry Andric                    Requires<[HasAVX]>, Sched<[WriteCvtSS2SD]>;
1267*0b57cec5SDimitry Andriclet mayLoad = 1 in
1268*0b57cec5SDimitry Andricdef VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
1269*0b57cec5SDimitry Andric                      (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
1270*0b57cec5SDimitry Andric                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1271*0b57cec5SDimitry Andric                    []>, XS, VEX_4V, VEX_LIG, VEX_WIG, Requires<[HasAVX]>,
1272*0b57cec5SDimitry Andric                    Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
1273*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
1274*0b57cec5SDimitry Andricdef CVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
1275*0b57cec5SDimitry Andric                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
1276*0b57cec5SDimitry Andric                    "cvtss2sd\t{$src2, $dst|$dst, $src2}",
1277*0b57cec5SDimitry Andric                    []>, XS, Requires<[UseSSE2]>,
1278*0b57cec5SDimitry Andric                    Sched<[WriteCvtSS2SD]>;
1279*0b57cec5SDimitry Andriclet mayLoad = 1 in
1280*0b57cec5SDimitry Andricdef CVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
1281*0b57cec5SDimitry Andric                      (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
1282*0b57cec5SDimitry Andric                    "cvtss2sd\t{$src2, $dst|$dst, $src2}",
1283*0b57cec5SDimitry Andric                    []>, XS, Requires<[UseSSE2]>,
1284*0b57cec5SDimitry Andric                    Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
1285*0b57cec5SDimitry Andric}
1286*0b57cec5SDimitry Andric} // hasSideEffects = 0
1287*0b57cec5SDimitry Andric
1288*0b57cec5SDimitry Andric// Patterns used for matching (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss, and
1289*0b57cec5SDimitry Andric// (v)cvtss2sd intrinsic sequences from clang, which would otherwise produce
1290*0b57cec5SDimitry Andric// unnecessary vmovs{s,d} instructions.
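// Illustrative example (assuming clang's emmintrin.h implementation):
//   __m128d f(__m128d a, int b) { return _mm_cvtsi32_sd(a, b); }
// is emitted by clang as a scalar conversion followed by an insert into
// element 0; without the patterns below that insert would be selected as a
// separate movsd/blend, whereas with them the whole sequence becomes a
// single (V)CVTSI2SDrr_Int.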
1291*0b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
1292*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
1293*0b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1294*0b57cec5SDimitry Andric                   (v4f32 (scalar_to_vector
1295*0b57cec5SDimitry Andric                     (f32 (fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
1296*0b57cec5SDimitry Andric          (VCVTSD2SSrr_Int VR128:$dst, VR128:$src)>;
1297*0b57cec5SDimitry Andric
1298*0b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
1299*0b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1300*0b57cec5SDimitry Andric                   (v2f64 (scalar_to_vector
1301*0b57cec5SDimitry Andric                     (f64 (fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
1302*0b57cec5SDimitry Andric          (VCVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
1303*0b57cec5SDimitry Andric
1304*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
1305*0b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1306*0b57cec5SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
1307*0b57cec5SDimitry Andric          (VCVTSI642SSrr_Int VR128:$dst, GR64:$src)>;
1308*0b57cec5SDimitry Andric
1309*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
1310*0b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1311*0b57cec5SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
1312*0b57cec5SDimitry Andric          (VCVTSI642SSrm_Int VR128:$dst, addr:$src)>;
1313*0b57cec5SDimitry Andric
1314*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
1315*0b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1316*0b57cec5SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
1317*0b57cec5SDimitry Andric          (VCVTSI2SSrr_Int VR128:$dst, GR32:$src)>;
1318*0b57cec5SDimitry Andric
1319*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
1320*0b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1321*0b57cec5SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
1322*0b57cec5SDimitry Andric          (VCVTSI2SSrm_Int VR128:$dst, addr:$src)>;
1323*0b57cec5SDimitry Andric
1324*0b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
1325*0b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1326*0b57cec5SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
1327*0b57cec5SDimitry Andric          (VCVTSI642SDrr_Int VR128:$dst, GR64:$src)>;
1328*0b57cec5SDimitry Andric
1329*0b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
1330*0b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1331*0b57cec5SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
1332*0b57cec5SDimitry Andric          (VCVTSI642SDrm_Int VR128:$dst, addr:$src)>;
1333*0b57cec5SDimitry Andric
1334*0b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
1335*0b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1336*0b57cec5SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
1337*0b57cec5SDimitry Andric          (VCVTSI2SDrr_Int VR128:$dst, GR32:$src)>;
1338*0b57cec5SDimitry Andric
1339*0b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
1340*0b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1341*0b57cec5SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
1342*0b57cec5SDimitry Andric          (VCVTSI2SDrm_Int VR128:$dst, addr:$src)>;
1343*0b57cec5SDimitry Andric} // Predicates = [UseAVX]
1344*0b57cec5SDimitry Andric
1345*0b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
1346*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
1347*0b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1348*0b57cec5SDimitry Andric                   (v4f32 (scalar_to_vector
1349*0b57cec5SDimitry Andric                     (f32 (fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
1350*0b57cec5SDimitry Andric          (CVTSD2SSrr_Int VR128:$dst, VR128:$src)>;
1351*0b57cec5SDimitry Andric
1352*0b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
1353*0b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1354*0b57cec5SDimitry Andric                   (v2f64 (scalar_to_vector
1355*0b57cec5SDimitry Andric                     (f64 (fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
1356*0b57cec5SDimitry Andric          (CVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
1357*0b57cec5SDimitry Andric
1358*0b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
1359*0b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1360*0b57cec5SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
1361*0b57cec5SDimitry Andric          (CVTSI642SDrr_Int VR128:$dst, GR64:$src)>;
1362*0b57cec5SDimitry Andric
1363*0b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
1364*0b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1365*0b57cec5SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
1366*0b57cec5SDimitry Andric          (CVTSI642SDrm_Int VR128:$dst, addr:$src)>;
1367*0b57cec5SDimitry Andric
1368*0b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
1369*0b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1370*0b57cec5SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
1371*0b57cec5SDimitry Andric          (CVTSI2SDrr_Int VR128:$dst, GR32:$src)>;
1372*0b57cec5SDimitry Andric
1373*0b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
1374*0b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1375*0b57cec5SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
1376*0b57cec5SDimitry Andric          (CVTSI2SDrm_Int VR128:$dst, addr:$src)>;
1377*0b57cec5SDimitry Andric} // Predicates = [UseSSE2]
1378*0b57cec5SDimitry Andric
1379*0b57cec5SDimitry Andriclet Predicates = [UseSSE1] in {
1380*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
1381*0b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1382*0b57cec5SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
1383*0b57cec5SDimitry Andric          (CVTSI642SSrr_Int VR128:$dst, GR64:$src)>;
1384*0b57cec5SDimitry Andric
1385*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
1386*0b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1387*0b57cec5SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
1388*0b57cec5SDimitry Andric          (CVTSI642SSrm_Int VR128:$dst, addr:$src)>;
1389*0b57cec5SDimitry Andric
1390*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
1391*0b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1392*0b57cec5SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
1393*0b57cec5SDimitry Andric          (CVTSI2SSrr_Int VR128:$dst, GR32:$src)>;
1394*0b57cec5SDimitry Andric
1395*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
1396*0b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1397*0b57cec5SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
1398*0b57cec5SDimitry Andric          (CVTSI2SSrm_Int VR128:$dst, addr:$src)>;
1399*0b57cec5SDimitry Andric} // Predicates = [UseSSE1]
1400*0b57cec5SDimitry Andric
1401*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
1402*0b57cec5SDimitry Andric// Convert packed single/double fp to doubleword
1403*0b57cec5SDimitry Andricdef VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1404*0b57cec5SDimitry Andric                       "cvtps2dq\t{$src, $dst|$dst, $src}",
1405*0b57cec5SDimitry Andric                       [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>,
1406*0b57cec5SDimitry Andric                       VEX, Sched<[WriteCvtPS2I]>, VEX_WIG;
1407*0b57cec5SDimitry Andricdef VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1408*0b57cec5SDimitry Andric                       "cvtps2dq\t{$src, $dst|$dst, $src}",
1409*0b57cec5SDimitry Andric                       [(set VR128:$dst,
1410*0b57cec5SDimitry Andric                         (v4i32 (X86cvtp2Int (loadv4f32 addr:$src))))]>,
1411*0b57cec5SDimitry Andric                       VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG;
1412*0b57cec5SDimitry Andricdef VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
1413*0b57cec5SDimitry Andric                        "cvtps2dq\t{$src, $dst|$dst, $src}",
1414*0b57cec5SDimitry Andric                        [(set VR256:$dst,
1415*0b57cec5SDimitry Andric                          (v8i32 (X86cvtp2Int (v8f32 VR256:$src))))]>,
1416*0b57cec5SDimitry Andric                        VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG;
1417*0b57cec5SDimitry Andricdef VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
1418*0b57cec5SDimitry Andric                        "cvtps2dq\t{$src, $dst|$dst, $src}",
1419*0b57cec5SDimitry Andric                        [(set VR256:$dst,
1420*0b57cec5SDimitry Andric                          (v8i32 (X86cvtp2Int (loadv8f32 addr:$src))))]>,
1421*0b57cec5SDimitry Andric                        VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG;
1422*0b57cec5SDimitry Andric}
1423*0b57cec5SDimitry Andricdef CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1424*0b57cec5SDimitry Andric                     "cvtps2dq\t{$src, $dst|$dst, $src}",
1425*0b57cec5SDimitry Andric                     [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>,
1426*0b57cec5SDimitry Andric                     Sched<[WriteCvtPS2I]>;
1427*0b57cec5SDimitry Andricdef CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1428*0b57cec5SDimitry Andric                     "cvtps2dq\t{$src, $dst|$dst, $src}",
1429*0b57cec5SDimitry Andric                     [(set VR128:$dst,
1430*0b57cec5SDimitry Andric                       (v4i32 (X86cvtp2Int (memopv4f32 addr:$src))))]>,
1431*0b57cec5SDimitry Andric                     Sched<[WriteCvtPS2ILd]>;
1432*0b57cec5SDimitry Andric
1433*0b57cec5SDimitry Andric
1434*0b57cec5SDimitry Andric// Convert Packed Double FP to Packed DW Integers
1435*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
1436*0b57cec5SDimitry Andric// The assembler can recognize rr 256-bit instructions by seeing a ymm
1437*0b57cec5SDimitry Andric// register, but the same isn't true when using memory operands instead.
1438*0b57cec5SDimitry Andric// Provide other assembly rr and rm forms to address this explicitly.
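// Illustrative example (AT&T syntax): with a register operand the source
// width is implied by the register,
//   vcvtpd2dq %xmm1, %xmm0
//   vcvtpd2dq %ymm1, %xmm0
// but with a memory operand it is not, so the explicit x/y forms are needed:
//   vcvtpd2dqx (%rdi), %xmm0   // 128-bit memory source
//   vcvtpd2dqy (%rdi), %xmm0   // 256-bit memory source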
1439*0b57cec5SDimitry Andricdef VCVTPD2DQrr  : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1440*0b57cec5SDimitry Andric                       "vcvtpd2dq\t{$src, $dst|$dst, $src}",
1441*0b57cec5SDimitry Andric                       [(set VR128:$dst,
1442*0b57cec5SDimitry Andric                         (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>,
1443*0b57cec5SDimitry Andric                       VEX, Sched<[WriteCvtPD2I]>, VEX_WIG;
1444*0b57cec5SDimitry Andric
1445*0b57cec5SDimitry Andric// XMM only
1446*0b57cec5SDimitry Andricdef VCVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1447*0b57cec5SDimitry Andric                      "vcvtpd2dq{x}\t{$src, $dst|$dst, $src}",
1448*0b57cec5SDimitry Andric                      [(set VR128:$dst,
1449*0b57cec5SDimitry Andric                        (v4i32 (X86cvtp2Int (loadv2f64 addr:$src))))]>, VEX,
1450*0b57cec5SDimitry Andric                      Sched<[WriteCvtPD2ILd]>, VEX_WIG;
1451*0b57cec5SDimitry Andric
1452*0b57cec5SDimitry Andric// YMM only
1453*0b57cec5SDimitry Andricdef VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
1454*0b57cec5SDimitry Andric                       "vcvtpd2dq\t{$src, $dst|$dst, $src}",
1455*0b57cec5SDimitry Andric                       [(set VR128:$dst,
1456*0b57cec5SDimitry Andric                         (v4i32 (X86cvtp2Int (v4f64 VR256:$src))))]>,
1457*0b57cec5SDimitry Andric                       VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG;
1458*0b57cec5SDimitry Andricdef VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
1459*0b57cec5SDimitry Andric                       "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
1460*0b57cec5SDimitry Andric                       [(set VR128:$dst,
1461*0b57cec5SDimitry Andric                         (v4i32 (X86cvtp2Int (loadv4f64 addr:$src))))]>,
1462*0b57cec5SDimitry Andric                       VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG;
1463*0b57cec5SDimitry Andric}
1464*0b57cec5SDimitry Andric
1465*0b57cec5SDimitry Andricdef : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
1466*0b57cec5SDimitry Andric                (VCVTPD2DQrr VR128:$dst, VR128:$src), 0, "att">;
1467*0b57cec5SDimitry Andricdef : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}",
1468*0b57cec5SDimitry Andric                (VCVTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">;
1469*0b57cec5SDimitry Andric
1470*0b57cec5SDimitry Andricdef CVTPD2DQrm  : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1471*0b57cec5SDimitry Andric                      "cvtpd2dq\t{$src, $dst|$dst, $src}",
1472*0b57cec5SDimitry Andric                      [(set VR128:$dst,
1473*0b57cec5SDimitry Andric                        (v4i32 (X86cvtp2Int (memopv2f64 addr:$src))))]>,
1474*0b57cec5SDimitry Andric                      Sched<[WriteCvtPD2ILd]>;
1475*0b57cec5SDimitry Andricdef CVTPD2DQrr  : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1476*0b57cec5SDimitry Andric                      "cvtpd2dq\t{$src, $dst|$dst, $src}",
1477*0b57cec5SDimitry Andric                      [(set VR128:$dst,
1478*0b57cec5SDimitry Andric                        (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>,
1479*0b57cec5SDimitry Andric                      Sched<[WriteCvtPD2I]>;
1480*0b57cec5SDimitry Andric
1481*0b57cec5SDimitry Andric// Convert with truncation packed single/double fp to doubleword
1482*0b57cec5SDimitry Andric// SSE2 packed instructions with XS prefix
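// Note: the "tt" (truncating) forms round toward zero regardless of the
// MXCSR rounding mode, e.g. 1.9 -> 1 and -1.9 -> -1; out-of-range inputs
// produce the integer indefinite value 0x80000000.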
1483*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
1484*0b57cec5SDimitry Andricdef VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1485*0b57cec5SDimitry Andric                         "cvttps2dq\t{$src, $dst|$dst, $src}",
1486*0b57cec5SDimitry Andric                         [(set VR128:$dst,
1487*0b57cec5SDimitry Andric                           (v4i32 (X86cvttp2si (v4f32 VR128:$src))))]>,
1488*0b57cec5SDimitry Andric                         VEX, Sched<[WriteCvtPS2I]>, VEX_WIG;
1489*0b57cec5SDimitry Andricdef VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1490*0b57cec5SDimitry Andric                         "cvttps2dq\t{$src, $dst|$dst, $src}",
1491*0b57cec5SDimitry Andric                         [(set VR128:$dst,
1492*0b57cec5SDimitry Andric                           (v4i32 (X86cvttp2si (loadv4f32 addr:$src))))]>,
1493*0b57cec5SDimitry Andric                         VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG;
1494*0b57cec5SDimitry Andricdef VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
1495*0b57cec5SDimitry Andric                          "cvttps2dq\t{$src, $dst|$dst, $src}",
1496*0b57cec5SDimitry Andric                          [(set VR256:$dst,
1497*0b57cec5SDimitry Andric                            (v8i32 (X86cvttp2si (v8f32 VR256:$src))))]>,
1498*0b57cec5SDimitry Andric                          VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG;
1499*0b57cec5SDimitry Andricdef VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
1500*0b57cec5SDimitry Andric                          "cvttps2dq\t{$src, $dst|$dst, $src}",
1501*0b57cec5SDimitry Andric                          [(set VR256:$dst,
1502*0b57cec5SDimitry Andric                            (v8i32 (X86cvttp2si (loadv8f32 addr:$src))))]>,
1503*0b57cec5SDimitry Andric                          VEX, VEX_L,
1504*0b57cec5SDimitry Andric                          Sched<[WriteCvtPS2IYLd]>, VEX_WIG;
1505*0b57cec5SDimitry Andric}
1506*0b57cec5SDimitry Andric
1507*0b57cec5SDimitry Andricdef CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1508*0b57cec5SDimitry Andric                       "cvttps2dq\t{$src, $dst|$dst, $src}",
1509*0b57cec5SDimitry Andric                       [(set VR128:$dst,
1510*0b57cec5SDimitry Andric                         (v4i32 (X86cvttp2si (v4f32 VR128:$src))))]>,
1511*0b57cec5SDimitry Andric                       Sched<[WriteCvtPS2I]>;
1512*0b57cec5SDimitry Andricdef CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1513*0b57cec5SDimitry Andric                       "cvttps2dq\t{$src, $dst|$dst, $src}",
1514*0b57cec5SDimitry Andric                       [(set VR128:$dst,
1515*0b57cec5SDimitry Andric                         (v4i32 (X86cvttp2si (memopv4f32 addr:$src))))]>,
1516*0b57cec5SDimitry Andric                       Sched<[WriteCvtPS2ILd]>;
1517*0b57cec5SDimitry Andric
1518*0b57cec5SDimitry Andric// The assembler can recognize rr 256-bit instructions by seeing a ymm
1519*0b57cec5SDimitry Andric// register, but the same isn't true when using memory operands instead.
1520*0b57cec5SDimitry Andric// Provide other assembly rr and rm forms to address this explicitly.
1521*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
1522*0b57cec5SDimitry Andric// XMM only
1523*0b57cec5SDimitry Andricdef VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1524*0b57cec5SDimitry Andric                        "cvttpd2dq\t{$src, $dst|$dst, $src}",
1525*0b57cec5SDimitry Andric                        [(set VR128:$dst,
1526*0b57cec5SDimitry Andric                          (v4i32 (X86cvttp2si (v2f64 VR128:$src))))]>,
1527*0b57cec5SDimitry Andric                        VEX, Sched<[WriteCvtPD2I]>, VEX_WIG;
1528*0b57cec5SDimitry Andricdef VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1529*0b57cec5SDimitry Andric                        "cvttpd2dq{x}\t{$src, $dst|$dst, $src}",
1530*0b57cec5SDimitry Andric                        [(set VR128:$dst,
1531*0b57cec5SDimitry Andric                          (v4i32 (X86cvttp2si (loadv2f64 addr:$src))))]>,
1532*0b57cec5SDimitry Andric                        VEX, Sched<[WriteCvtPD2ILd]>, VEX_WIG;
1533*0b57cec5SDimitry Andric
1534*0b57cec5SDimitry Andric// YMM only
1535*0b57cec5SDimitry Andricdef VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
1536*0b57cec5SDimitry Andric                         "cvttpd2dq\t{$src, $dst|$dst, $src}",
1537*0b57cec5SDimitry Andric                         [(set VR128:$dst,
1538*0b57cec5SDimitry Andric                           (v4i32 (X86cvttp2si (v4f64 VR256:$src))))]>,
1539*0b57cec5SDimitry Andric                         VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG;
1540*0b57cec5SDimitry Andricdef VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
1541*0b57cec5SDimitry Andric                         "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
1542*0b57cec5SDimitry Andric                         [(set VR128:$dst,
1543*0b57cec5SDimitry Andric                           (v4i32 (X86cvttp2si (loadv4f64 addr:$src))))]>,
1544*0b57cec5SDimitry Andric                         VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG;
1545*0b57cec5SDimitry Andric} // Predicates = [HasAVX, NoVLX]
1546*0b57cec5SDimitry Andric
1547*0b57cec5SDimitry Andricdef : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
1548*0b57cec5SDimitry Andric                (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0, "att">;
1549*0b57cec5SDimitry Andricdef : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}",
1550*0b57cec5SDimitry Andric                (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">;
1551*0b57cec5SDimitry Andric
1552*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
1553*0b57cec5SDimitry Andric  def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
1554*0b57cec5SDimitry Andric            (VCVTTPD2DQYrr VR256:$src)>;
1555*0b57cec5SDimitry Andric  def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
1556*0b57cec5SDimitry Andric            (VCVTTPD2DQYrm addr:$src)>;
1557*0b57cec5SDimitry Andric}
1558*0b57cec5SDimitry Andric
1559*0b57cec5SDimitry Andricdef CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1560*0b57cec5SDimitry Andric                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
1561*0b57cec5SDimitry Andric                      [(set VR128:$dst,
1562*0b57cec5SDimitry Andric                        (v4i32 (X86cvttp2si (v2f64 VR128:$src))))]>,
1563*0b57cec5SDimitry Andric                      Sched<[WriteCvtPD2I]>;
1564*0b57cec5SDimitry Andricdef CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
1565*0b57cec5SDimitry Andric                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
1566*0b57cec5SDimitry Andric                      [(set VR128:$dst,
1567*0b57cec5SDimitry Andric                        (v4i32 (X86cvttp2si (memopv2f64 addr:$src))))]>,
1568*0b57cec5SDimitry Andric                      Sched<[WriteCvtPD2ILd]>;
1569*0b57cec5SDimitry Andric
1570*0b57cec5SDimitry Andric// Convert packed single to packed double
1571*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
1572*0b57cec5SDimitry Andric// SSE2 instructions without OpSize prefix
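// Note: the 128-bit form widens only the two low single-precision elements,
// so the folded-load variants read just 64 bits (f64mem / extloadv2f32).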
1573*0b57cec5SDimitry Andricdef VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1574*0b57cec5SDimitry Andric                    "vcvtps2pd\t{$src, $dst|$dst, $src}",
1575*0b57cec5SDimitry Andric                    [(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>,
1576*0b57cec5SDimitry Andric                    PS, VEX, Sched<[WriteCvtPS2PD]>, VEX_WIG;
1577*0b57cec5SDimitry Andricdef VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
1578*0b57cec5SDimitry Andric                    "vcvtps2pd\t{$src, $dst|$dst, $src}",
1579*0b57cec5SDimitry Andric                    [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
1580*0b57cec5SDimitry Andric                    PS, VEX, Sched<[WriteCvtPS2PD.Folded]>, VEX_WIG;
1581*0b57cec5SDimitry Andricdef VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
1582*0b57cec5SDimitry Andric                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
1583*0b57cec5SDimitry Andric                     [(set VR256:$dst, (v4f64 (fpextend (v4f32 VR128:$src))))]>,
1584*0b57cec5SDimitry Andric                     PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, VEX_WIG;
1585*0b57cec5SDimitry Andricdef VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
1586*0b57cec5SDimitry Andric                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
1587*0b57cec5SDimitry Andric                     [(set VR256:$dst, (v4f64 (extloadv4f32 addr:$src)))]>,
1588*0b57cec5SDimitry Andric                     PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, VEX_WIG;
1589*0b57cec5SDimitry Andric}
1590*0b57cec5SDimitry Andric
1591*0b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
1592*0b57cec5SDimitry Andricdef CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1593*0b57cec5SDimitry Andric                   "cvtps2pd\t{$src, $dst|$dst, $src}",
1594*0b57cec5SDimitry Andric                   [(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>,
1595*0b57cec5SDimitry Andric                   PS, Sched<[WriteCvtPS2PD]>;
1596*0b57cec5SDimitry Andricdef CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
1597*0b57cec5SDimitry Andric                   "cvtps2pd\t{$src, $dst|$dst, $src}",
1598*0b57cec5SDimitry Andric                   [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
1599*0b57cec5SDimitry Andric                   PS, Sched<[WriteCvtPS2PD.Folded]>;
1600*0b57cec5SDimitry Andric}
1601*0b57cec5SDimitry Andric
1602*0b57cec5SDimitry Andric// Convert Packed DW Integers to Packed Double FP
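// Note: the 128-bit form converts only the two low doublewords, which is why
// the memory variants load 64 bits (loadi64 wrapped in scalar_to_vector and
// bitcast to v4i32) instead of a full 128-bit vector.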
1603*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
1604*0b57cec5SDimitry Andriclet hasSideEffects = 0, mayLoad = 1 in
1605*0b57cec5SDimitry Andricdef VCVTDQ2PDrm  : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
1606*0b57cec5SDimitry Andric                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
1607*0b57cec5SDimitry Andric                        [(set VR128:$dst,
1608*0b57cec5SDimitry Andric                          (v2f64 (X86VSintToFP
1609*0b57cec5SDimitry Andric                                  (bc_v4i32
1610*0b57cec5SDimitry Andric                                   (v2i64 (scalar_to_vector
1611*0b57cec5SDimitry Andric                                           (loadi64 addr:$src)))))))]>,
1612*0b57cec5SDimitry Andric                        VEX, Sched<[WriteCvtI2PDLd]>, VEX_WIG;
1613*0b57cec5SDimitry Andricdef VCVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1614*0b57cec5SDimitry Andric                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
1615*0b57cec5SDimitry Andric                        [(set VR128:$dst,
1616*0b57cec5SDimitry Andric                          (v2f64 (X86VSintToFP (v4i32 VR128:$src))))]>,
1617*0b57cec5SDimitry Andric                        VEX, Sched<[WriteCvtI2PD]>, VEX_WIG;
1618*0b57cec5SDimitry Andricdef VCVTDQ2PDYrm  : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
1619*0b57cec5SDimitry Andric                         "vcvtdq2pd\t{$src, $dst|$dst, $src}",
1620*0b57cec5SDimitry Andric                         [(set VR256:$dst,
1621*0b57cec5SDimitry Andric                           (v4f64 (sint_to_fp (loadv4i32 addr:$src))))]>,
1622*0b57cec5SDimitry Andric                         VEX, VEX_L, Sched<[WriteCvtI2PDYLd]>,
1623*0b57cec5SDimitry Andric                         VEX_WIG;
1624*0b57cec5SDimitry Andricdef VCVTDQ2PDYrr  : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
1625*0b57cec5SDimitry Andric                         "vcvtdq2pd\t{$src, $dst|$dst, $src}",
1626*0b57cec5SDimitry Andric                         [(set VR256:$dst,
1627*0b57cec5SDimitry Andric                           (v4f64 (sint_to_fp (v4i32 VR128:$src))))]>,
1628*0b57cec5SDimitry Andric                         VEX, VEX_L, Sched<[WriteCvtI2PDY]>, VEX_WIG;
1629*0b57cec5SDimitry Andric}
1630*0b57cec5SDimitry Andric
1631*0b57cec5SDimitry Andriclet hasSideEffects = 0, mayLoad = 1 in
1632*0b57cec5SDimitry Andricdef CVTDQ2PDrm  : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
1633*0b57cec5SDimitry Andric                       "cvtdq2pd\t{$src, $dst|$dst, $src}",
1634*0b57cec5SDimitry Andric                       [(set VR128:$dst,
1635*0b57cec5SDimitry Andric                         (v2f64 (X86VSintToFP
1636*0b57cec5SDimitry Andric                                 (bc_v4i32
1637*0b57cec5SDimitry Andric                                  (v2i64 (scalar_to_vector
1638*0b57cec5SDimitry Andric                                          (loadi64 addr:$src)))))))]>,
1639*0b57cec5SDimitry Andric                       Sched<[WriteCvtI2PDLd]>;
1640*0b57cec5SDimitry Andricdef CVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1641*0b57cec5SDimitry Andric                       "cvtdq2pd\t{$src, $dst|$dst, $src}",
1642*0b57cec5SDimitry Andric                       [(set VR128:$dst,
1643*0b57cec5SDimitry Andric                         (v2f64 (X86VSintToFP (v4i32 VR128:$src))))]>,
1644*0b57cec5SDimitry Andric                       Sched<[WriteCvtI2PD]>;
1645*0b57cec5SDimitry Andric
1646*0b57cec5SDimitry Andric// AVX patterns to fold a zero-extending 64-bit load into cvtdq2pd
1647*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
1648*0b57cec5SDimitry Andric  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
1649*0b57cec5SDimitry Andric            (VCVTDQ2PDrm addr:$src)>;
1650*0b57cec5SDimitry Andric} // Predicates = [HasAVX, NoVLX]
1651*0b57cec5SDimitry Andric
1652*0b57cec5SDimitry Andric// SSE2 patterns to fold a zero-extending 64-bit load into cvtdq2pd
1653*0b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
1654*0b57cec5SDimitry Andric  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
1655*0b57cec5SDimitry Andric            (CVTDQ2PDrm addr:$src)>;
1656*0b57cec5SDimitry Andric} // Predicates = [UseSSE2]
1657*0b57cec5SDimitry Andric
1658*0b57cec5SDimitry Andric// Convert packed double to packed single
1659*0b57cec5SDimitry Andric// The assembler can recognize rr 256-bit instructions by seeing a ymm
1660*0b57cec5SDimitry Andric// register, but the same isn't true when using memory operands instead.
1661*0b57cec5SDimitry Andric// Provide other assembly rr and rm forms to address this explicitly.
1662*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
1663*0b57cec5SDimitry Andric// XMM only
1664*0b57cec5SDimitry Andricdef VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1665*0b57cec5SDimitry Andric                       "cvtpd2ps\t{$src, $dst|$dst, $src}",
1666*0b57cec5SDimitry Andric                       [(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>,
1667*0b57cec5SDimitry Andric                       VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG;
1668*0b57cec5SDimitry Andricdef VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1669*0b57cec5SDimitry Andric                       "cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
1670*0b57cec5SDimitry Andric                       [(set VR128:$dst, (X86vfpround (loadv2f64 addr:$src)))]>,
1671*0b57cec5SDimitry Andric                       VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG;
1672*0b57cec5SDimitry Andric
1673*0b57cec5SDimitry Andricdef VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
1674*0b57cec5SDimitry Andric                        "cvtpd2ps\t{$src, $dst|$dst, $src}",
1675*0b57cec5SDimitry Andric                        [(set VR128:$dst, (X86vfpround VR256:$src))]>,
1676*0b57cec5SDimitry Andric                        VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG;
1677*0b57cec5SDimitry Andricdef VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
1678*0b57cec5SDimitry Andric                        "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
1679*0b57cec5SDimitry Andric                        [(set VR128:$dst, (X86vfpround (loadv4f64 addr:$src)))]>,
1680*0b57cec5SDimitry Andric                        VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG;
1681*0b57cec5SDimitry Andric} // Predicates = [HasAVX, NoVLX]
1682*0b57cec5SDimitry Andric
1683*0b57cec5SDimitry Andricdef : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
1684*0b57cec5SDimitry Andric                (VCVTPD2PSrr VR128:$dst, VR128:$src), 0, "att">;
1685*0b57cec5SDimitry Andricdef : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
1686*0b57cec5SDimitry Andric                (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0, "att">;
1687*0b57cec5SDimitry Andric
1688*0b57cec5SDimitry Andricdef CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1689*0b57cec5SDimitry Andric                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
1690*0b57cec5SDimitry Andric                     [(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>,
1691*0b57cec5SDimitry Andric                     Sched<[WriteCvtPD2PS]>;
1692*0b57cec5SDimitry Andricdef CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1693*0b57cec5SDimitry Andric                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
1694*0b57cec5SDimitry Andric                     [(set VR128:$dst, (X86vfpround (memopv2f64 addr:$src)))]>,
1695*0b57cec5SDimitry Andric                     Sched<[WriteCvtPD2PS.Folded]>;
1696*0b57cec5SDimitry Andric
1697*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
1698*0b57cec5SDimitry Andric  def : Pat<(v4f32 (fpround (v4f64 VR256:$src))),
1699*0b57cec5SDimitry Andric            (VCVTPD2PSYrr VR256:$src)>;
1700*0b57cec5SDimitry Andric  def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
1701*0b57cec5SDimitry Andric            (VCVTPD2PSYrm addr:$src)>;
1702*0b57cec5SDimitry Andric}
1703*0b57cec5SDimitry Andric
1704*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
1705*0b57cec5SDimitry Andric// SSE 1 & 2 - Compare Instructions
1706*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
1707*0b57cec5SDimitry Andric
1708*0b57cec5SDimitry Andric// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
1709*0b57cec5SDimitry Andricmulticlass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
1710*0b57cec5SDimitry Andric                            SDNode OpNode, ValueType VT,
1711*0b57cec5SDimitry Andric                            PatFrag ld_frag, string asm,
1712*0b57cec5SDimitry Andric                            X86FoldableSchedWrite sched> {
1713*0b57cec5SDimitry Andric  let isCommutable = 1 in
1714*0b57cec5SDimitry Andric  def rr : SIi8<0xC2, MRMSrcReg,
1715*0b57cec5SDimitry Andric                (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
1716*0b57cec5SDimitry Andric                [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))]>,
1717*0b57cec5SDimitry Andric                Sched<[sched]>;
1718*0b57cec5SDimitry Andric  def rm : SIi8<0xC2, MRMSrcMem,
1719*0b57cec5SDimitry Andric                (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
1720*0b57cec5SDimitry Andric                [(set RC:$dst, (OpNode (VT RC:$src1),
1721*0b57cec5SDimitry Andric                                         (ld_frag addr:$src2), imm:$cc))]>,
1722*0b57cec5SDimitry Andric                Sched<[sched.Folded, sched.ReadAfterFold]>;
1723*0b57cec5SDimitry Andric}
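// For reference, the low three bits of the $cc immediate select the legacy
// SSE predicate: 0=EQ, 1=LT, 2=LE, 3=UNORD, 4=NEQ, 5=NLT, 6=NLE, 7=ORD;
// the VEX encodings extend this to 32 predicates.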
1724*0b57cec5SDimitry Andric
1725*0b57cec5SDimitry Andriclet isCodeGenOnly = 1 in {
1726*0b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in
1727*0b57cec5SDimitry Andric  defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmps, f32, loadf32,
1728*0b57cec5SDimitry Andric                   "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
1729*0b57cec5SDimitry Andric                   SchedWriteFCmpSizes.PS.Scl>, XS, VEX_4V, VEX_LIG, VEX_WIG;
1730*0b57cec5SDimitry Andric  let ExeDomain = SSEPackedDouble in
1731*0b57cec5SDimitry Andric  defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmps, f64, loadf64,
1732*0b57cec5SDimitry Andric                   "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
1733*0b57cec5SDimitry Andric                   SchedWriteFCmpSizes.PD.Scl>,
1734*0b57cec5SDimitry Andric                   XD, VEX_4V, VEX_LIG, VEX_WIG;
1735*0b57cec5SDimitry Andric
1736*0b57cec5SDimitry Andric  let Constraints = "$src1 = $dst" in {
1737*0b57cec5SDimitry Andric    let ExeDomain = SSEPackedSingle in
1738*0b57cec5SDimitry Andric    defm CMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmps, f32, loadf32,
1739*0b57cec5SDimitry Andric                    "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}",
1740*0b57cec5SDimitry Andric                    SchedWriteFCmpSizes.PS.Scl>, XS;
1741*0b57cec5SDimitry Andric    let ExeDomain = SSEPackedDouble in
1742*0b57cec5SDimitry Andric    defm CMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmps, f64, loadf64,
1743*0b57cec5SDimitry Andric                    "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
1744*0b57cec5SDimitry Andric                    SchedWriteFCmpSizes.PD.Scl>, XD;
1745*0b57cec5SDimitry Andric  }
1746*0b57cec5SDimitry Andric}
1747*0b57cec5SDimitry Andric
1748*0b57cec5SDimitry Andricmulticlass sse12_cmp_scalar_int<Operand memop,
1749*0b57cec5SDimitry Andric                         Intrinsic Int, string asm, X86FoldableSchedWrite sched,
1750*0b57cec5SDimitry Andric                         ComplexPattern mem_cpat> {
1751*0b57cec5SDimitry Andric  def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
1752*0b57cec5SDimitry Andric                      (ins VR128:$src1, VR128:$src, u8imm:$cc), asm,
1753*0b57cec5SDimitry Andric                        [(set VR128:$dst, (Int VR128:$src1,
1754*0b57cec5SDimitry Andric                                               VR128:$src, imm:$cc))]>,
1755*0b57cec5SDimitry Andric           Sched<[sched]>;
1756*0b57cec5SDimitry Andriclet mayLoad = 1 in
1757*0b57cec5SDimitry Andric  def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
1758*0b57cec5SDimitry Andric                      (ins VR128:$src1, memop:$src, u8imm:$cc), asm,
1759*0b57cec5SDimitry Andric                        [(set VR128:$dst, (Int VR128:$src1,
1760*0b57cec5SDimitry Andric                                               mem_cpat:$src, imm:$cc))]>,
1761*0b57cec5SDimitry Andric           Sched<[sched.Folded, sched.ReadAfterFold]>;
1762*0b57cec5SDimitry Andric}
1763*0b57cec5SDimitry Andric
1764*0b57cec5SDimitry Andric// Aliases to match intrinsics which expect XMM operand(s).
1765*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in
1766*0b57cec5SDimitry Andricdefm VCMPSS  : sse12_cmp_scalar_int<ssmem, int_x86_sse_cmp_ss,
1767*0b57cec5SDimitry Andric                     "cmpss\t{$cc, $src, $src1, $dst|$dst, $src1, $src, $cc}",
1768*0b57cec5SDimitry Andric                     SchedWriteFCmpSizes.PS.Scl, sse_load_f32>,
1769*0b57cec5SDimitry Andric                     XS, VEX_4V, VEX_LIG, VEX_WIG;
1770*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in
1771*0b57cec5SDimitry Andricdefm VCMPSD  : sse12_cmp_scalar_int<sdmem, int_x86_sse2_cmp_sd,
1772*0b57cec5SDimitry Andric                     "cmpsd\t{$cc, $src, $src1, $dst|$dst, $src1, $src, $cc}",
1773*0b57cec5SDimitry Andric                     SchedWriteFCmpSizes.PD.Scl, sse_load_f64>,
1774*0b57cec5SDimitry Andric                     XD, VEX_4V, VEX_LIG, VEX_WIG;
1775*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
1776*0b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in
1777*0b57cec5SDimitry Andric  defm CMPSS  : sse12_cmp_scalar_int<ssmem, int_x86_sse_cmp_ss,
1778*0b57cec5SDimitry Andric                       "cmpss\t{$cc, $src, $dst|$dst, $src, $cc}",
1779*0b57cec5SDimitry Andric                       SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS;
1780*0b57cec5SDimitry Andric  let ExeDomain = SSEPackedDouble in
1781*0b57cec5SDimitry Andric  defm CMPSD  : sse12_cmp_scalar_int<sdmem, int_x86_sse2_cmp_sd,
1782*0b57cec5SDimitry Andric                       "cmpsd\t{$cc, $src, $dst|$dst, $src, $cc}",
1783*0b57cec5SDimitry Andric                       SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, XD;
1784*0b57cec5SDimitry Andric}
1785*0b57cec5SDimitry Andric
1786*0b57cec5SDimitry Andric
1787*0b57cec5SDimitry Andric// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
1788*0b57cec5SDimitry Andricmulticlass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
1789*0b57cec5SDimitry Andric                         ValueType vt, X86MemOperand x86memop,
1790*0b57cec5SDimitry Andric                         PatFrag ld_frag, string OpcodeStr,
1791*0b57cec5SDimitry Andric                         X86FoldableSchedWrite sched> {
1792*0b57cec5SDimitry Andriclet hasSideEffects = 0 in {
1793*0b57cec5SDimitry Andric  def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
1794*0b57cec5SDimitry Andric                     !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
1795*0b57cec5SDimitry Andric                     [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
1796*0b57cec5SDimitry Andric          Sched<[sched]>;
1797*0b57cec5SDimitry Andriclet mayLoad = 1 in
1798*0b57cec5SDimitry Andric  def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
1799*0b57cec5SDimitry Andric                     !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
1800*0b57cec5SDimitry Andric                     [(set EFLAGS, (OpNode (vt RC:$src1),
1801*0b57cec5SDimitry Andric                                           (ld_frag addr:$src2)))]>,
1802*0b57cec5SDimitry Andric          Sched<[sched.Folded, sched.ReadAfterFold]>;
1803*0b57cec5SDimitry Andric}
1804*0b57cec5SDimitry Andric}
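// For reference, these compares report their result in EFLAGS: ZF, PF and CF
// are all set for an unordered result, CF alone for "less than", ZF alone for
// "equal", and all three are clear for "greater than"; OF, SF and AF are
// zeroed.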
1805*0b57cec5SDimitry Andric
1806*0b57cec5SDimitry Andric// sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp
1807*0b57cec5SDimitry Andricmulticlass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
1808*0b57cec5SDimitry Andric                             ValueType vt, Operand memop,
1809*0b57cec5SDimitry Andric                             ComplexPattern mem_cpat, string OpcodeStr,
1810*0b57cec5SDimitry Andric                             X86FoldableSchedWrite sched> {
1811*0b57cec5SDimitry Andric  def rr_Int: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
1812*0b57cec5SDimitry Andric                     !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
1813*0b57cec5SDimitry Andric                     [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
1814*0b57cec5SDimitry Andric          Sched<[sched]>;
1815*0b57cec5SDimitry Andriclet mayLoad = 1 in
1816*0b57cec5SDimitry Andric  def rm_Int: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2),
1817*0b57cec5SDimitry Andric                     !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
1818*0b57cec5SDimitry Andric                     [(set EFLAGS, (OpNode (vt RC:$src1),
1819*0b57cec5SDimitry Andric                                           mem_cpat:$src2))]>,
1820*0b57cec5SDimitry Andric          Sched<[sched.Folded, sched.ReadAfterFold]>;
1821*0b57cec5SDimitry Andric}
1822*0b57cec5SDimitry Andric
1823*0b57cec5SDimitry Andriclet Defs = [EFLAGS] in {
1824*0b57cec5SDimitry Andric  defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
1825*0b57cec5SDimitry Andric                               "ucomiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG;
1826*0b57cec5SDimitry Andric  defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
1827*0b57cec5SDimitry Andric                               "ucomisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG;
1828*0b57cec5SDimitry Andric  let Pattern = []<dag> in {
1829*0b57cec5SDimitry Andric    defm VCOMISS  : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32,
1830*0b57cec5SDimitry Andric                                "comiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG;
1831*0b57cec5SDimitry Andric    defm VCOMISD  : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64,
1832*0b57cec5SDimitry Andric                                "comisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG;
1833*0b57cec5SDimitry Andric  }
1834*0b57cec5SDimitry Andric
1835*0b57cec5SDimitry Andric  let isCodeGenOnly = 1 in {
1836*0b57cec5SDimitry Andric    defm VUCOMISS  : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
1837*0b57cec5SDimitry Andric                      sse_load_f32, "ucomiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG;
1838*0b57cec5SDimitry Andric    defm VUCOMISD  : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
1839*0b57cec5SDimitry Andric                      sse_load_f64, "ucomisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG;
1840*0b57cec5SDimitry Andric
1841*0b57cec5SDimitry Andric    defm VCOMISS  : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
1842*0b57cec5SDimitry Andric                       sse_load_f32, "comiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG;
1843*0b57cec5SDimitry Andric    defm VCOMISD  : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
1844*0b57cec5SDimitry Andric                       sse_load_f64, "comisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG;
1845*0b57cec5SDimitry Andric  }
1846*0b57cec5SDimitry Andric  defm UCOMISS  : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
1847*0b57cec5SDimitry Andric                                  "ucomiss", WriteFCom>, PS;
1848*0b57cec5SDimitry Andric  defm UCOMISD  : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
1849*0b57cec5SDimitry Andric                                  "ucomisd", WriteFCom>, PD;
1850*0b57cec5SDimitry Andric
1851*0b57cec5SDimitry Andric  let Pattern = []<dag> in {
1852*0b57cec5SDimitry Andric    defm COMISS  : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32,
1853*0b57cec5SDimitry Andric                                    "comiss", WriteFCom>, PS;
1854*0b57cec5SDimitry Andric    defm COMISD  : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64,
1855*0b57cec5SDimitry Andric                                    "comisd", WriteFCom>, PD;
1856*0b57cec5SDimitry Andric  }
1857*0b57cec5SDimitry Andric
1858*0b57cec5SDimitry Andric  let isCodeGenOnly = 1 in {
1859*0b57cec5SDimitry Andric    defm UCOMISS  : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
1860*0b57cec5SDimitry Andric                            sse_load_f32, "ucomiss", WriteFCom>, PS;
1861*0b57cec5SDimitry Andric    defm UCOMISD  : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
1862*0b57cec5SDimitry Andric                            sse_load_f64, "ucomisd", WriteFCom>, PD;
1863*0b57cec5SDimitry Andric
1864*0b57cec5SDimitry Andric    defm COMISS  : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
1865*0b57cec5SDimitry Andric                                sse_load_f32, "comiss", WriteFCom>, PS;
1866*0b57cec5SDimitry Andric    defm COMISD  : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
1867*0b57cec5SDimitry Andric                                    sse_load_f64, "comisd", WriteFCom>, PD;
1868*0b57cec5SDimitry Andric  }
1869*0b57cec5SDimitry Andric} // Defs = [EFLAGS]
1870*0b57cec5SDimitry Andric
1871*0b57cec5SDimitry Andric// sse12_cmp_packed - sse 1 & 2 compare packed instructions
1872*0b57cec5SDimitry Andricmulticlass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
1873*0b57cec5SDimitry Andric                            ValueType VT, string asm,
1874*0b57cec5SDimitry Andric                            X86FoldableSchedWrite sched,
1875*0b57cec5SDimitry Andric                            Domain d, PatFrag ld_frag> {
1876*0b57cec5SDimitry Andric  let isCommutable = 1 in
1877*0b57cec5SDimitry Andric  def rri : PIi8<0xC2, MRMSrcReg,
1878*0b57cec5SDimitry Andric             (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
1879*0b57cec5SDimitry Andric             [(set RC:$dst, (VT (X86cmpp RC:$src1, RC:$src2, imm:$cc)))], d>,
1880*0b57cec5SDimitry Andric            Sched<[sched]>;
1881*0b57cec5SDimitry Andric  def rmi : PIi8<0xC2, MRMSrcMem,
1882*0b57cec5SDimitry Andric             (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
1883*0b57cec5SDimitry Andric             [(set RC:$dst,
1884*0b57cec5SDimitry Andric               (VT (X86cmpp RC:$src1, (ld_frag addr:$src2), imm:$cc)))], d>,
1885*0b57cec5SDimitry Andric            Sched<[sched.Folded, sched.ReadAfterFold]>;
1886*0b57cec5SDimitry Andric}
1887*0b57cec5SDimitry Andric
1888*0b57cec5SDimitry Andricdefm VCMPPS : sse12_cmp_packed<VR128, f128mem, v4f32,
1889*0b57cec5SDimitry Andric               "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
1890*0b57cec5SDimitry Andric               SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG;
1891*0b57cec5SDimitry Andricdefm VCMPPD : sse12_cmp_packed<VR128, f128mem, v2f64,
1892*0b57cec5SDimitry Andric               "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
1893*0b57cec5SDimitry Andric               SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG;
1894*0b57cec5SDimitry Andricdefm VCMPPSY : sse12_cmp_packed<VR256, f256mem, v8f32,
1895*0b57cec5SDimitry Andric               "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
1896*0b57cec5SDimitry Andric               SchedWriteFCmpSizes.PS.YMM, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L, VEX_WIG;
1897*0b57cec5SDimitry Andricdefm VCMPPDY : sse12_cmp_packed<VR256, f256mem, v4f64,
1898*0b57cec5SDimitry Andric               "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
1899*0b57cec5SDimitry Andric               SchedWriteFCmpSizes.PD.YMM, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L, VEX_WIG;
1900*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
1901*0b57cec5SDimitry Andric  defm CMPPS : sse12_cmp_packed<VR128, f128mem, v4f32,
1902*0b57cec5SDimitry Andric                 "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
1903*0b57cec5SDimitry Andric                 SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, memopv4f32>, PS;
1904*0b57cec5SDimitry Andric  defm CMPPD : sse12_cmp_packed<VR128, f128mem, v2f64,
1905*0b57cec5SDimitry Andric                 "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
1906*0b57cec5SDimitry Andric                 SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD;
1907*0b57cec5SDimitry Andric}
1908*0b57cec5SDimitry Andric
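// EQ (0x00), UNORD (0x03), NEQ (0x04) and ORD (0x07) are symmetric in their
// operands, so compares using them may safely be commuted. The immediate is
// masked with 7 so the corresponding extended (VEX) predicate encodings are
// recognized as well.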
1909*0b57cec5SDimitry Andricdef CommutableCMPCC : PatLeaf<(imm), [{
1910*0b57cec5SDimitry Andric  uint64_t Imm = N->getZExtValue() & 0x7;
1911*0b57cec5SDimitry Andric  return (Imm == 0x00 || Imm == 0x03 || Imm == 0x04 || Imm == 0x07);
1912*0b57cec5SDimitry Andric}]>;
1913*0b57cec5SDimitry Andric
1914*0b57cec5SDimitry Andric// Patterns to select compares with a load in the first operand; the operands are swapped, which is only safe for the CommutableCMPCC condition codes.
1915*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
1916*0b57cec5SDimitry Andric  def : Pat<(v4f64 (X86cmpp (loadv4f64 addr:$src2), VR256:$src1,
1917*0b57cec5SDimitry Andric                            CommutableCMPCC:$cc)),
1918*0b57cec5SDimitry Andric            (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
1919*0b57cec5SDimitry Andric
1920*0b57cec5SDimitry Andric  def : Pat<(v8f32 (X86cmpp (loadv8f32 addr:$src2), VR256:$src1,
1921*0b57cec5SDimitry Andric                            CommutableCMPCC:$cc)),
1922*0b57cec5SDimitry Andric            (VCMPPSYrmi VR256:$src1, addr:$src2, imm:$cc)>;
1923*0b57cec5SDimitry Andric
1924*0b57cec5SDimitry Andric  def : Pat<(v2f64 (X86cmpp (loadv2f64 addr:$src2), VR128:$src1,
1925*0b57cec5SDimitry Andric                            CommutableCMPCC:$cc)),
1926*0b57cec5SDimitry Andric            (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
1927*0b57cec5SDimitry Andric
1928*0b57cec5SDimitry Andric  def : Pat<(v4f32 (X86cmpp (loadv4f32 addr:$src2), VR128:$src1,
1929*0b57cec5SDimitry Andric                            CommutableCMPCC:$cc)),
1930*0b57cec5SDimitry Andric            (VCMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>;
1931*0b57cec5SDimitry Andric
1932*0b57cec5SDimitry Andric  def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
1933*0b57cec5SDimitry Andric                          CommutableCMPCC:$cc)),
1934*0b57cec5SDimitry Andric            (VCMPSDrm FR64:$src1, addr:$src2, imm:$cc)>;
1935*0b57cec5SDimitry Andric
1936*0b57cec5SDimitry Andric  def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
1937*0b57cec5SDimitry Andric                          CommutableCMPCC:$cc)),
1938*0b57cec5SDimitry Andric            (VCMPSSrm FR32:$src1, addr:$src2, imm:$cc)>;
1939*0b57cec5SDimitry Andric}
1940*0b57cec5SDimitry Andric
1941*0b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
1942*0b57cec5SDimitry Andric  def : Pat<(v2f64 (X86cmpp (memopv2f64 addr:$src2), VR128:$src1,
1943*0b57cec5SDimitry Andric                            CommutableCMPCC:$cc)),
1944*0b57cec5SDimitry Andric            (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
1945*0b57cec5SDimitry Andric
1946*0b57cec5SDimitry Andric  def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
1947*0b57cec5SDimitry Andric                          CommutableCMPCC:$cc)),
1948*0b57cec5SDimitry Andric            (CMPSDrm FR64:$src1, addr:$src2, imm:$cc)>;
1949*0b57cec5SDimitry Andric}
1950*0b57cec5SDimitry Andric
1951*0b57cec5SDimitry Andriclet Predicates = [UseSSE1] in {
1952*0b57cec5SDimitry Andric  def : Pat<(v4f32 (X86cmpp (memopv4f32 addr:$src2), VR128:$src1,
1953*0b57cec5SDimitry Andric                            CommutableCMPCC:$cc)),
1954*0b57cec5SDimitry Andric            (CMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>;
1955*0b57cec5SDimitry Andric
1956*0b57cec5SDimitry Andric  def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
1957*0b57cec5SDimitry Andric                          CommutableCMPCC:$cc)),
1958*0b57cec5SDimitry Andric            (CMPSSrm FR32:$src1, addr:$src2, imm:$cc)>;
1959*0b57cec5SDimitry Andric}
1960*0b57cec5SDimitry Andric
1961*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
1962*0b57cec5SDimitry Andric// SSE 1 & 2 - Shuffle Instructions
1963*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
1964*0b57cec5SDimitry Andric
1965*0b57cec5SDimitry Andric/// sse12_shuffle - sse 1 & 2 fp shuffle instructions
1966*0b57cec5SDimitry Andricmulticlass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
1967*0b57cec5SDimitry Andric                         ValueType vt, string asm, PatFrag mem_frag,
1968*0b57cec5SDimitry Andric                         X86FoldableSchedWrite sched, Domain d,
1969*0b57cec5SDimitry Andric                         bit IsCommutable = 0> {
1970*0b57cec5SDimitry Andric  def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
1971*0b57cec5SDimitry Andric                   (ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
1972*0b57cec5SDimitry Andric                   [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
1973*0b57cec5SDimitry Andric                                       (i8 imm:$src3))))], d>,
1974*0b57cec5SDimitry Andric            Sched<[sched.Folded, sched.ReadAfterFold]>;
1975*0b57cec5SDimitry Andric  let isCommutable = IsCommutable in
1976*0b57cec5SDimitry Andric  def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
1977*0b57cec5SDimitry Andric                 (ins RC:$src1, RC:$src2, u8imm:$src3), asm,
1978*0b57cec5SDimitry Andric                 [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
1979*0b57cec5SDimitry Andric                                     (i8 imm:$src3))))], d>,
1980*0b57cec5SDimitry Andric            Sched<[sched]>;
1981*0b57cec5SDimitry Andric}
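// For reference, SHUFPS uses imm8 bits [1:0] and [3:2] to pick the two low
// result elements from $src1 and bits [5:4] and [7:6] to pick the two high
// elements from $src2; SHUFPD uses bit 0 ($src1) and bit 1 ($src2). The
// 256-bit AVX forms repeat this selection within each 128-bit lane.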
1982*0b57cec5SDimitry Andric
1983*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
1984*0b57cec5SDimitry Andric  defm VSHUFPS  : sse12_shuffle<VR128, f128mem, v4f32,
1985*0b57cec5SDimitry Andric           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
1986*0b57cec5SDimitry Andric           loadv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>,
1987*0b57cec5SDimitry Andric           PS, VEX_4V, VEX_WIG;
1988*0b57cec5SDimitry Andric  defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
1989*0b57cec5SDimitry Andric           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
1990*0b57cec5SDimitry Andric           loadv8f32, SchedWriteFShuffle.YMM, SSEPackedSingle>,
1991*0b57cec5SDimitry Andric           PS, VEX_4V, VEX_L, VEX_WIG;
1992*0b57cec5SDimitry Andric  defm VSHUFPD  : sse12_shuffle<VR128, f128mem, v2f64,
1993*0b57cec5SDimitry Andric           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
1994*0b57cec5SDimitry Andric           loadv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>,
1995*0b57cec5SDimitry Andric           PD, VEX_4V, VEX_WIG;
1996*0b57cec5SDimitry Andric  defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
1997*0b57cec5SDimitry Andric           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
1998*0b57cec5SDimitry Andric           loadv4f64, SchedWriteFShuffle.YMM, SSEPackedDouble>,
1999*0b57cec5SDimitry Andric           PD, VEX_4V, VEX_L, VEX_WIG;
2000*0b57cec5SDimitry Andric}
2001*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
2002*0b57cec5SDimitry Andric  defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
2003*0b57cec5SDimitry Andric                    "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
2004*0b57cec5SDimitry Andric                    memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
2005*0b57cec5SDimitry Andric  defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
2006*0b57cec5SDimitry Andric                    "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
2007*0b57cec5SDimitry Andric                    memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
2008*0b57cec5SDimitry Andric}
2009*0b57cec5SDimitry Andric
2010*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
2011*0b57cec5SDimitry Andric// SSE 1 & 2 - Unpack FP Instructions
2012*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
2013*0b57cec5SDimitry Andric
2014*0b57cec5SDimitry Andric/// sse12_unpack_interleave - sse 1 & 2 fp unpack and interleave
2015*0b57cec5SDimitry Andricmulticlass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
2016*0b57cec5SDimitry Andric                                   PatFrag mem_frag, RegisterClass RC,
2017*0b57cec5SDimitry Andric                                   X86MemOperand x86memop, string asm,
2018*0b57cec5SDimitry Andric                                   X86FoldableSchedWrite sched, Domain d,
2019*0b57cec5SDimitry Andric                                   bit IsCommutable = 0> {
2020*0b57cec5SDimitry Andric    let isCommutable = IsCommutable in
2021*0b57cec5SDimitry Andric    def rr : PI<opc, MRMSrcReg,
2022*0b57cec5SDimitry Andric                (outs RC:$dst), (ins RC:$src1, RC:$src2),
2023*0b57cec5SDimitry Andric                asm, [(set RC:$dst,
2024*0b57cec5SDimitry Andric                           (vt (OpNode RC:$src1, RC:$src2)))], d>,
2025*0b57cec5SDimitry Andric                Sched<[sched]>;
2026*0b57cec5SDimitry Andric    def rm : PI<opc, MRMSrcMem,
2027*0b57cec5SDimitry Andric                (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
2028*0b57cec5SDimitry Andric                asm, [(set RC:$dst,
2029*0b57cec5SDimitry Andric                           (vt (OpNode RC:$src1,
2030*0b57cec5SDimitry Andric                                       (mem_frag addr:$src2))))], d>,
2031*0b57cec5SDimitry Andric             Sched<[sched.Folded, sched.ReadAfterFold]>;
2032*0b57cec5SDimitry Andric}
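// For reference, UNPCKLP[SD] interleaves the low elements of the two sources
// ({a0,b0,a1,b1} for ps, {a0,b0} for pd) and UNPCKHP[SD] the high elements;
// the 256-bit forms do so independently within each 128-bit lane.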
2033*0b57cec5SDimitry Andric
2034*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
2035*0b57cec5SDimitry Andricdefm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, load,
2036*0b57cec5SDimitry Andric      VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2037*0b57cec5SDimitry Andric                     SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
2038*0b57cec5SDimitry Andricdefm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, load,
2039*0b57cec5SDimitry Andric      VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2040*0b57cec5SDimitry Andric                     SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD, VEX_4V, VEX_WIG;
2041*0b57cec5SDimitry Andricdefm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, load,
2042*0b57cec5SDimitry Andric      VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2043*0b57cec5SDimitry Andric                     SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
2044*0b57cec5SDimitry Andricdefm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, load,
2045*0b57cec5SDimitry Andric      VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2046*0b57cec5SDimitry Andric                     SchedWriteFShuffle.XMM, SSEPackedDouble>, PD, VEX_4V, VEX_WIG;
2047*0b57cec5SDimitry Andric
2048*0b57cec5SDimitry Andricdefm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, load,
2049*0b57cec5SDimitry Andric      VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2050*0b57cec5SDimitry Andric                     SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
2051*0b57cec5SDimitry Andricdefm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, load,
2052*0b57cec5SDimitry Andric      VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2053*0b57cec5SDimitry Andric                     SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
2054*0b57cec5SDimitry Andricdefm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, load,
2055*0b57cec5SDimitry Andric      VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2056*0b57cec5SDimitry Andric                     SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
2057*0b57cec5SDimitry Andricdefm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, load,
2058*0b57cec5SDimitry Andric      VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2059*0b57cec5SDimitry Andric                     SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
2060*0b57cec5SDimitry Andric}// Predicates = [HasAVX, NoVLX]
2061*0b57cec5SDimitry Andric
2062*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
2063*0b57cec5SDimitry Andric  defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memop,
2064*0b57cec5SDimitry Andric        VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
2065*0b57cec5SDimitry Andric                       SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
2066*0b57cec5SDimitry Andric  defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memop,
2067*0b57cec5SDimitry Andric        VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
2068*0b57cec5SDimitry Andric                       SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
2069*0b57cec5SDimitry Andric  defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memop,
2070*0b57cec5SDimitry Andric        VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
2071*0b57cec5SDimitry Andric                       SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
2072*0b57cec5SDimitry Andric  defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memop,
2073*0b57cec5SDimitry Andric        VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
2074*0b57cec5SDimitry Andric                       SchedWriteFShuffle.XMM, SSEPackedDouble>, PD;
2075*0b57cec5SDimitry Andric} // Constraints = "$src1 = $dst"
2076*0b57cec5SDimitry Andric
2077*0b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in {
2078*0b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Unpckl VR256:$src1, (loadv8i32 addr:$src2))),
2079*0b57cec5SDimitry Andric            (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
2080*0b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
2081*0b57cec5SDimitry Andric            (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
2082*0b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Unpckh VR256:$src1, (loadv8i32 addr:$src2))),
2083*0b57cec5SDimitry Andric            (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
2084*0b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
2085*0b57cec5SDimitry Andric            (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
2086*0b57cec5SDimitry Andric
2087*0b57cec5SDimitry Andric  def : Pat<(v4i64 (X86Unpckl VR256:$src1, (loadv4i64 addr:$src2))),
2088*0b57cec5SDimitry Andric            (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
2089*0b57cec5SDimitry Andric  def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
2090*0b57cec5SDimitry Andric            (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
2091*0b57cec5SDimitry Andric  def : Pat<(v4i64 (X86Unpckh VR256:$src1, (loadv4i64 addr:$src2))),
2092*0b57cec5SDimitry Andric            (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
2093*0b57cec5SDimitry Andric  def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
2094*0b57cec5SDimitry Andric            (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
2095*0b57cec5SDimitry Andric}
2096*0b57cec5SDimitry Andric
2097*0b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
2098*0b57cec5SDimitry Andric  // Use MOVHPD if the load isn't aligned enough for UNPCKLPD.
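  // UNPCKLPD only reads the low quadword of its second operand, and MOVHPD
  // performs exactly that 64-bit load with no alignment requirement, whereas
  // folding into UNPCKLPDrm would need an aligned 128-bit load (memopv2f64).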
2099*0b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
2100*0b57cec5SDimitry Andric                              (v2f64 (nonvolatile_load addr:$src2)))),
2101*0b57cec5SDimitry Andric            (MOVHPDrm VR128:$src1, addr:$src2)>;
2102*0b57cec5SDimitry Andric}
2103*0b57cec5SDimitry Andric
2104*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
2105*0b57cec5SDimitry Andric// SSE 1 & 2 - Extract Floating-Point Sign mask
2106*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
2107*0b57cec5SDimitry Andric
2108*0b57cec5SDimitry Andric/// sse12_extr_sign_mask - sse 1 & 2 packed FP sign mask extraction
2109*0b57cec5SDimitry Andricmulticlass sse12_extr_sign_mask<RegisterClass RC, ValueType vt,
2110*0b57cec5SDimitry Andric                                string asm, Domain d> {
2111*0b57cec5SDimitry Andric  def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src),
2112*0b57cec5SDimitry Andric              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
2113*0b57cec5SDimitry Andric              [(set GR32orGR64:$dst, (X86movmsk (vt RC:$src)))], d>,
2114*0b57cec5SDimitry Andric              Sched<[WriteFMOVMSK]>;
2115*0b57cec5SDimitry Andric}
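// For reference, MOVMSKP[SD] packs the sign bit of each source element into
// the low bits of the destination GPR (4 bits for v4f32, 2 for v2f64, 8/4 for
// the 256-bit forms) and zeroes the remaining bits.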
2116*0b57cec5SDimitry Andric
2117*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
2118*0b57cec5SDimitry Andric  defm VMOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
2119*0b57cec5SDimitry Andric                                        SSEPackedSingle>, PS, VEX, VEX_WIG;
2120*0b57cec5SDimitry Andric  defm VMOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
2121*0b57cec5SDimitry Andric                                        SSEPackedDouble>, PD, VEX, VEX_WIG;
2122*0b57cec5SDimitry Andric  defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, v8f32, "movmskps",
2123*0b57cec5SDimitry Andric                                         SSEPackedSingle>, PS, VEX, VEX_L, VEX_WIG;
2124*0b57cec5SDimitry Andric  defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, v4f64, "movmskpd",
2125*0b57cec5SDimitry Andric                                         SSEPackedDouble>, PD, VEX, VEX_L, VEX_WIG;
2126*0b57cec5SDimitry Andric
2127*0b57cec5SDimitry Andric  // Also support integer VTs to avoid an int->fp bitcast in the DAG.
2128*0b57cec5SDimitry Andric  def : Pat<(X86movmsk (v4i32 VR128:$src)),
2129*0b57cec5SDimitry Andric            (VMOVMSKPSrr VR128:$src)>;
2130*0b57cec5SDimitry Andric  def : Pat<(X86movmsk (v2i64 VR128:$src)),
2131*0b57cec5SDimitry Andric            (VMOVMSKPDrr VR128:$src)>;
2132*0b57cec5SDimitry Andric  def : Pat<(X86movmsk (v8i32 VR256:$src)),
2133*0b57cec5SDimitry Andric            (VMOVMSKPSYrr VR256:$src)>;
2134*0b57cec5SDimitry Andric  def : Pat<(X86movmsk (v4i64 VR256:$src)),
2135*0b57cec5SDimitry Andric            (VMOVMSKPDYrr VR256:$src)>;
2136*0b57cec5SDimitry Andric}
2137*0b57cec5SDimitry Andric
2138*0b57cec5SDimitry Andricdefm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
2139*0b57cec5SDimitry Andric                                     SSEPackedSingle>, PS;
2140*0b57cec5SDimitry Andricdefm MOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
2141*0b57cec5SDimitry Andric                                     SSEPackedDouble>, PD;
2142*0b57cec5SDimitry Andric
2143*0b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
2144*0b57cec5SDimitry Andric  // Also support integer VTs to avoid an int->fp bitcast in the DAG.
2145*0b57cec5SDimitry Andric  def : Pat<(X86movmsk (v4i32 VR128:$src)),
2146*0b57cec5SDimitry Andric            (MOVMSKPSrr VR128:$src)>;
2147*0b57cec5SDimitry Andric  def : Pat<(X86movmsk (v2i64 VR128:$src)),
2148*0b57cec5SDimitry Andric            (MOVMSKPDrr VR128:$src)>;
2149*0b57cec5SDimitry Andric}
2150*0b57cec5SDimitry Andric
2151*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
2152*0b57cec5SDimitry Andric// SSE2 - Packed Integer Logical Instructions
2153*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
2154*0b57cec5SDimitry Andric
2155*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in { // SSE integer instructions
2156*0b57cec5SDimitry Andric
2157*0b57cec5SDimitry Andric/// PDI_binop_rm - Simple SSE2 binary operator.
2158*0b57cec5SDimitry Andricmulticlass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
2159*0b57cec5SDimitry Andric                        ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
2160*0b57cec5SDimitry Andric                        X86MemOperand x86memop, X86FoldableSchedWrite sched,
2161*0b57cec5SDimitry Andric                        bit IsCommutable, bit Is2Addr> {
2162*0b57cec5SDimitry Andric  let isCommutable = IsCommutable in
2163*0b57cec5SDimitry Andric  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
2164*0b57cec5SDimitry Andric       (ins RC:$src1, RC:$src2),
2165*0b57cec5SDimitry Andric       !if(Is2Addr,
2166*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
2167*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
2168*0b57cec5SDimitry Andric       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
2169*0b57cec5SDimitry Andric       Sched<[sched]>;
2170*0b57cec5SDimitry Andric  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
2171*0b57cec5SDimitry Andric       (ins RC:$src1, x86memop:$src2),
2172*0b57cec5SDimitry Andric       !if(Is2Addr,
2173*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
2174*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
2175*0b57cec5SDimitry Andric       [(set RC:$dst, (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
2176*0b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
2177*0b57cec5SDimitry Andric}
2178*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
2179*0b57cec5SDimitry Andric
2180*0b57cec5SDimitry Andricmulticlass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
2181*0b57cec5SDimitry Andric                         ValueType OpVT128, ValueType OpVT256,
2182*0b57cec5SDimitry Andric                         X86SchedWriteWidths sched, bit IsCommutable,
2183*0b57cec5SDimitry Andric                         Predicate prd> {
2184*0b57cec5SDimitry Andriclet Predicates = [HasAVX, prd] in
2185*0b57cec5SDimitry Andric  defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
2186*0b57cec5SDimitry Andric                             VR128, load, i128mem, sched.XMM,
2187*0b57cec5SDimitry Andric                             IsCommutable, 0>, VEX_4V, VEX_WIG;
2188*0b57cec5SDimitry Andric
2189*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in
2190*0b57cec5SDimitry Andric  defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
2191*0b57cec5SDimitry Andric                           memop, i128mem, sched.XMM, IsCommutable, 1>;
2192*0b57cec5SDimitry Andric
2193*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, prd] in
2194*0b57cec5SDimitry Andric  defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
2195*0b57cec5SDimitry Andric                               OpVT256, VR256, load, i256mem, sched.YMM,
2196*0b57cec5SDimitry Andric                               IsCommutable, 0>, VEX_4V, VEX_L, VEX_WIG;
2197*0b57cec5SDimitry Andric}
2198*0b57cec5SDimitry Andric
2199*0b57cec5SDimitry Andric// These are ordered here to satisfy pattern-ordering requirements with the fp versions.
2200*0b57cec5SDimitry Andric
2201*0b57cec5SDimitry Andricdefm PAND  : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64,
2202*0b57cec5SDimitry Andric                           SchedWriteVecLogic, 1, NoVLX>;
2203*0b57cec5SDimitry Andricdefm POR   : PDI_binop_all<0xEB, "por", or, v2i64, v4i64,
2204*0b57cec5SDimitry Andric                           SchedWriteVecLogic, 1, NoVLX>;
2205*0b57cec5SDimitry Andricdefm PXOR  : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64,
2206*0b57cec5SDimitry Andric                           SchedWriteVecLogic, 1, NoVLX>;
2207*0b57cec5SDimitry Andricdefm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
2208*0b57cec5SDimitry Andric                           SchedWriteVecLogic, 0, NoVLX>;
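
// For reference, each PDI_binop_all instantiation above expands, through
// PDI_binop_rm, into register/register and register/memory forms for the
// SSE2, AVX and AVX2 encodings. A minimal sketch of the names produced
// (assuming the usual NAME concatenation), e.g. for PAND:
//   PANDrr,   PANDrm    - SSE2,  "$src1 = $dst" tied form
//   VPANDrr,  VPANDrm   - AVX,   three-operand VEX.128 form
//   VPANDYrr, VPANDYrm  - AVX2,  256-bit VEX.256 form
// The patterns further down refer to these generated names directly.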
2209*0b57cec5SDimitry Andric
2210*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
2211*0b57cec5SDimitry Andric// SSE 1 & 2 - Logical Instructions
2212*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
2213*0b57cec5SDimitry Andric
2214*0b57cec5SDimitry Andric/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
2215*0b57cec5SDimitry Andric///
2216*0b57cec5SDimitry Andric/// There are no patterns here because isel prefers the integer versions on
2217*0b57cec5SDimitry Andric/// SSE2 and later targets. The SSE1 v4f32 patterns appear later in the file.
2218*0b57cec5SDimitry Andricmulticlass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
2219*0b57cec5SDimitry Andric                                   SDNode OpNode, X86SchedWriteWidths sched> {
2220*0b57cec5SDimitry Andric  let Predicates = [HasAVX, NoVLX] in {
2221*0b57cec5SDimitry Andric  defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
2222*0b57cec5SDimitry Andric        !strconcat(OpcodeStr, "ps"), f256mem, sched.YMM,
2223*0b57cec5SDimitry Andric        [], [], 0>, PS, VEX_4V, VEX_L, VEX_WIG;
2224*0b57cec5SDimitry Andric
2225*0b57cec5SDimitry Andric  defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
2226*0b57cec5SDimitry Andric        !strconcat(OpcodeStr, "pd"), f256mem, sched.YMM,
2227*0b57cec5SDimitry Andric        [], [], 0>, PD, VEX_4V, VEX_L, VEX_WIG;
2228*0b57cec5SDimitry Andric
2229*0b57cec5SDimitry Andric  defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
2230*0b57cec5SDimitry Andric       !strconcat(OpcodeStr, "ps"), f128mem, sched.XMM,
2231*0b57cec5SDimitry Andric       [], [], 0>, PS, VEX_4V, VEX_WIG;
2232*0b57cec5SDimitry Andric
2233*0b57cec5SDimitry Andric  defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
2234*0b57cec5SDimitry Andric       !strconcat(OpcodeStr, "pd"), f128mem, sched.XMM,
2235*0b57cec5SDimitry Andric       [], [], 0>, PD, VEX_4V, VEX_WIG;
2236*0b57cec5SDimitry Andric  }
2237*0b57cec5SDimitry Andric
2238*0b57cec5SDimitry Andric  let Constraints = "$src1 = $dst" in {
2239*0b57cec5SDimitry Andric    defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
2240*0b57cec5SDimitry Andric         !strconcat(OpcodeStr, "ps"), f128mem, sched.XMM,
2241*0b57cec5SDimitry Andric         [], []>, PS;
2242*0b57cec5SDimitry Andric
2243*0b57cec5SDimitry Andric    defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
2244*0b57cec5SDimitry Andric         !strconcat(OpcodeStr, "pd"), f128mem, sched.XMM,
2245*0b57cec5SDimitry Andric         [], []>, PD;
2246*0b57cec5SDimitry Andric  }
2247*0b57cec5SDimitry Andric}
2248*0b57cec5SDimitry Andric
2249*0b57cec5SDimitry Andricdefm AND  : sse12_fp_packed_logical<0x54, "and", and, SchedWriteFLogic>;
2250*0b57cec5SDimitry Andricdefm OR   : sse12_fp_packed_logical<0x56, "or", or, SchedWriteFLogic>;
2251*0b57cec5SDimitry Andricdefm XOR  : sse12_fp_packed_logical<0x57, "xor", xor, SchedWriteFLogic>;
2252*0b57cec5SDimitry Andriclet isCommutable = 0 in
2253*0b57cec5SDimitry Andric  defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp, SchedWriteFLogic>;
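
// A similar sketch for the FP logical defms above (assuming the usual NAME
// concatenation): AND produces ANDPSrr/rm and ANDPDrr/rm plus the VEX forms
// VANDPSrr/rm, VANDPDrr/rm and the 256-bit VANDPSYrr/rm, VANDPDYrr/rm.
// The AVX1-only integer patterns below reuse the VANDPSY flavor, since the
// 256-bit integer instructions require AVX2.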
2254*0b57cec5SDimitry Andric
2255*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
2256*0b57cec5SDimitry Andric  def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
2257*0b57cec5SDimitry Andric            (VPANDYrr VR256:$src1, VR256:$src2)>;
2258*0b57cec5SDimitry Andric  def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
2259*0b57cec5SDimitry Andric            (VPANDYrr VR256:$src1, VR256:$src2)>;
2260*0b57cec5SDimitry Andric  def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
2261*0b57cec5SDimitry Andric            (VPANDYrr VR256:$src1, VR256:$src2)>;
2262*0b57cec5SDimitry Andric
2263*0b57cec5SDimitry Andric  def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
2264*0b57cec5SDimitry Andric            (VPORYrr VR256:$src1, VR256:$src2)>;
2265*0b57cec5SDimitry Andric  def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
2266*0b57cec5SDimitry Andric            (VPORYrr VR256:$src1, VR256:$src2)>;
2267*0b57cec5SDimitry Andric  def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
2268*0b57cec5SDimitry Andric            (VPORYrr VR256:$src1, VR256:$src2)>;
2269*0b57cec5SDimitry Andric
2270*0b57cec5SDimitry Andric  def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
2271*0b57cec5SDimitry Andric            (VPXORYrr VR256:$src1, VR256:$src2)>;
2272*0b57cec5SDimitry Andric  def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
2273*0b57cec5SDimitry Andric            (VPXORYrr VR256:$src1, VR256:$src2)>;
2274*0b57cec5SDimitry Andric  def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
2275*0b57cec5SDimitry Andric            (VPXORYrr VR256:$src1, VR256:$src2)>;
2276*0b57cec5SDimitry Andric
2277*0b57cec5SDimitry Andric  def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
2278*0b57cec5SDimitry Andric            (VPANDNYrr VR256:$src1, VR256:$src2)>;
2279*0b57cec5SDimitry Andric  def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
2280*0b57cec5SDimitry Andric            (VPANDNYrr VR256:$src1, VR256:$src2)>;
2281*0b57cec5SDimitry Andric  def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
2282*0b57cec5SDimitry Andric            (VPANDNYrr VR256:$src1, VR256:$src2)>;
2283*0b57cec5SDimitry Andric
2284*0b57cec5SDimitry Andric  def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)),
2285*0b57cec5SDimitry Andric            (VPANDYrm VR256:$src1, addr:$src2)>;
2286*0b57cec5SDimitry Andric  def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)),
2287*0b57cec5SDimitry Andric            (VPANDYrm VR256:$src1, addr:$src2)>;
2288*0b57cec5SDimitry Andric  def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)),
2289*0b57cec5SDimitry Andric            (VPANDYrm VR256:$src1, addr:$src2)>;
2290*0b57cec5SDimitry Andric
2291*0b57cec5SDimitry Andric  def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)),
2292*0b57cec5SDimitry Andric            (VPORYrm VR256:$src1, addr:$src2)>;
2293*0b57cec5SDimitry Andric  def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)),
2294*0b57cec5SDimitry Andric            (VPORYrm VR256:$src1, addr:$src2)>;
2295*0b57cec5SDimitry Andric  def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)),
2296*0b57cec5SDimitry Andric            (VPORYrm VR256:$src1, addr:$src2)>;
2297*0b57cec5SDimitry Andric
2298*0b57cec5SDimitry Andric  def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)),
2299*0b57cec5SDimitry Andric            (VPXORYrm VR256:$src1, addr:$src2)>;
2300*0b57cec5SDimitry Andric  def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)),
2301*0b57cec5SDimitry Andric            (VPXORYrm VR256:$src1, addr:$src2)>;
2302*0b57cec5SDimitry Andric  def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)),
2303*0b57cec5SDimitry Andric            (VPXORYrm VR256:$src1, addr:$src2)>;
2304*0b57cec5SDimitry Andric
2305*0b57cec5SDimitry Andric  def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)),
2306*0b57cec5SDimitry Andric            (VPANDNYrm VR256:$src1, addr:$src2)>;
2307*0b57cec5SDimitry Andric  def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)),
2308*0b57cec5SDimitry Andric            (VPANDNYrm VR256:$src1, addr:$src2)>;
2309*0b57cec5SDimitry Andric  def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)),
2310*0b57cec5SDimitry Andric            (VPANDNYrm VR256:$src1, addr:$src2)>;
2311*0b57cec5SDimitry Andric}
2312*0b57cec5SDimitry Andric
2313*0b57cec5SDimitry Andric// If only AVX1 is supported, we need to handle integer operations with
2314*0b57cec5SDimitry Andric// floating point instructions since the integer versions aren't available.
2315*0b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in {
2316*0b57cec5SDimitry Andric  def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
2317*0b57cec5SDimitry Andric            (VANDPSYrr VR256:$src1, VR256:$src2)>;
2318*0b57cec5SDimitry Andric  def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
2319*0b57cec5SDimitry Andric            (VANDPSYrr VR256:$src1, VR256:$src2)>;
2320*0b57cec5SDimitry Andric  def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
2321*0b57cec5SDimitry Andric            (VANDPSYrr VR256:$src1, VR256:$src2)>;
2322*0b57cec5SDimitry Andric  def : Pat<(v4i64 (and VR256:$src1, VR256:$src2)),
2323*0b57cec5SDimitry Andric            (VANDPSYrr VR256:$src1, VR256:$src2)>;
2324*0b57cec5SDimitry Andric
2325*0b57cec5SDimitry Andric  def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
2326*0b57cec5SDimitry Andric            (VORPSYrr VR256:$src1, VR256:$src2)>;
2327*0b57cec5SDimitry Andric  def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
2328*0b57cec5SDimitry Andric            (VORPSYrr VR256:$src1, VR256:$src2)>;
2329*0b57cec5SDimitry Andric  def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
2330*0b57cec5SDimitry Andric            (VORPSYrr VR256:$src1, VR256:$src2)>;
2331*0b57cec5SDimitry Andric  def : Pat<(v4i64 (or VR256:$src1, VR256:$src2)),
2332*0b57cec5SDimitry Andric            (VORPSYrr VR256:$src1, VR256:$src2)>;
2333*0b57cec5SDimitry Andric
2334*0b57cec5SDimitry Andric  def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
2335*0b57cec5SDimitry Andric            (VXORPSYrr VR256:$src1, VR256:$src2)>;
2336*0b57cec5SDimitry Andric  def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
2337*0b57cec5SDimitry Andric            (VXORPSYrr VR256:$src1, VR256:$src2)>;
2338*0b57cec5SDimitry Andric  def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
2339*0b57cec5SDimitry Andric            (VXORPSYrr VR256:$src1, VR256:$src2)>;
2340*0b57cec5SDimitry Andric  def : Pat<(v4i64 (xor VR256:$src1, VR256:$src2)),
2341*0b57cec5SDimitry Andric            (VXORPSYrr VR256:$src1, VR256:$src2)>;
2342*0b57cec5SDimitry Andric
2343*0b57cec5SDimitry Andric  def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
2344*0b57cec5SDimitry Andric            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
2345*0b57cec5SDimitry Andric  def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
2346*0b57cec5SDimitry Andric            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
2347*0b57cec5SDimitry Andric  def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
2348*0b57cec5SDimitry Andric            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
2349*0b57cec5SDimitry Andric  def : Pat<(v4i64 (X86andnp VR256:$src1, VR256:$src2)),
2350*0b57cec5SDimitry Andric            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
2351*0b57cec5SDimitry Andric
2352*0b57cec5SDimitry Andric  def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)),
2353*0b57cec5SDimitry Andric            (VANDPSYrm VR256:$src1, addr:$src2)>;
2354*0b57cec5SDimitry Andric  def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)),
2355*0b57cec5SDimitry Andric            (VANDPSYrm VR256:$src1, addr:$src2)>;
2356*0b57cec5SDimitry Andric  def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)),
2357*0b57cec5SDimitry Andric            (VANDPSYrm VR256:$src1, addr:$src2)>;
2358*0b57cec5SDimitry Andric  def : Pat<(and VR256:$src1, (loadv4i64 addr:$src2)),
2359*0b57cec5SDimitry Andric            (VANDPSYrm VR256:$src1, addr:$src2)>;
2360*0b57cec5SDimitry Andric
2361*0b57cec5SDimitry Andric  def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)),
2362*0b57cec5SDimitry Andric            (VORPSYrm VR256:$src1, addr:$src2)>;
2363*0b57cec5SDimitry Andric  def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)),
2364*0b57cec5SDimitry Andric            (VORPSYrm VR256:$src1, addr:$src2)>;
2365*0b57cec5SDimitry Andric  def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)),
2366*0b57cec5SDimitry Andric            (VORPSYrm VR256:$src1, addr:$src2)>;
2367*0b57cec5SDimitry Andric  def : Pat<(or VR256:$src1, (loadv4i64 addr:$src2)),
2368*0b57cec5SDimitry Andric            (VORPSYrm VR256:$src1, addr:$src2)>;
2369*0b57cec5SDimitry Andric
2370*0b57cec5SDimitry Andric  def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)),
2371*0b57cec5SDimitry Andric            (VXORPSYrm VR256:$src1, addr:$src2)>;
2372*0b57cec5SDimitry Andric  def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)),
2373*0b57cec5SDimitry Andric            (VXORPSYrm VR256:$src1, addr:$src2)>;
2374*0b57cec5SDimitry Andric  def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)),
2375*0b57cec5SDimitry Andric            (VXORPSYrm VR256:$src1, addr:$src2)>;
2376*0b57cec5SDimitry Andric  def : Pat<(xor VR256:$src1, (loadv4i64 addr:$src2)),
2377*0b57cec5SDimitry Andric            (VXORPSYrm VR256:$src1, addr:$src2)>;
2378*0b57cec5SDimitry Andric
2379*0b57cec5SDimitry Andric  def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)),
2380*0b57cec5SDimitry Andric            (VANDNPSYrm VR256:$src1, addr:$src2)>;
2381*0b57cec5SDimitry Andric  def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)),
2382*0b57cec5SDimitry Andric            (VANDNPSYrm VR256:$src1, addr:$src2)>;
2383*0b57cec5SDimitry Andric  def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)),
2384*0b57cec5SDimitry Andric            (VANDNPSYrm VR256:$src1, addr:$src2)>;
2385*0b57cec5SDimitry Andric  def : Pat<(X86andnp VR256:$src1, (loadv4i64 addr:$src2)),
2386*0b57cec5SDimitry Andric            (VANDNPSYrm VR256:$src1, addr:$src2)>;
2387*0b57cec5SDimitry Andric}
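
// A hedged illustration of the HasAVX1Only block above (function name is
// ours, not part of this file): AVX1 has no 256-bit vpand, so a 256-bit
// integer AND written with intrinsics is expected to be matched by the
// v4i64 pattern and emitted as the float-domain vandps.
//
//   #include <immintrin.h>
//   __m256i and256(__m256i a, __m256i b) {
//     // Built with AVX but not AVX2, this should select vandps ymm.
//     return _mm256_castpd_si256(
//         _mm256_and_pd(_mm256_castsi256_pd(a), _mm256_castsi256_pd(b)));
//   }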
2388*0b57cec5SDimitry Andric
2389*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
2390*0b57cec5SDimitry Andric  def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
2391*0b57cec5SDimitry Andric            (VPANDrr VR128:$src1, VR128:$src2)>;
2392*0b57cec5SDimitry Andric  def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
2393*0b57cec5SDimitry Andric            (VPANDrr VR128:$src1, VR128:$src2)>;
2394*0b57cec5SDimitry Andric  def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
2395*0b57cec5SDimitry Andric            (VPANDrr VR128:$src1, VR128:$src2)>;
2396*0b57cec5SDimitry Andric
2397*0b57cec5SDimitry Andric  def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
2398*0b57cec5SDimitry Andric            (VPORrr VR128:$src1, VR128:$src2)>;
2399*0b57cec5SDimitry Andric  def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
2400*0b57cec5SDimitry Andric            (VPORrr VR128:$src1, VR128:$src2)>;
2401*0b57cec5SDimitry Andric  def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
2402*0b57cec5SDimitry Andric            (VPORrr VR128:$src1, VR128:$src2)>;
2403*0b57cec5SDimitry Andric
2404*0b57cec5SDimitry Andric  def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
2405*0b57cec5SDimitry Andric            (VPXORrr VR128:$src1, VR128:$src2)>;
2406*0b57cec5SDimitry Andric  def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
2407*0b57cec5SDimitry Andric            (VPXORrr VR128:$src1, VR128:$src2)>;
2408*0b57cec5SDimitry Andric  def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
2409*0b57cec5SDimitry Andric            (VPXORrr VR128:$src1, VR128:$src2)>;
2410*0b57cec5SDimitry Andric
2411*0b57cec5SDimitry Andric  def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
2412*0b57cec5SDimitry Andric            (VPANDNrr VR128:$src1, VR128:$src2)>;
2413*0b57cec5SDimitry Andric  def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
2414*0b57cec5SDimitry Andric            (VPANDNrr VR128:$src1, VR128:$src2)>;
2415*0b57cec5SDimitry Andric  def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
2416*0b57cec5SDimitry Andric            (VPANDNrr VR128:$src1, VR128:$src2)>;
2417*0b57cec5SDimitry Andric
2418*0b57cec5SDimitry Andric  def : Pat<(and VR128:$src1, (loadv16i8 addr:$src2)),
2419*0b57cec5SDimitry Andric            (VPANDrm VR128:$src1, addr:$src2)>;
2420*0b57cec5SDimitry Andric  def : Pat<(and VR128:$src1, (loadv8i16 addr:$src2)),
2421*0b57cec5SDimitry Andric            (VPANDrm VR128:$src1, addr:$src2)>;
2422*0b57cec5SDimitry Andric  def : Pat<(and VR128:$src1, (loadv4i32 addr:$src2)),
2423*0b57cec5SDimitry Andric            (VPANDrm VR128:$src1, addr:$src2)>;
2424*0b57cec5SDimitry Andric
2425*0b57cec5SDimitry Andric  def : Pat<(or VR128:$src1, (loadv16i8 addr:$src2)),
2426*0b57cec5SDimitry Andric            (VPORrm VR128:$src1, addr:$src2)>;
2427*0b57cec5SDimitry Andric  def : Pat<(or VR128:$src1, (loadv8i16 addr:$src2)),
2428*0b57cec5SDimitry Andric            (VPORrm VR128:$src1, addr:$src2)>;
2429*0b57cec5SDimitry Andric  def : Pat<(or VR128:$src1, (loadv4i32 addr:$src2)),
2430*0b57cec5SDimitry Andric            (VPORrm VR128:$src1, addr:$src2)>;
2431*0b57cec5SDimitry Andric
2432*0b57cec5SDimitry Andric  def : Pat<(xor VR128:$src1, (loadv16i8 addr:$src2)),
2433*0b57cec5SDimitry Andric            (VPXORrm VR128:$src1, addr:$src2)>;
2434*0b57cec5SDimitry Andric  def : Pat<(xor VR128:$src1, (loadv8i16 addr:$src2)),
2435*0b57cec5SDimitry Andric            (VPXORrm VR128:$src1, addr:$src2)>;
2436*0b57cec5SDimitry Andric  def : Pat<(xor VR128:$src1, (loadv4i32 addr:$src2)),
2437*0b57cec5SDimitry Andric            (VPXORrm VR128:$src1, addr:$src2)>;
2438*0b57cec5SDimitry Andric
2439*0b57cec5SDimitry Andric  def : Pat<(X86andnp VR128:$src1, (loadv16i8 addr:$src2)),
2440*0b57cec5SDimitry Andric            (VPANDNrm VR128:$src1, addr:$src2)>;
2441*0b57cec5SDimitry Andric  def : Pat<(X86andnp VR128:$src1, (loadv8i16 addr:$src2)),
2442*0b57cec5SDimitry Andric            (VPANDNrm VR128:$src1, addr:$src2)>;
2443*0b57cec5SDimitry Andric  def : Pat<(X86andnp VR128:$src1, (loadv4i32 addr:$src2)),
2444*0b57cec5SDimitry Andric            (VPANDNrm VR128:$src1, addr:$src2)>;
2445*0b57cec5SDimitry Andric}
2446*0b57cec5SDimitry Andric
2447*0b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
2448*0b57cec5SDimitry Andric  def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
2449*0b57cec5SDimitry Andric            (PANDrr VR128:$src1, VR128:$src2)>;
2450*0b57cec5SDimitry Andric  def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
2451*0b57cec5SDimitry Andric            (PANDrr VR128:$src1, VR128:$src2)>;
2452*0b57cec5SDimitry Andric  def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
2453*0b57cec5SDimitry Andric            (PANDrr VR128:$src1, VR128:$src2)>;
2454*0b57cec5SDimitry Andric
2455*0b57cec5SDimitry Andric  def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
2456*0b57cec5SDimitry Andric            (PORrr VR128:$src1, VR128:$src2)>;
2457*0b57cec5SDimitry Andric  def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
2458*0b57cec5SDimitry Andric            (PORrr VR128:$src1, VR128:$src2)>;
2459*0b57cec5SDimitry Andric  def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
2460*0b57cec5SDimitry Andric            (PORrr VR128:$src1, VR128:$src2)>;
2461*0b57cec5SDimitry Andric
2462*0b57cec5SDimitry Andric  def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
2463*0b57cec5SDimitry Andric            (PXORrr VR128:$src1, VR128:$src2)>;
2464*0b57cec5SDimitry Andric  def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
2465*0b57cec5SDimitry Andric            (PXORrr VR128:$src1, VR128:$src2)>;
2466*0b57cec5SDimitry Andric  def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
2467*0b57cec5SDimitry Andric            (PXORrr VR128:$src1, VR128:$src2)>;
2468*0b57cec5SDimitry Andric
2469*0b57cec5SDimitry Andric  def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
2470*0b57cec5SDimitry Andric            (PANDNrr VR128:$src1, VR128:$src2)>;
2471*0b57cec5SDimitry Andric  def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
2472*0b57cec5SDimitry Andric            (PANDNrr VR128:$src1, VR128:$src2)>;
2473*0b57cec5SDimitry Andric  def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
2474*0b57cec5SDimitry Andric            (PANDNrr VR128:$src1, VR128:$src2)>;
2475*0b57cec5SDimitry Andric
2476*0b57cec5SDimitry Andric  def : Pat<(and VR128:$src1, (memopv16i8 addr:$src2)),
2477*0b57cec5SDimitry Andric            (PANDrm VR128:$src1, addr:$src2)>;
2478*0b57cec5SDimitry Andric  def : Pat<(and VR128:$src1, (memopv8i16 addr:$src2)),
2479*0b57cec5SDimitry Andric            (PANDrm VR128:$src1, addr:$src2)>;
2480*0b57cec5SDimitry Andric  def : Pat<(and VR128:$src1, (memopv4i32 addr:$src2)),
2481*0b57cec5SDimitry Andric            (PANDrm VR128:$src1, addr:$src2)>;
2482*0b57cec5SDimitry Andric
2483*0b57cec5SDimitry Andric  def : Pat<(or VR128:$src1, (memopv16i8 addr:$src2)),
2484*0b57cec5SDimitry Andric            (PORrm VR128:$src1, addr:$src2)>;
2485*0b57cec5SDimitry Andric  def : Pat<(or VR128:$src1, (memopv8i16 addr:$src2)),
2486*0b57cec5SDimitry Andric            (PORrm VR128:$src1, addr:$src2)>;
2487*0b57cec5SDimitry Andric  def : Pat<(or VR128:$src1, (memopv4i32 addr:$src2)),
2488*0b57cec5SDimitry Andric            (PORrm VR128:$src1, addr:$src2)>;
2489*0b57cec5SDimitry Andric
2490*0b57cec5SDimitry Andric  def : Pat<(xor VR128:$src1, (memopv16i8 addr:$src2)),
2491*0b57cec5SDimitry Andric            (PXORrm VR128:$src1, addr:$src2)>;
2492*0b57cec5SDimitry Andric  def : Pat<(xor VR128:$src1, (memopv8i16 addr:$src2)),
2493*0b57cec5SDimitry Andric            (PXORrm VR128:$src1, addr:$src2)>;
2494*0b57cec5SDimitry Andric  def : Pat<(xor VR128:$src1, (memopv4i32 addr:$src2)),
2495*0b57cec5SDimitry Andric            (PXORrm VR128:$src1, addr:$src2)>;
2496*0b57cec5SDimitry Andric
2497*0b57cec5SDimitry Andric  def : Pat<(X86andnp VR128:$src1, (memopv16i8 addr:$src2)),
2498*0b57cec5SDimitry Andric            (PANDNrm VR128:$src1, addr:$src2)>;
2499*0b57cec5SDimitry Andric  def : Pat<(X86andnp VR128:$src1, (memopv8i16 addr:$src2)),
2500*0b57cec5SDimitry Andric            (PANDNrm VR128:$src1, addr:$src2)>;
2501*0b57cec5SDimitry Andric  def : Pat<(X86andnp VR128:$src1, (memopv4i32 addr:$src2)),
2502*0b57cec5SDimitry Andric            (PANDNrm VR128:$src1, addr:$src2)>;
2503*0b57cec5SDimitry Andric}
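
// A brief usage sketch (function name is ours): the defm PAND above already
// provides the v2i64 pattern, so the UseSSE2 patterns here cover the
// remaining v16i8/v8i16/v4i32 element widths. Either way a 128-bit integer
// AND is expected to select pand.
//
//   #include <emmintrin.h>
//   __m128i and128(__m128i a, __m128i b) {
//     return _mm_and_si128(a, b);   // pand %xmm1, %xmm0 on SSE2 targets
//   }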
2504*0b57cec5SDimitry Andric
2505*0b57cec5SDimitry Andric// Patterns for packed operations when no integer vector type is available.
2506*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
2507*0b57cec5SDimitry Andric          (ANDPSrr VR128:$src1, VR128:$src2)>;
2508*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)),
2509*0b57cec5SDimitry Andric          (ORPSrr VR128:$src1, VR128:$src2)>;
2510*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)),
2511*0b57cec5SDimitry Andric          (XORPSrr VR128:$src1, VR128:$src2)>;
2512*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)),
2513*0b57cec5SDimitry Andric          (ANDNPSrr VR128:$src1, VR128:$src2)>;
2514*0b57cec5SDimitry Andric
2515*0b57cec5SDimitry Andricdef : Pat<(X86fand VR128:$src1, (memopv4f32 addr:$src2)),
2516*0b57cec5SDimitry Andric          (ANDPSrm VR128:$src1, addr:$src2)>;
2517*0b57cec5SDimitry Andricdef : Pat<(X86for VR128:$src1, (memopv4f32 addr:$src2)),
2518*0b57cec5SDimitry Andric          (ORPSrm VR128:$src1, addr:$src2)>;
2519*0b57cec5SDimitry Andricdef : Pat<(X86fxor VR128:$src1, (memopv4f32 addr:$src2)),
2520*0b57cec5SDimitry Andric          (XORPSrm VR128:$src1, addr:$src2)>;
2521*0b57cec5SDimitry Andricdef : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)),
2522*0b57cec5SDimitry Andric          (ANDNPSrm VR128:$src1, addr:$src2)>;
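
// A corresponding SSE1-only sketch (function name is ours): without SSE2
// there are no integer vector types, so bitwise ops on v4f32 stay in the FP
// domain as X86fand/X86for/X86fxor/X86fandn and match the ANDPS/ORPS/XORPS/
// ANDNPS patterns above.
//
//   #include <xmmintrin.h>
//   __m128 mask_ps(__m128 a, __m128 m) {
//     return _mm_and_ps(a, m);      // andps %xmm1, %xmm0
//   }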
2523*0b57cec5SDimitry Andric
2524*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
2525*0b57cec5SDimitry Andric// SSE 1 & 2 - Arithmetic Instructions
2526*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
2527*0b57cec5SDimitry Andric
2528*0b57cec5SDimitry Andric/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and
2529*0b57cec5SDimitry Andric/// vector forms.
2530*0b57cec5SDimitry Andric///
2531*0b57cec5SDimitry Andric/// In addition, we have a special variant of the scalar form here to
2532*0b57cec5SDimitry Andric/// represent the associated intrinsic operation.  This form differs from the
2533*0b57cec5SDimitry Andric/// plain scalar form in that it takes an entire vector (instead of a scalar)
2534*0b57cec5SDimitry Andric/// and leaves the top elements unmodified (therefore these cannot be commuted).
2535*0b57cec5SDimitry Andric///
2536*0b57cec5SDimitry Andric/// These three forms can each be reg+reg or reg+mem.
2537*0b57cec5SDimitry Andric///
2538*0b57cec5SDimitry Andric
2539*0b57cec5SDimitry Andric/// FIXME: once all 256-bit intrinsics are matched, clean up and refactor the
2540*0b57cec5SDimitry Andric/// classes below.
2541*0b57cec5SDimitry Andricmulticlass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
2542*0b57cec5SDimitry Andric                                  SDNode OpNode, X86SchedWriteSizes sched> {
2543*0b57cec5SDimitry Andric  let Predicates = [HasAVX, NoVLX] in {
2544*0b57cec5SDimitry Andric  defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
2545*0b57cec5SDimitry Andric                               VR128, v4f32, f128mem, loadv4f32,
2546*0b57cec5SDimitry Andric                               SSEPackedSingle, sched.PS.XMM, 0>, PS, VEX_4V, VEX_WIG;
2547*0b57cec5SDimitry Andric  defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
2548*0b57cec5SDimitry Andric                               VR128, v2f64, f128mem, loadv2f64,
2549*0b57cec5SDimitry Andric                               SSEPackedDouble, sched.PD.XMM, 0>, PD, VEX_4V, VEX_WIG;
2550*0b57cec5SDimitry Andric
2551*0b57cec5SDimitry Andric  defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
2552*0b57cec5SDimitry Andric                        OpNode, VR256, v8f32, f256mem, loadv8f32,
2553*0b57cec5SDimitry Andric                        SSEPackedSingle, sched.PS.YMM, 0>, PS, VEX_4V, VEX_L, VEX_WIG;
2554*0b57cec5SDimitry Andric  defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
2555*0b57cec5SDimitry Andric                        OpNode, VR256, v4f64, f256mem, loadv4f64,
2556*0b57cec5SDimitry Andric                        SSEPackedDouble, sched.PD.YMM, 0>, PD, VEX_4V, VEX_L, VEX_WIG;
2557*0b57cec5SDimitry Andric  }
2558*0b57cec5SDimitry Andric
2559*0b57cec5SDimitry Andric  let Constraints = "$src1 = $dst" in {
2560*0b57cec5SDimitry Andric    defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
2561*0b57cec5SDimitry Andric                              v4f32, f128mem, memopv4f32, SSEPackedSingle,
2562*0b57cec5SDimitry Andric                              sched.PS.XMM>, PS;
2563*0b57cec5SDimitry Andric    defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
2564*0b57cec5SDimitry Andric                              v2f64, f128mem, memopv2f64, SSEPackedDouble,
2565*0b57cec5SDimitry Andric                              sched.PD.XMM>, PD;
2566*0b57cec5SDimitry Andric  }
2567*0b57cec5SDimitry Andric}
2568*0b57cec5SDimitry Andric
2569*0b57cec5SDimitry Andricmulticlass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
2570*0b57cec5SDimitry Andric                                  X86SchedWriteSizes sched> {
2571*0b57cec5SDimitry Andric  defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
2572*0b57cec5SDimitry Andric                         OpNode, FR32, f32mem, SSEPackedSingle, sched.PS.Scl, 0>,
2573*0b57cec5SDimitry Andric                         XS, VEX_4V, VEX_LIG, VEX_WIG;
2574*0b57cec5SDimitry Andric  defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
2575*0b57cec5SDimitry Andric                         OpNode, FR64, f64mem, SSEPackedDouble, sched.PD.Scl, 0>,
2576*0b57cec5SDimitry Andric                         XD, VEX_4V, VEX_LIG, VEX_WIG;
2577*0b57cec5SDimitry Andric
2578*0b57cec5SDimitry Andric  let Constraints = "$src1 = $dst" in {
2579*0b57cec5SDimitry Andric    defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
2580*0b57cec5SDimitry Andric                              OpNode, FR32, f32mem, SSEPackedSingle,
2581*0b57cec5SDimitry Andric                              sched.PS.Scl>, XS;
2582*0b57cec5SDimitry Andric    defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
2583*0b57cec5SDimitry Andric                              OpNode, FR64, f64mem, SSEPackedDouble,
2584*0b57cec5SDimitry Andric                              sched.PD.Scl>, XD;
2585*0b57cec5SDimitry Andric  }
2586*0b57cec5SDimitry Andric}
2587*0b57cec5SDimitry Andric
2588*0b57cec5SDimitry Andricmulticlass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
2589*0b57cec5SDimitry Andric                                      SDPatternOperator OpNode,
2590*0b57cec5SDimitry Andric                                      X86SchedWriteSizes sched> {
2591*0b57cec5SDimitry Andric  defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
2592*0b57cec5SDimitry Andric                   !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
2593*0b57cec5SDimitry Andric                   SSEPackedSingle, sched.PS.Scl, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG;
2594*0b57cec5SDimitry Andric  defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
2595*0b57cec5SDimitry Andric                   !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
2596*0b57cec5SDimitry Andric                   SSEPackedDouble, sched.PD.Scl, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG;
2597*0b57cec5SDimitry Andric
2598*0b57cec5SDimitry Andric  let Constraints = "$src1 = $dst" in {
2599*0b57cec5SDimitry Andric    defm SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
2600*0b57cec5SDimitry Andric                   !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
2601*0b57cec5SDimitry Andric                   SSEPackedSingle, sched.PS.Scl>, XS;
2602*0b57cec5SDimitry Andric    defm SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
2603*0b57cec5SDimitry Andric                   !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
2604*0b57cec5SDimitry Andric                   SSEPackedDouble, sched.PD.Scl>, XD;
2605*0b57cec5SDimitry Andric  }
2606*0b57cec5SDimitry Andric}
2607*0b57cec5SDimitry Andric
2608*0b57cec5SDimitry Andric// Binary Arithmetic instructions
2609*0b57cec5SDimitry Andricdefm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SchedWriteFAddSizes>,
2610*0b57cec5SDimitry Andric           basic_sse12_fp_binop_s<0x58, "add", fadd, SchedWriteFAddSizes>,
2611*0b57cec5SDimitry Andric           basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAddSizes>;
2612*0b57cec5SDimitry Andricdefm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SchedWriteFMulSizes>,
2613*0b57cec5SDimitry Andric           basic_sse12_fp_binop_s<0x59, "mul", fmul, SchedWriteFMulSizes>,
2614*0b57cec5SDimitry Andric           basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMulSizes>;
2615*0b57cec5SDimitry Andriclet isCommutable = 0 in {
2616*0b57cec5SDimitry Andric  defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SchedWriteFAddSizes>,
2617*0b57cec5SDimitry Andric             basic_sse12_fp_binop_s<0x5C, "sub", fsub, SchedWriteFAddSizes>,
2618*0b57cec5SDimitry Andric             basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAddSizes>;
2619*0b57cec5SDimitry Andric  defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SchedWriteFDivSizes>,
2620*0b57cec5SDimitry Andric             basic_sse12_fp_binop_s<0x5E, "div", fdiv, SchedWriteFDivSizes>,
2621*0b57cec5SDimitry Andric             basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDivSizes>;
2622*0b57cec5SDimitry Andric  defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
2623*0b57cec5SDimitry Andric             basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
2624*0b57cec5SDimitry Andric             basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SchedWriteFCmpSizes>;
2625*0b57cec5SDimitry Andric  defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
2626*0b57cec5SDimitry Andric             basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
2627*0b57cec5SDimitry Andric             basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SchedWriteFCmpSizes>;
2628*0b57cec5SDimitry Andric}
2629*0b57cec5SDimitry Andric
2630*0b57cec5SDimitry Andriclet isCodeGenOnly = 1 in {
2631*0b57cec5SDimitry Andric  defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>,
2632*0b57cec5SDimitry Andric             basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>;
2633*0b57cec5SDimitry Andric  defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SchedWriteFCmpSizes>,
2634*0b57cec5SDimitry Andric             basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SchedWriteFCmpSizes>;
2635*0b57cec5SDimitry Andric}
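
// A short usage sketch for the binop defms above (function names are ours):
// packed and scalar FP arithmetic written with intrinsics maps onto these
// definitions, e.g.
//
//   #include <xmmintrin.h>
//   __m128 axpy(__m128 a, __m128 x, __m128 y) {
//     return _mm_add_ps(_mm_mul_ps(a, x), y);  // mulps + addps (or VEX forms)
//   }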
2636*0b57cec5SDimitry Andric
2637*0b57cec5SDimitry Andric// Patterns used to select SSE scalar fp arithmetic instructions from
2638*0b57cec5SDimitry Andric// either:
2639*0b57cec5SDimitry Andric//
2640*0b57cec5SDimitry Andric// (1) a scalar fp operation followed by a blend
2641*0b57cec5SDimitry Andric//
2642*0b57cec5SDimitry Andric// The effect is that the backend no longer emits unnecessary vector
2643*0b57cec5SDimitry Andric// insert instructions immediately after SSE scalar fp instructions
2644*0b57cec5SDimitry Andric// like addss or mulss.
2645*0b57cec5SDimitry Andric//
2646*0b57cec5SDimitry Andric// For example, given the following code:
2647*0b57cec5SDimitry Andric//   __m128 foo(__m128 A, __m128 B) {
2648*0b57cec5SDimitry Andric//     A[0] += B[0];
2649*0b57cec5SDimitry Andric//     return A;
2650*0b57cec5SDimitry Andric//   }
2651*0b57cec5SDimitry Andric//
2652*0b57cec5SDimitry Andric// Previously we generated:
2653*0b57cec5SDimitry Andric//   addss %xmm0, %xmm1
2654*0b57cec5SDimitry Andric//   movss %xmm1, %xmm0
2655*0b57cec5SDimitry Andric//
2656*0b57cec5SDimitry Andric// We now generate:
2657*0b57cec5SDimitry Andric//   addss %xmm1, %xmm0
2658*0b57cec5SDimitry Andric//
2659*0b57cec5SDimitry Andric// (2) a vector packed single/double fp operation followed by a vector insert
2660*0b57cec5SDimitry Andric//
2661*0b57cec5SDimitry Andric// The effect is that the backend converts the packed fp instruction
2662*0b57cec5SDimitry Andric// followed by a vector insert into a single SSE scalar fp instruction.
2663*0b57cec5SDimitry Andric//
2664*0b57cec5SDimitry Andric// For example, given the following code:
2665*0b57cec5SDimitry Andric//   __m128 foo(__m128 A, __m128 B) {
2666*0b57cec5SDimitry Andric//     __m128 C = A + B;
2667*0b57cec5SDimitry Andric//     return (__m128) {C[0], A[1], A[2], A[3]};
2668*0b57cec5SDimitry Andric//   }
2669*0b57cec5SDimitry Andric//
2670*0b57cec5SDimitry Andric// Previously we generated:
2671*0b57cec5SDimitry Andric//   addps %xmm0, %xmm1
2672*0b57cec5SDimitry Andric//   movss %xmm1, %xmm0
2673*0b57cec5SDimitry Andric//
2674*0b57cec5SDimitry Andric// We now generate:
2675*0b57cec5SDimitry Andric//   addss %xmm1, %xmm0
2676*0b57cec5SDimitry Andric
2677*0b57cec5SDimitry Andric// TODO: Some canonicalization in lowering would simplify the number of
2678*0b57cec5SDimitry Andric// patterns we have to try to match.
2679*0b57cec5SDimitry Andricmulticlass scalar_math_patterns<SDNode Op, string OpcPrefix, SDNode Move,
2680*0b57cec5SDimitry Andric                                    ValueType VT, ValueType EltTy,
2681*0b57cec5SDimitry Andric                                    RegisterClass RC, PatFrag ld_frag,
2682*0b57cec5SDimitry Andric                                    Predicate BasePredicate> {
2683*0b57cec5SDimitry Andric  let Predicates = [BasePredicate] in {
2684*0b57cec5SDimitry Andric    // extracted scalar math op with insert via movss/movsd
2685*0b57cec5SDimitry Andric    def : Pat<(VT (Move (VT VR128:$dst),
2686*0b57cec5SDimitry Andric                        (VT (scalar_to_vector
2687*0b57cec5SDimitry Andric                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
2688*0b57cec5SDimitry Andric                                 RC:$src))))),
2689*0b57cec5SDimitry Andric              (!cast<Instruction>(OpcPrefix#rr_Int) VT:$dst,
2690*0b57cec5SDimitry Andric               (VT (COPY_TO_REGCLASS RC:$src, VR128)))>;
2691*0b57cec5SDimitry Andric    def : Pat<(VT (Move (VT VR128:$dst),
2692*0b57cec5SDimitry Andric                        (VT (scalar_to_vector
2693*0b57cec5SDimitry Andric                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
2694*0b57cec5SDimitry Andric                                 (ld_frag addr:$src)))))),
2695*0b57cec5SDimitry Andric              (!cast<Instruction>(OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
2696*0b57cec5SDimitry Andric  }
2697*0b57cec5SDimitry Andric
2698*0b57cec5SDimitry Andric  // Repeat for AVX versions of the instructions.
2699*0b57cec5SDimitry Andric  let Predicates = [UseAVX] in {
2700*0b57cec5SDimitry Andric    // extracted scalar math op with insert via movss/movsd
2701*0b57cec5SDimitry Andric    def : Pat<(VT (Move (VT VR128:$dst),
2702*0b57cec5SDimitry Andric                        (VT (scalar_to_vector
2703*0b57cec5SDimitry Andric                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
2704*0b57cec5SDimitry Andric                                 RC:$src))))),
2705*0b57cec5SDimitry Andric              (!cast<Instruction>("V"#OpcPrefix#rr_Int) VT:$dst,
2706*0b57cec5SDimitry Andric               (VT (COPY_TO_REGCLASS RC:$src, VR128)))>;
2707*0b57cec5SDimitry Andric    def : Pat<(VT (Move (VT VR128:$dst),
2708*0b57cec5SDimitry Andric                        (VT (scalar_to_vector
2709*0b57cec5SDimitry Andric                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
2710*0b57cec5SDimitry Andric                                 (ld_frag addr:$src)))))),
2711*0b57cec5SDimitry Andric              (!cast<Instruction>("V"#OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
2712*0b57cec5SDimitry Andric  }
2713*0b57cec5SDimitry Andric}
2714*0b57cec5SDimitry Andric
2715*0b57cec5SDimitry Andricdefm : scalar_math_patterns<fadd, "ADDSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
2716*0b57cec5SDimitry Andricdefm : scalar_math_patterns<fsub, "SUBSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
2717*0b57cec5SDimitry Andricdefm : scalar_math_patterns<fmul, "MULSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
2718*0b57cec5SDimitry Andricdefm : scalar_math_patterns<fdiv, "DIVSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
2719*0b57cec5SDimitry Andric
2720*0b57cec5SDimitry Andricdefm : scalar_math_patterns<fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
2721*0b57cec5SDimitry Andricdefm : scalar_math_patterns<fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
2722*0b57cec5SDimitry Andricdefm : scalar_math_patterns<fmul, "MULSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
2723*0b57cec5SDimitry Andricdefm : scalar_math_patterns<fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
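
// Tying this back to the motivating example above (function name is ours):
// an "A[0] += B[0]" style update, e.g. written with _mm_add_ss, is normally
// lowered to the extract/op/insert shape these patterns match and is then
// selected as the ADDSSrr_Int / VADDSSrr_Int form.
//
//   #include <xmmintrin.h>
//   __m128 add_lane0(__m128 A, __m128 B) {
//     return _mm_add_ss(A, B);  // addss: lane 0 of A+B, upper lanes of A kept
//   }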
2724*0b57cec5SDimitry Andric
2725*0b57cec5SDimitry Andric/// Unop Arithmetic
2726*0b57cec5SDimitry Andric/// In addition, we have a special variant of the scalar form here to
2727*0b57cec5SDimitry Andric/// represent the associated intrinsic operation.  This form differs from the
2728*0b57cec5SDimitry Andric/// plain scalar form in that it takes an entire vector (instead of a
2729*0b57cec5SDimitry Andric/// scalar) and leaves the top elements undefined.
2730*0b57cec5SDimitry Andric///
2731*0b57cec5SDimitry Andric/// We also have a special variant for the full-vector intrinsic form.
2732*0b57cec5SDimitry Andric
2733*0b57cec5SDimitry Andric/// sse_fp_unop_s - SSE 1 & 2 unops in scalar form.
2734*0b57cec5SDimitry Andric/// For the non-AVX defs, we need $src1 to be tied to $dst because
2735*0b57cec5SDimitry Andric/// the HW instructions are 2 operand / destructive.
2736*0b57cec5SDimitry Andricmulticlass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
2737*0b57cec5SDimitry Andric                          ValueType ScalarVT, X86MemOperand x86memop,
2738*0b57cec5SDimitry Andric                          Operand intmemop, SDNode OpNode, Domain d,
2739*0b57cec5SDimitry Andric                          X86FoldableSchedWrite sched, Predicate target> {
2740*0b57cec5SDimitry Andric  let isCodeGenOnly = 1, hasSideEffects = 0 in {
2741*0b57cec5SDimitry Andric  def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1),
2742*0b57cec5SDimitry Andric              !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
2743*0b57cec5SDimitry Andric            [(set RC:$dst, (OpNode RC:$src1))], d>, Sched<[sched]>,
2744*0b57cec5SDimitry Andric            Requires<[target]>;
2745*0b57cec5SDimitry Andric  let mayLoad = 1 in
2746*0b57cec5SDimitry Andric  def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1),
2747*0b57cec5SDimitry Andric            !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
2748*0b57cec5SDimitry Andric            [(set RC:$dst, (OpNode (load addr:$src1)))], d>,
2749*0b57cec5SDimitry Andric            Sched<[sched.Folded]>,
2750*0b57cec5SDimitry Andric            Requires<[target, OptForSize]>;
2751*0b57cec5SDimitry Andric  }
2752*0b57cec5SDimitry Andric
2753*0b57cec5SDimitry Andric  let hasSideEffects = 0, Constraints = "$src1 = $dst", ExeDomain = d in {
2754*0b57cec5SDimitry Andric  def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
2755*0b57cec5SDimitry Andric                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>,
2756*0b57cec5SDimitry Andric                Sched<[sched]>;
2757*0b57cec5SDimitry Andric  let mayLoad = 1 in
2758*0b57cec5SDimitry Andric  def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, intmemop:$src2),
2759*0b57cec5SDimitry Andric                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>,
2760*0b57cec5SDimitry Andric                Sched<[sched.Folded, sched.ReadAfterFold]>;
2761*0b57cec5SDimitry Andric  }
2762*0b57cec5SDimitry Andric
2763*0b57cec5SDimitry Andric}
2764*0b57cec5SDimitry Andric
2765*0b57cec5SDimitry Andricmulticlass sse_fp_unop_s_intr<RegisterClass RC, ValueType vt,
2766*0b57cec5SDimitry Andric                              ComplexPattern int_cpat, Intrinsic Intr,
2767*0b57cec5SDimitry Andric                              Predicate target, string Suffix> {
2768*0b57cec5SDimitry Andric  let Predicates = [target] in {
2769*0b57cec5SDimitry Andric  // These are unary operations, but they are modeled as having 2 source operands
2770*0b57cec5SDimitry Andric  // because the high elements of the destination are unchanged in SSE.
2771*0b57cec5SDimitry Andric  def : Pat<(Intr VR128:$src),
2772*0b57cec5SDimitry Andric            (!cast<Instruction>(NAME#r_Int) VR128:$src, VR128:$src)>;
2773*0b57cec5SDimitry Andric  }
2774*0b57cec5SDimitry Andric  // We don't want to fold scalar loads into these instructions unless
2775*0b57cec5SDimitry Andric  // optimizing for size. This is because the folded instruction will have a
2776*0b57cec5SDimitry Andric  // partial register update, while the unfolded sequence will not, e.g.
2777*0b57cec5SDimitry Andric  // movss mem, %xmm0
2778*0b57cec5SDimitry Andric  // rcpss %xmm0, %xmm0
2779*0b57cec5SDimitry Andric  // which has a clobber before the rcp, vs.
2780*0b57cec5SDimitry Andric  // rcpss mem, %xmm0
2781*0b57cec5SDimitry Andric  let Predicates = [target, OptForSize] in {
2782*0b57cec5SDimitry Andric    def : Pat<(Intr int_cpat:$src2),
2783*0b57cec5SDimitry Andric               (!cast<Instruction>(NAME#m_Int)
2784*0b57cec5SDimitry Andric                      (vt (IMPLICIT_DEF)), addr:$src2)>;
2785*0b57cec5SDimitry Andric  }
2786*0b57cec5SDimitry Andric}
2787*0b57cec5SDimitry Andric
2788*0b57cec5SDimitry Andricmulticlass avx_fp_unop_s_intr<RegisterClass RC, ValueType vt, ComplexPattern int_cpat,
2789*0b57cec5SDimitry Andric                              Intrinsic Intr, Predicate target> {
2790*0b57cec5SDimitry Andric  let Predicates = [target] in {
2791*0b57cec5SDimitry Andric   def : Pat<(Intr VR128:$src),
2792*0b57cec5SDimitry Andric             (!cast<Instruction>(NAME#r_Int) VR128:$src,
2793*0b57cec5SDimitry Andric                                 VR128:$src)>;
2794*0b57cec5SDimitry Andric  }
2795*0b57cec5SDimitry Andric  let Predicates = [target, OptForSize] in {
2796*0b57cec5SDimitry Andric    def : Pat<(Intr int_cpat:$src2),
2797*0b57cec5SDimitry Andric              (!cast<Instruction>(NAME#m_Int)
2798*0b57cec5SDimitry Andric                    (vt (IMPLICIT_DEF)), addr:$src2)>;
2799*0b57cec5SDimitry Andric  }
2800*0b57cec5SDimitry Andric}
2801*0b57cec5SDimitry Andric
2802*0b57cec5SDimitry Andricmulticlass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
2803*0b57cec5SDimitry Andric                          ValueType ScalarVT, X86MemOperand x86memop,
2804*0b57cec5SDimitry Andric                          Operand intmemop, SDNode OpNode, Domain d,
2805*0b57cec5SDimitry Andric                          X86FoldableSchedWrite sched, Predicate target> {
2806*0b57cec5SDimitry Andric  let isCodeGenOnly = 1, hasSideEffects = 0 in {
2807*0b57cec5SDimitry Andric  def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
2808*0b57cec5SDimitry Andric            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2809*0b57cec5SDimitry Andric            [], d>, Sched<[sched]>;
2810*0b57cec5SDimitry Andric  let mayLoad = 1 in
2811*0b57cec5SDimitry Andric  def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
2812*0b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2813*0b57cec5SDimitry Andric            [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>;
2814*0b57cec5SDimitry Andric  }
2815*0b57cec5SDimitry Andric  let hasSideEffects = 0, ExeDomain = d in {
2816*0b57cec5SDimitry Andric  def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
2817*0b57cec5SDimitry Andric                (ins VR128:$src1, VR128:$src2),
2818*0b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2819*0b57cec5SDimitry Andric             []>, Sched<[sched]>;
2820*0b57cec5SDimitry Andric  let mayLoad = 1 in
2821*0b57cec5SDimitry Andric  def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
2822*0b57cec5SDimitry Andric                (ins VR128:$src1, intmemop:$src2),
2823*0b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2824*0b57cec5SDimitry Andric             []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
2825*0b57cec5SDimitry Andric  }
2826*0b57cec5SDimitry Andric
2827*0b57cec5SDimitry Andric  // We don't want to fold scalar loads into these instructions unless
2828*0b57cec5SDimitry Andric  // optimizing for size. This is because the folded instruction will have a
2829*0b57cec5SDimitry Andric  // partial register update, while the unfolded sequence will not, e.g.
2830*0b57cec5SDimitry Andric  // vmovss mem, %xmm0
2831*0b57cec5SDimitry Andric  // vrcpss %xmm0, %xmm0, %xmm0
2832*0b57cec5SDimitry Andric  // which has a clobber before the rcp, vs.
2833*0b57cec5SDimitry Andric  // vrcpss mem, %xmm0, %xmm0
2834*0b57cec5SDimitry Andric  // TODO: In theory, we could fold the load, and avoid the stall caused by
2835*0b57cec5SDimitry Andric  // the partial register store, either in BreakFalseDeps or with smarter RA.
2836*0b57cec5SDimitry Andric  let Predicates = [target] in {
2837*0b57cec5SDimitry Andric   def : Pat<(OpNode RC:$src),  (!cast<Instruction>(NAME#r)
2838*0b57cec5SDimitry Andric                                (ScalarVT (IMPLICIT_DEF)), RC:$src)>;
2839*0b57cec5SDimitry Andric  }
2840*0b57cec5SDimitry Andric  let Predicates = [target, OptForSize] in {
2841*0b57cec5SDimitry Andric    def : Pat<(ScalarVT (OpNode (load addr:$src))),
2842*0b57cec5SDimitry Andric              (!cast<Instruction>(NAME#m) (ScalarVT (IMPLICIT_DEF)),
2843*0b57cec5SDimitry Andric            addr:$src)>;
2844*0b57cec5SDimitry Andric  }
2845*0b57cec5SDimitry Andric}
2846*0b57cec5SDimitry Andric
2847*0b57cec5SDimitry Andric/// sse1_fp_unop_p - SSE1 unops in packed form.
2848*0b57cec5SDimitry Andricmulticlass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
2849*0b57cec5SDimitry Andric                          X86SchedWriteWidths sched, list<Predicate> prds> {
2850*0b57cec5SDimitry Andriclet Predicates = prds in {
2851*0b57cec5SDimitry Andric  def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2852*0b57cec5SDimitry Andric                       !strconcat("v", OpcodeStr,
2853*0b57cec5SDimitry Andric                                  "ps\t{$src, $dst|$dst, $src}"),
2854*0b57cec5SDimitry Andric                       [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>,
2855*0b57cec5SDimitry Andric                       VEX, Sched<[sched.XMM]>, VEX_WIG;
2856*0b57cec5SDimitry Andric  def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
2857*0b57cec5SDimitry Andric                       !strconcat("v", OpcodeStr,
2858*0b57cec5SDimitry Andric                                  "ps\t{$src, $dst|$dst, $src}"),
2859*0b57cec5SDimitry Andric                       [(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))]>,
2860*0b57cec5SDimitry Andric                       VEX, Sched<[sched.XMM.Folded]>, VEX_WIG;
2861*0b57cec5SDimitry Andric  def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
2862*0b57cec5SDimitry Andric                        !strconcat("v", OpcodeStr,
2863*0b57cec5SDimitry Andric                                   "ps\t{$src, $dst|$dst, $src}"),
2864*0b57cec5SDimitry Andric                        [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>,
2865*0b57cec5SDimitry Andric                        VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
2866*0b57cec5SDimitry Andric  def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
2867*0b57cec5SDimitry Andric                        !strconcat("v", OpcodeStr,
2868*0b57cec5SDimitry Andric                                   "ps\t{$src, $dst|$dst, $src}"),
2869*0b57cec5SDimitry Andric                        [(set VR256:$dst, (OpNode (loadv8f32 addr:$src)))]>,
2870*0b57cec5SDimitry Andric                        VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG;
2871*0b57cec5SDimitry Andric}
2872*0b57cec5SDimitry Andric
2873*0b57cec5SDimitry Andric  def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2874*0b57cec5SDimitry Andric                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
2875*0b57cec5SDimitry Andric                [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>,
2876*0b57cec5SDimitry Andric                Sched<[sched.XMM]>;
2877*0b57cec5SDimitry Andric  def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
2878*0b57cec5SDimitry Andric                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
2879*0b57cec5SDimitry Andric                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>,
2880*0b57cec5SDimitry Andric                Sched<[sched.XMM.Folded]>;
2881*0b57cec5SDimitry Andric}
2882*0b57cec5SDimitry Andric
2883*0b57cec5SDimitry Andric/// sse2_fp_unop_p - SSE2 unops in vector forms.
2884*0b57cec5SDimitry Andricmulticlass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
2885*0b57cec5SDimitry Andric                          SDNode OpNode, X86SchedWriteWidths sched> {
2886*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
2887*0b57cec5SDimitry Andric  def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2888*0b57cec5SDimitry Andric                       !strconcat("v", OpcodeStr,
2889*0b57cec5SDimitry Andric                                  "pd\t{$src, $dst|$dst, $src}"),
2890*0b57cec5SDimitry Andric                       [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>,
2891*0b57cec5SDimitry Andric                       VEX, Sched<[sched.XMM]>, VEX_WIG;
2892*0b57cec5SDimitry Andric  def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
2893*0b57cec5SDimitry Andric                       !strconcat("v", OpcodeStr,
2894*0b57cec5SDimitry Andric                                  "pd\t{$src, $dst|$dst, $src}"),
2895*0b57cec5SDimitry Andric                       [(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))]>,
2896*0b57cec5SDimitry Andric                       VEX, Sched<[sched.XMM.Folded]>, VEX_WIG;
2897*0b57cec5SDimitry Andric  def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
2898*0b57cec5SDimitry Andric                        !strconcat("v", OpcodeStr,
2899*0b57cec5SDimitry Andric                                   "pd\t{$src, $dst|$dst, $src}"),
2900*0b57cec5SDimitry Andric                        [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>,
2901*0b57cec5SDimitry Andric                        VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
2902*0b57cec5SDimitry Andric  def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
2903*0b57cec5SDimitry Andric                        !strconcat("v", OpcodeStr,
2904*0b57cec5SDimitry Andric                                   "pd\t{$src, $dst|$dst, $src}"),
2905*0b57cec5SDimitry Andric                        [(set VR256:$dst, (OpNode (loadv4f64 addr:$src)))]>,
2906*0b57cec5SDimitry Andric                        VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG;
2907*0b57cec5SDimitry Andric}
2908*0b57cec5SDimitry Andric
2909*0b57cec5SDimitry Andric  def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2910*0b57cec5SDimitry Andric                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
2911*0b57cec5SDimitry Andric                [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>,
2912*0b57cec5SDimitry Andric                Sched<[sched.XMM]>;
2913*0b57cec5SDimitry Andric  def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
2914*0b57cec5SDimitry Andric                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
2915*0b57cec5SDimitry Andric                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>,
2916*0b57cec5SDimitry Andric                Sched<[sched.XMM.Folded]>;
2917*0b57cec5SDimitry Andric}
2918*0b57cec5SDimitry Andric
2919*0b57cec5SDimitry Andricmulticlass sse1_fp_unop_s_intr<bits<8> opc, string OpcodeStr, SDNode OpNode,
2920*0b57cec5SDimitry Andric                          X86SchedWriteWidths sched, Predicate AVXTarget> {
2921*0b57cec5SDimitry Andric  defm SS        :  sse_fp_unop_s_intr<FR32, v4f32, sse_load_f32,
2922*0b57cec5SDimitry Andric                      !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss),
2923*0b57cec5SDimitry Andric                      UseSSE1, "SS">, XS;
2924*0b57cec5SDimitry Andric  defm V#NAME#SS  : avx_fp_unop_s_intr<FR32, v4f32, sse_load_f32,
2925*0b57cec5SDimitry Andric                      !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss),
2926*0b57cec5SDimitry Andric                      AVXTarget>,
2927*0b57cec5SDimitry Andric                      XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable;
2928*0b57cec5SDimitry Andric}
2929*0b57cec5SDimitry Andric
2930*0b57cec5SDimitry Andricmulticlass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
2931*0b57cec5SDimitry Andric                          X86SchedWriteWidths sched, Predicate AVXTarget> {
2932*0b57cec5SDimitry Andric  defm SS        :  sse_fp_unop_s<opc, OpcodeStr##ss, FR32, f32, f32mem,
2933*0b57cec5SDimitry Andric                      ssmem, OpNode, SSEPackedSingle, sched.Scl, UseSSE1>, XS;
2934*0b57cec5SDimitry Andric  defm V#NAME#SS  : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, f32,
2935*0b57cec5SDimitry Andric                      f32mem, ssmem, OpNode, SSEPackedSingle, sched.Scl, AVXTarget>,
2936*0b57cec5SDimitry Andric                       XS, VEX_4V, VEX_LIG, VEX_WIG;
2937*0b57cec5SDimitry Andric}
2938*0b57cec5SDimitry Andric
2939*0b57cec5SDimitry Andricmulticlass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
2940*0b57cec5SDimitry Andric                          X86SchedWriteWidths sched, Predicate AVXTarget> {
2941*0b57cec5SDimitry Andric  defm SD         : sse_fp_unop_s<opc, OpcodeStr##sd, FR64, f64, f64mem,
2942*0b57cec5SDimitry Andric                         sdmem, OpNode, SSEPackedDouble, sched.Scl, UseSSE2>, XD;
2943*0b57cec5SDimitry Andric  defm V#NAME#SD  : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, f64,
2944*0b57cec5SDimitry Andric                         f64mem, sdmem, OpNode, SSEPackedDouble, sched.Scl, AVXTarget>,
2945*0b57cec5SDimitry Andric                         XD, VEX_4V, VEX_LIG, VEX_WIG;
2946*0b57cec5SDimitry Andric}
2947*0b57cec5SDimitry Andric
2948*0b57cec5SDimitry Andric// Square root.
2949*0b57cec5SDimitry Andricdefm SQRT  : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt, UseAVX>,
2950*0b57cec5SDimitry Andric             sse1_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>,
2951*0b57cec5SDimitry Andric             sse2_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt64, UseAVX>,
2952*0b57cec5SDimitry Andric             sse2_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt64>;
2953*0b57cec5SDimitry Andric
2954*0b57cec5SDimitry Andric// Reciprocal approximations. Note that these typically require refinement
2955*0b57cec5SDimitry Andric// in order to obtain suitable precision.
2956*0b57cec5SDimitry Andricdefm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>,
2957*0b57cec5SDimitry Andric             sse1_fp_unop_s_intr<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>,
2958*0b57cec5SDimitry Andric             sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, [HasAVX]>;
2959*0b57cec5SDimitry Andricdefm RCP   : sse1_fp_unop_s<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>,
2960*0b57cec5SDimitry Andric             sse1_fp_unop_s_intr<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>,
2961*0b57cec5SDimitry Andric             sse1_fp_unop_p<0x53, "rcp", X86frcp, SchedWriteFRcp, [HasAVX]>;
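
// As a purely illustrative sketch (not part of these definitions), one
// Newton-Raphson step applied to the rsqrtps estimate recovers roughly
// single precision:
//   #include <xmmintrin.h>
//   __m128 rsqrt_refined(__m128 a) {
//     __m128 x0 = _mm_rsqrt_ps(a);                      // ~12-bit estimate
//     __m128 h  = _mm_mul_ps(_mm_set1_ps(0.5f),
//                            _mm_mul_ps(a, _mm_mul_ps(x0, x0)));
//     return _mm_mul_ps(x0, _mm_sub_ps(_mm_set1_ps(1.5f), h));
//   }
// The analogous step for rcpps is x1 = x0 * (2 - a * x0).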
2962*0b57cec5SDimitry Andric
2963*0b57cec5SDimitry Andric// There is no f64 version of the reciprocal approximation instructions.
2964*0b57cec5SDimitry Andric
2965*0b57cec5SDimitry Andricmulticlass scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix, SDNode Move,
2966*0b57cec5SDimitry Andric                                      ValueType VT, Predicate BasePredicate> {
2967*0b57cec5SDimitry Andric  let Predicates = [BasePredicate] in {
2968*0b57cec5SDimitry Andric    def : Pat<(VT (Move VT:$dst, (scalar_to_vector
2969*0b57cec5SDimitry Andric                                  (OpNode (extractelt VT:$src, 0))))),
2970*0b57cec5SDimitry Andric              (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src)>;
2971*0b57cec5SDimitry Andric  }
2972*0b57cec5SDimitry Andric
2973*0b57cec5SDimitry Andric  // Repeat for AVX versions of the instructions.
2974*0b57cec5SDimitry Andric  let Predicates = [UseAVX] in {
2975*0b57cec5SDimitry Andric    def : Pat<(VT (Move VT:$dst, (scalar_to_vector
2976*0b57cec5SDimitry Andric                                  (OpNode (extractelt VT:$src, 0))))),
2977*0b57cec5SDimitry Andric              (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
2978*0b57cec5SDimitry Andric  }
2979*0b57cec5SDimitry Andric}
2980*0b57cec5SDimitry Andric
2981*0b57cec5SDimitry Andricdefm : scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32, UseSSE1>;
2982*0b57cec5SDimitry Andricdefm : scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64, UseSSE2>;
2983*0b57cec5SDimitry Andric
2984*0b57cec5SDimitry Andricmulticlass scalar_unary_math_intr_patterns<Intrinsic Intr, string OpcPrefix,
2985*0b57cec5SDimitry Andric                                           SDNode Move, ValueType VT,
2986*0b57cec5SDimitry Andric                                           Predicate BasePredicate> {
2987*0b57cec5SDimitry Andric  let Predicates = [BasePredicate] in {
2988*0b57cec5SDimitry Andric    def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
2989*0b57cec5SDimitry Andric              (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src)>;
2990*0b57cec5SDimitry Andric  }
2991*0b57cec5SDimitry Andric
2992*0b57cec5SDimitry Andric  // Repeat for AVX versions of the instructions.
2993*0b57cec5SDimitry Andric  let Predicates = [HasAVX] in {
2994*0b57cec5SDimitry Andric    def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
2995*0b57cec5SDimitry Andric              (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
2996*0b57cec5SDimitry Andric  }
2997*0b57cec5SDimitry Andric}
2998*0b57cec5SDimitry Andric
2999*0b57cec5SDimitry Andricdefm : scalar_unary_math_intr_patterns<int_x86_sse_rcp_ss, "RCPSS", X86Movss,
3000*0b57cec5SDimitry Andric                                       v4f32, UseSSE1>;
3001*0b57cec5SDimitry Andricdefm : scalar_unary_math_intr_patterns<int_x86_sse_rsqrt_ss, "RSQRTSS", X86Movss,
3002*0b57cec5SDimitry Andric                                       v4f32, UseSSE1>;
3003*0b57cec5SDimitry Andric
3004*0b57cec5SDimitry Andric
3005*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
3006*0b57cec5SDimitry Andric// SSE 1 & 2 - Non-temporal stores
3007*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
3008*0b57cec5SDimitry Andric
3009*0b57cec5SDimitry Andriclet AddedComplexity = 400 in { // Prefer non-temporal versions
3010*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
3011*0b57cec5SDimitry Andriclet SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
3012*0b57cec5SDimitry Andricdef VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
3013*0b57cec5SDimitry Andric                     (ins f128mem:$dst, VR128:$src),
3014*0b57cec5SDimitry Andric                     "movntps\t{$src, $dst|$dst, $src}",
3015*0b57cec5SDimitry Andric                     [(alignednontemporalstore (v4f32 VR128:$src),
3016*0b57cec5SDimitry Andric                                               addr:$dst)]>, VEX, VEX_WIG;
3017*0b57cec5SDimitry Andricdef VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
3018*0b57cec5SDimitry Andric                     (ins f128mem:$dst, VR128:$src),
3019*0b57cec5SDimitry Andric                     "movntpd\t{$src, $dst|$dst, $src}",
3020*0b57cec5SDimitry Andric                     [(alignednontemporalstore (v2f64 VR128:$src),
3021*0b57cec5SDimitry Andric                                               addr:$dst)]>, VEX, VEX_WIG;
3022*0b57cec5SDimitry Andric} // SchedRW
3023*0b57cec5SDimitry Andric
3024*0b57cec5SDimitry Andriclet SchedRW = [SchedWriteFMoveLSNT.YMM.MR] in {
3025*0b57cec5SDimitry Andricdef VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
3026*0b57cec5SDimitry Andric                     (ins f256mem:$dst, VR256:$src),
3027*0b57cec5SDimitry Andric                     "movntps\t{$src, $dst|$dst, $src}",
3028*0b57cec5SDimitry Andric                     [(alignednontemporalstore (v8f32 VR256:$src),
3029*0b57cec5SDimitry Andric                                               addr:$dst)]>, VEX, VEX_L, VEX_WIG;
3030*0b57cec5SDimitry Andricdef VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
3031*0b57cec5SDimitry Andric                     (ins f256mem:$dst, VR256:$src),
3032*0b57cec5SDimitry Andric                     "movntpd\t{$src, $dst|$dst, $src}",
3033*0b57cec5SDimitry Andric                     [(alignednontemporalstore (v4f64 VR256:$src),
3034*0b57cec5SDimitry Andric                                               addr:$dst)]>, VEX, VEX_L, VEX_WIG;
3035*0b57cec5SDimitry Andric} // SchedRW
3036*0b57cec5SDimitry Andric
3037*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in {
3038*0b57cec5SDimitry Andricdef VMOVNTDQmr    : VPDI<0xE7, MRMDestMem, (outs),
3039*0b57cec5SDimitry Andric                         (ins i128mem:$dst, VR128:$src),
3040*0b57cec5SDimitry Andric                         "movntdq\t{$src, $dst|$dst, $src}",
3041*0b57cec5SDimitry Andric                         [(alignednontemporalstore (v2i64 VR128:$src),
3042*0b57cec5SDimitry Andric                                                   addr:$dst)]>, VEX, VEX_WIG,
3043*0b57cec5SDimitry Andric                         Sched<[SchedWriteVecMoveLSNT.XMM.MR]>;
3044*0b57cec5SDimitry Andricdef VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
3045*0b57cec5SDimitry Andric                    (ins i256mem:$dst, VR256:$src),
3046*0b57cec5SDimitry Andric                    "movntdq\t{$src, $dst|$dst, $src}",
3047*0b57cec5SDimitry Andric                    [(alignednontemporalstore (v4i64 VR256:$src),
3048*0b57cec5SDimitry Andric                                              addr:$dst)]>, VEX, VEX_L, VEX_WIG,
3049*0b57cec5SDimitry Andric                    Sched<[SchedWriteVecMoveLSNT.YMM.MR]>;
3050*0b57cec5SDimitry Andric} // ExeDomain
3051*0b57cec5SDimitry Andric} // Predicates
3052*0b57cec5SDimitry Andric
3053*0b57cec5SDimitry Andriclet SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
3054*0b57cec5SDimitry Andricdef MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
3055*0b57cec5SDimitry Andric                    "movntps\t{$src, $dst|$dst, $src}",
3056*0b57cec5SDimitry Andric                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
3057*0b57cec5SDimitry Andricdef MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
3058*0b57cec5SDimitry Andric                    "movntpd\t{$src, $dst|$dst, $src}",
3059*0b57cec5SDimitry Andric                    [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
3060*0b57cec5SDimitry Andric} // SchedRW
3061*0b57cec5SDimitry Andric
3062*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLSNT.XMM.MR] in
3063*0b57cec5SDimitry Andricdef MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
3064*0b57cec5SDimitry Andric                    "movntdq\t{$src, $dst|$dst, $src}",
3065*0b57cec5SDimitry Andric                    [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>;
3066*0b57cec5SDimitry Andric
3067*0b57cec5SDimitry Andriclet SchedRW = [WriteStoreNT] in {
3068*0b57cec5SDimitry Andric// There is no AVX form for instructions below this point
3069*0b57cec5SDimitry Andricdef MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
3070*0b57cec5SDimitry Andric                 "movnti{l}\t{$src, $dst|$dst, $src}",
3071*0b57cec5SDimitry Andric                 [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
3072*0b57cec5SDimitry Andric               PS, Requires<[HasSSE2]>;
3073*0b57cec5SDimitry Andricdef MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
3074*0b57cec5SDimitry Andric                     "movnti{q}\t{$src, $dst|$dst, $src}",
3075*0b57cec5SDimitry Andric                     [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
3076*0b57cec5SDimitry Andric                  PS, Requires<[HasSSE2]>;
3077*0b57cec5SDimitry Andric} // SchedRW = [WriteStoreNT]
3078*0b57cec5SDimitry Andric
3079*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
3080*0b57cec5SDimitry Andric  def : Pat<(alignednontemporalstore (v8i32 VR256:$src), addr:$dst),
3081*0b57cec5SDimitry Andric            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
3082*0b57cec5SDimitry Andric  def : Pat<(alignednontemporalstore (v16i16 VR256:$src), addr:$dst),
3083*0b57cec5SDimitry Andric            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
3084*0b57cec5SDimitry Andric  def : Pat<(alignednontemporalstore (v32i8 VR256:$src), addr:$dst),
3085*0b57cec5SDimitry Andric            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
3086*0b57cec5SDimitry Andric
3087*0b57cec5SDimitry Andric  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
3088*0b57cec5SDimitry Andric            (VMOVNTDQmr addr:$dst, VR128:$src)>;
3089*0b57cec5SDimitry Andric  def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
3090*0b57cec5SDimitry Andric            (VMOVNTDQmr addr:$dst, VR128:$src)>;
3091*0b57cec5SDimitry Andric  def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
3092*0b57cec5SDimitry Andric            (VMOVNTDQmr addr:$dst, VR128:$src)>;
3093*0b57cec5SDimitry Andric}
3094*0b57cec5SDimitry Andric
3095*0b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
3096*0b57cec5SDimitry Andric  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
3097*0b57cec5SDimitry Andric            (MOVNTDQmr addr:$dst, VR128:$src)>;
3098*0b57cec5SDimitry Andric  def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
3099*0b57cec5SDimitry Andric            (MOVNTDQmr addr:$dst, VR128:$src)>;
3100*0b57cec5SDimitry Andric  def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
3101*0b57cec5SDimitry Andric            (MOVNTDQmr addr:$dst, VR128:$src)>;
3102*0b57cec5SDimitry Andric}
3103*0b57cec5SDimitry Andric
3104*0b57cec5SDimitry Andric} // AddedComplexity
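
// For reference: these patterns only match aligned non-temporal stores (the
// alignednontemporalstore fragment); stores that are not suitably aligned
// simply fall back to the ordinary store patterns. A typical C usage
// (illustrative only) is:
//   _mm_stream_ps(p, v);        // movntps, p must be 16-byte aligned
//   _mm_stream_si128(q, w);     // movntdq, q must be 16-byte aligned
//   _mm_sfence();               // order the streaming stores before later ones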
3105*0b57cec5SDimitry Andric
3106*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
3107*0b57cec5SDimitry Andric// SSE 1 & 2 - Prefetch and memory fence
3108*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
3109*0b57cec5SDimitry Andric
3110*0b57cec5SDimitry Andric// Prefetch intrinsic.
3111*0b57cec5SDimitry Andriclet Predicates = [HasSSEPrefetch], SchedRW = [WriteLoad] in {
3112*0b57cec5SDimitry Andricdef PREFETCHT0   : I<0x18, MRM1m, (outs), (ins i8mem:$src),
3113*0b57cec5SDimitry Andric    "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>, TB;
3114*0b57cec5SDimitry Andricdef PREFETCHT1   : I<0x18, MRM2m, (outs), (ins i8mem:$src),
3115*0b57cec5SDimitry Andric    "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>, TB;
3116*0b57cec5SDimitry Andricdef PREFETCHT2   : I<0x18, MRM3m, (outs), (ins i8mem:$src),
3117*0b57cec5SDimitry Andric    "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>, TB;
3118*0b57cec5SDimitry Andricdef PREFETCHNTA  : I<0x18, MRM0m, (outs), (ins i8mem:$src),
3119*0b57cec5SDimitry Andric    "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>, TB;
3120*0b57cec5SDimitry Andric}
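
// The third llvm.prefetch operand is the locality hint, mapped above as
// 3 -> prefetcht0, 2 -> prefetcht1, 1 -> prefetcht2 and 0 -> prefetchnta;
// the trailing (i32 1) selects the data cache. From C this is typically
// reached via (illustrative):
//   _mm_prefetch((const char *)p, _MM_HINT_T0);   // locality 3 -> prefetcht0
//   _mm_prefetch((const char *)p, _MM_HINT_NTA);  // locality 0 -> prefetchnta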
3121*0b57cec5SDimitry Andric
3122*0b57cec5SDimitry Andric// FIXME: How should flush instruction be modeled?
3123*0b57cec5SDimitry Andriclet SchedRW = [WriteLoad] in {
3124*0b57cec5SDimitry Andric// Flush cache
3125*0b57cec5SDimitry Andricdef CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
3126*0b57cec5SDimitry Andric               "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
3127*0b57cec5SDimitry Andric               PS, Requires<[HasSSE2]>;
3128*0b57cec5SDimitry Andric}
3129*0b57cec5SDimitry Andric
3130*0b57cec5SDimitry Andriclet SchedRW = [WriteNop] in {
3131*0b57cec5SDimitry Andric// Pause. This "instruction" is encoded as "rep; nop", so even though it
3132*0b57cec5SDimitry Andric// was introduced with SSE2, it's backward compatible.
3133*0b57cec5SDimitry Andricdef PAUSE : I<0x90, RawFrm, (outs), (ins),
3134*0b57cec5SDimitry Andric              "pause", [(int_x86_sse2_pause)]>, OBXS;
3135*0b57cec5SDimitry Andric}
3136*0b57cec5SDimitry Andric
3137*0b57cec5SDimitry Andriclet SchedRW = [WriteFence] in {
3138*0b57cec5SDimitry Andric// Load, store, and memory fence
3139*0b57cec5SDimitry Andric// TODO: As with mfence, we may want to ease the availablity of sfence/lfence
3140*0b57cec5SDimitry Andric// TODO: As with mfence, we may want to ease the availability of sfence/lfence
3141*0b57cec5SDimitry Andricdef SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
3142*0b57cec5SDimitry Andric               PS, Requires<[HasSSE1]>;
3143*0b57cec5SDimitry Andricdef LFENCE : I<0xAE, MRM_E8, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>,
3144*0b57cec5SDimitry Andric               PS, Requires<[HasSSE2]>;
3145*0b57cec5SDimitry Andricdef MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>,
3146*0b57cec5SDimitry Andric               PS, Requires<[HasMFence]>;
3147*0b57cec5SDimitry Andric} // SchedRW
3148*0b57cec5SDimitry Andric
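// X86MFence is the node typically produced for a sequentially consistent,
// cross-thread IR fence, so e.g. C11 atomic_thread_fence(memory_order_seq_cst)
// normally ends up selecting MFENCE through this pattern.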
3149*0b57cec5SDimitry Andricdef : Pat<(X86MFence), (MFENCE)>;
3150*0b57cec5SDimitry Andric
3151*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
3152*0b57cec5SDimitry Andric// SSE 1 & 2 - Load/Store MXCSR register
3153*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
3154*0b57cec5SDimitry Andric
3155*0b57cec5SDimitry Andriclet mayLoad=1, hasSideEffects=1 in
3156*0b57cec5SDimitry Andricdef VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
3157*0b57cec5SDimitry Andric               "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
3158*0b57cec5SDimitry Andric               VEX, Sched<[WriteLDMXCSR]>, VEX_WIG;
3159*0b57cec5SDimitry Andriclet mayStore=1, hasSideEffects=1 in
3160*0b57cec5SDimitry Andricdef VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
3161*0b57cec5SDimitry Andric               "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
3162*0b57cec5SDimitry Andric               VEX, Sched<[WriteSTMXCSR]>, VEX_WIG;
3163*0b57cec5SDimitry Andric
3164*0b57cec5SDimitry Andriclet mayLoad=1, hasSideEffects=1 in
3165*0b57cec5SDimitry Andricdef LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src),
3166*0b57cec5SDimitry Andric              "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
3167*0b57cec5SDimitry Andric              TB, Sched<[WriteLDMXCSR]>;
3168*0b57cec5SDimitry Andriclet mayStore=1, hasSideEffects=1 in
3169*0b57cec5SDimitry Andricdef STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst),
3170*0b57cec5SDimitry Andric              "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
3171*0b57cec5SDimitry Andric              TB, Sched<[WriteSTMXCSR]>;
3172*0b57cec5SDimitry Andric
3173*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3174*0b57cec5SDimitry Andric// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
3175*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3176*0b57cec5SDimitry Andric
3177*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in { // SSE integer instructions
3178*0b57cec5SDimitry Andric
3179*0b57cec5SDimitry Andriclet hasSideEffects = 0 in {
3180*0b57cec5SDimitry Andricdef VMOVDQArr  : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3181*0b57cec5SDimitry Andric                      "movdqa\t{$src, $dst|$dst, $src}", []>,
3182*0b57cec5SDimitry Andric                      Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, VEX_WIG;
3183*0b57cec5SDimitry Andricdef VMOVDQUrr  : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3184*0b57cec5SDimitry Andric                      "movdqu\t{$src, $dst|$dst, $src}", []>,
3185*0b57cec5SDimitry Andric                      Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, VEX_WIG;
3186*0b57cec5SDimitry Andricdef VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
3187*0b57cec5SDimitry Andric                      "movdqa\t{$src, $dst|$dst, $src}", []>,
3188*0b57cec5SDimitry Andric                      Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, VEX_WIG;
3189*0b57cec5SDimitry Andricdef VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
3190*0b57cec5SDimitry Andric                      "movdqu\t{$src, $dst|$dst, $src}", []>,
3191*0b57cec5SDimitry Andric                      Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, VEX_WIG;
3192*0b57cec5SDimitry Andric}
3193*0b57cec5SDimitry Andric
3194*0b57cec5SDimitry Andric// For Disassembler
3195*0b57cec5SDimitry Andriclet isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
3196*0b57cec5SDimitry Andricdef VMOVDQArr_REV  : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
3197*0b57cec5SDimitry Andric                          "movdqa\t{$src, $dst|$dst, $src}", []>,
3198*0b57cec5SDimitry Andric                          Sched<[SchedWriteVecMoveLS.XMM.RR]>,
3199*0b57cec5SDimitry Andric                          VEX, VEX_WIG, FoldGenData<"VMOVDQArr">;
3200*0b57cec5SDimitry Andricdef VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
3201*0b57cec5SDimitry Andric                          "movdqa\t{$src, $dst|$dst, $src}", []>,
3202*0b57cec5SDimitry Andric                          Sched<[SchedWriteVecMoveLS.YMM.RR]>,
3203*0b57cec5SDimitry Andric                          VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQAYrr">;
3204*0b57cec5SDimitry Andricdef VMOVDQUrr_REV  : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
3205*0b57cec5SDimitry Andric                          "movdqu\t{$src, $dst|$dst, $src}", []>,
3206*0b57cec5SDimitry Andric                          Sched<[SchedWriteVecMoveLS.XMM.RR]>,
3207*0b57cec5SDimitry Andric                          VEX, VEX_WIG, FoldGenData<"VMOVDQUrr">;
3208*0b57cec5SDimitry Andricdef VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
3209*0b57cec5SDimitry Andric                          "movdqu\t{$src, $dst|$dst, $src}", []>,
3210*0b57cec5SDimitry Andric                          Sched<[SchedWriteVecMoveLS.YMM.RR]>,
3211*0b57cec5SDimitry Andric                          VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQUYrr">;
3212*0b57cec5SDimitry Andric}
3213*0b57cec5SDimitry Andric
3214*0b57cec5SDimitry Andriclet canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
3215*0b57cec5SDimitry Andric    hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in {
3216*0b57cec5SDimitry Andricdef VMOVDQArm  : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
3217*0b57cec5SDimitry Andric                      "movdqa\t{$src, $dst|$dst, $src}",
3218*0b57cec5SDimitry Andric                      [(set VR128:$dst, (alignedloadv2i64 addr:$src))]>,
3219*0b57cec5SDimitry Andric                      Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
3220*0b57cec5SDimitry Andricdef VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
3221*0b57cec5SDimitry Andric                      "movdqa\t{$src, $dst|$dst, $src}", []>,
3222*0b57cec5SDimitry Andric                      Sched<[SchedWriteVecMoveLS.YMM.RM]>,
3223*0b57cec5SDimitry Andric                      VEX, VEX_L, VEX_WIG;
3224*0b57cec5SDimitry Andricdef VMOVDQUrm  : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
3225*0b57cec5SDimitry Andric                   "vmovdqu\t{$src, $dst|$dst, $src}",
3226*0b57cec5SDimitry Andric                   [(set VR128:$dst, (loadv2i64 addr:$src))]>,
3227*0b57cec5SDimitry Andric                   Sched<[SchedWriteVecMoveLS.XMM.RM]>,
3228*0b57cec5SDimitry Andric                   XS, VEX, VEX_WIG;
3229*0b57cec5SDimitry Andricdef VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
3230*0b57cec5SDimitry Andric                   "vmovdqu\t{$src, $dst|$dst, $src}", []>,
3231*0b57cec5SDimitry Andric                   Sched<[SchedWriteVecMoveLS.YMM.RM]>,
3232*0b57cec5SDimitry Andric                   XS, VEX, VEX_L, VEX_WIG;
3233*0b57cec5SDimitry Andric}
3234*0b57cec5SDimitry Andric
3235*0b57cec5SDimitry Andriclet mayStore = 1, hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in {
3236*0b57cec5SDimitry Andricdef VMOVDQAmr  : VPDI<0x7F, MRMDestMem, (outs),
3237*0b57cec5SDimitry Andric                      (ins i128mem:$dst, VR128:$src),
3238*0b57cec5SDimitry Andric                      "movdqa\t{$src, $dst|$dst, $src}",
3239*0b57cec5SDimitry Andric                      [(alignedstore (v2i64 VR128:$src), addr:$dst)]>,
3240*0b57cec5SDimitry Andric                      Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_WIG;
3241*0b57cec5SDimitry Andricdef VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
3242*0b57cec5SDimitry Andric                      (ins i256mem:$dst, VR256:$src),
3243*0b57cec5SDimitry Andric                      "movdqa\t{$src, $dst|$dst, $src}", []>,
3244*0b57cec5SDimitry Andric                     Sched<[SchedWriteVecMoveLS.YMM.MR]>, VEX, VEX_L, VEX_WIG;
3245*0b57cec5SDimitry Andricdef VMOVDQUmr  : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
3246*0b57cec5SDimitry Andric                   "vmovdqu\t{$src, $dst|$dst, $src}",
3247*0b57cec5SDimitry Andric                   [(store (v2i64 VR128:$src), addr:$dst)]>,
3248*0b57cec5SDimitry Andric                   Sched<[SchedWriteVecMoveLS.XMM.MR]>, XS, VEX, VEX_WIG;
3249*0b57cec5SDimitry Andricdef VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
3250*0b57cec5SDimitry Andric                   "vmovdqu\t{$src, $dst|$dst, $src}", []>,
3251*0b57cec5SDimitry Andric                   Sched<[SchedWriteVecMoveLS.YMM.MR]>, XS, VEX, VEX_L, VEX_WIG;
3252*0b57cec5SDimitry Andric}
3253*0b57cec5SDimitry Andric
3254*0b57cec5SDimitry Andriclet SchedRW = [SchedWriteVecMoveLS.XMM.RR] in {
3255*0b57cec5SDimitry Andriclet hasSideEffects = 0 in {
3256*0b57cec5SDimitry Andricdef MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3257*0b57cec5SDimitry Andric                   "movdqa\t{$src, $dst|$dst, $src}", []>;
3258*0b57cec5SDimitry Andric
3259*0b57cec5SDimitry Andricdef MOVDQUrr :   I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3260*0b57cec5SDimitry Andric                   "movdqu\t{$src, $dst|$dst, $src}", []>,
3261*0b57cec5SDimitry Andric                   XS, Requires<[UseSSE2]>;
3262*0b57cec5SDimitry Andric}
3263*0b57cec5SDimitry Andric
3264*0b57cec5SDimitry Andric// For Disassembler
3265*0b57cec5SDimitry Andriclet isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
3266*0b57cec5SDimitry Andricdef MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
3267*0b57cec5SDimitry Andric                       "movdqa\t{$src, $dst|$dst, $src}", []>,
3268*0b57cec5SDimitry Andric                       FoldGenData<"MOVDQArr">;
3269*0b57cec5SDimitry Andric
3270*0b57cec5SDimitry Andricdef MOVDQUrr_REV :   I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
3271*0b57cec5SDimitry Andric                       "movdqu\t{$src, $dst|$dst, $src}", []>,
3272*0b57cec5SDimitry Andric                       XS, Requires<[UseSSE2]>, FoldGenData<"MOVDQUrr">;
3273*0b57cec5SDimitry Andric}
3274*0b57cec5SDimitry Andric} // SchedRW
3275*0b57cec5SDimitry Andric
3276*0b57cec5SDimitry Andriclet canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
3277*0b57cec5SDimitry Andric    hasSideEffects = 0, SchedRW = [SchedWriteVecMoveLS.XMM.RM] in {
3278*0b57cec5SDimitry Andricdef MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
3279*0b57cec5SDimitry Andric                   "movdqa\t{$src, $dst|$dst, $src}",
3280*0b57cec5SDimitry Andric                   [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
3281*0b57cec5SDimitry Andricdef MOVDQUrm :   I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
3282*0b57cec5SDimitry Andric                   "movdqu\t{$src, $dst|$dst, $src}",
3283*0b57cec5SDimitry Andric                   [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
3284*0b57cec5SDimitry Andric                 XS, Requires<[UseSSE2]>;
3285*0b57cec5SDimitry Andric}
3286*0b57cec5SDimitry Andric
3287*0b57cec5SDimitry Andriclet mayStore = 1, hasSideEffects = 0,
3288*0b57cec5SDimitry Andric    SchedRW = [SchedWriteVecMoveLS.XMM.MR] in {
3289*0b57cec5SDimitry Andricdef MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
3290*0b57cec5SDimitry Andric                   "movdqa\t{$src, $dst|$dst, $src}",
3291*0b57cec5SDimitry Andric                   [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
3292*0b57cec5SDimitry Andricdef MOVDQUmr :   I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
3293*0b57cec5SDimitry Andric                   "movdqu\t{$src, $dst|$dst, $src}",
3294*0b57cec5SDimitry Andric                   [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
3295*0b57cec5SDimitry Andric                 XS, Requires<[UseSSE2]>;
3296*0b57cec5SDimitry Andric}
3297*0b57cec5SDimitry Andric
3298*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
3299*0b57cec5SDimitry Andric
3300*0b57cec5SDimitry Andric// Reversed version with ".s" suffix for GAS compatibility.
3301*0b57cec5SDimitry Andricdef : InstAlias<"vmovdqa.s\t{$src, $dst|$dst, $src}",
3302*0b57cec5SDimitry Andric                (VMOVDQArr_REV VR128:$dst, VR128:$src), 0>;
3303*0b57cec5SDimitry Andricdef : InstAlias<"vmovdqa.s\t{$src, $dst|$dst, $src}",
3304*0b57cec5SDimitry Andric                (VMOVDQAYrr_REV VR256:$dst, VR256:$src), 0>;
3305*0b57cec5SDimitry Andricdef : InstAlias<"vmovdqu.s\t{$src, $dst|$dst, $src}",
3306*0b57cec5SDimitry Andric                (VMOVDQUrr_REV VR128:$dst, VR128:$src), 0>;
3307*0b57cec5SDimitry Andricdef : InstAlias<"vmovdqu.s\t{$src, $dst|$dst, $src}",
3308*0b57cec5SDimitry Andric                (VMOVDQUYrr_REV VR256:$dst, VR256:$src), 0>;
3309*0b57cec5SDimitry Andric
3310*0b57cec5SDimitry Andric// Reversed version with ".s" suffix for GAS compatibility.
3311*0b57cec5SDimitry Andricdef : InstAlias<"movdqa.s\t{$src, $dst|$dst, $src}",
3312*0b57cec5SDimitry Andric                (MOVDQArr_REV VR128:$dst, VR128:$src), 0>;
3313*0b57cec5SDimitry Andricdef : InstAlias<"movdqu.s\t{$src, $dst|$dst, $src}",
3314*0b57cec5SDimitry Andric                (MOVDQUrr_REV VR128:$dst, VR128:$src), 0>;
3315*0b57cec5SDimitry Andric
3316*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
3317*0b57cec5SDimitry Andric  // Additional patterns for other integer sizes.
3318*0b57cec5SDimitry Andric  def : Pat<(alignedloadv4i32 addr:$src),
3319*0b57cec5SDimitry Andric            (VMOVDQArm addr:$src)>;
3320*0b57cec5SDimitry Andric  def : Pat<(alignedloadv8i16 addr:$src),
3321*0b57cec5SDimitry Andric            (VMOVDQArm addr:$src)>;
3322*0b57cec5SDimitry Andric  def : Pat<(alignedloadv16i8 addr:$src),
3323*0b57cec5SDimitry Andric            (VMOVDQArm addr:$src)>;
3324*0b57cec5SDimitry Andric  def : Pat<(loadv4i32 addr:$src),
3325*0b57cec5SDimitry Andric            (VMOVDQUrm addr:$src)>;
3326*0b57cec5SDimitry Andric  def : Pat<(loadv8i16 addr:$src),
3327*0b57cec5SDimitry Andric            (VMOVDQUrm addr:$src)>;
3328*0b57cec5SDimitry Andric  def : Pat<(loadv16i8 addr:$src),
3329*0b57cec5SDimitry Andric            (VMOVDQUrm addr:$src)>;
3330*0b57cec5SDimitry Andric
3331*0b57cec5SDimitry Andric  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
3332*0b57cec5SDimitry Andric            (VMOVDQAmr addr:$dst, VR128:$src)>;
3333*0b57cec5SDimitry Andric  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
3334*0b57cec5SDimitry Andric            (VMOVDQAmr addr:$dst, VR128:$src)>;
3335*0b57cec5SDimitry Andric  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
3336*0b57cec5SDimitry Andric            (VMOVDQAmr addr:$dst, VR128:$src)>;
3337*0b57cec5SDimitry Andric  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
3338*0b57cec5SDimitry Andric            (VMOVDQUmr addr:$dst, VR128:$src)>;
3339*0b57cec5SDimitry Andric  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
3340*0b57cec5SDimitry Andric            (VMOVDQUmr addr:$dst, VR128:$src)>;
3341*0b57cec5SDimitry Andric  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
3342*0b57cec5SDimitry Andric            (VMOVDQUmr addr:$dst, VR128:$src)>;
3343*0b57cec5SDimitry Andric}
3344*0b57cec5SDimitry Andric
3345*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3346*0b57cec5SDimitry Andric// SSE2 - Packed Integer Arithmetic Instructions
3347*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3348*0b57cec5SDimitry Andric
3349*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in { // SSE integer instructions
3350*0b57cec5SDimitry Andric
3351*0b57cec5SDimitry Andric/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types
3352*0b57cec5SDimitry Andricmulticlass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
3353*0b57cec5SDimitry Andric                         ValueType DstVT, ValueType SrcVT, RegisterClass RC,
3354*0b57cec5SDimitry Andric                         PatFrag memop_frag, X86MemOperand x86memop,
3355*0b57cec5SDimitry Andric                         X86FoldableSchedWrite sched, bit Is2Addr = 1> {
3356*0b57cec5SDimitry Andric  let isCommutable = 1 in
3357*0b57cec5SDimitry Andric  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
3358*0b57cec5SDimitry Andric       (ins RC:$src1, RC:$src2),
3359*0b57cec5SDimitry Andric       !if(Is2Addr,
3360*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3361*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3362*0b57cec5SDimitry Andric       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
3363*0b57cec5SDimitry Andric       Sched<[sched]>;
3364*0b57cec5SDimitry Andric  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
3365*0b57cec5SDimitry Andric       (ins RC:$src1, x86memop:$src2),
3366*0b57cec5SDimitry Andric       !if(Is2Addr,
3367*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3368*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3369*0b57cec5SDimitry Andric       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
3370*0b57cec5SDimitry Andric                                     (memop_frag addr:$src2))))]>,
3371*0b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
3372*0b57cec5SDimitry Andric}
3373*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
3374*0b57cec5SDimitry Andric
3375*0b57cec5SDimitry Andricdefm PADDB   : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8,
3376*0b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
3377*0b57cec5SDimitry Andricdefm PADDW   : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16,
3378*0b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
3379*0b57cec5SDimitry Andricdefm PADDD   : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
3380*0b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX>;
3381*0b57cec5SDimitry Andricdefm PADDQ   : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
3382*0b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX>;
3383*0b57cec5SDimitry Andricdefm PADDSB  : PDI_binop_all<0xEC, "paddsb", saddsat, v16i8, v32i8,
3384*0b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
3385*0b57cec5SDimitry Andricdefm PADDSW  : PDI_binop_all<0xED, "paddsw", saddsat, v8i16, v16i16,
3386*0b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
3387*0b57cec5SDimitry Andricdefm PADDUSB : PDI_binop_all<0xDC, "paddusb", uaddsat, v16i8, v32i8,
3388*0b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
3389*0b57cec5SDimitry Andricdefm PADDUSW : PDI_binop_all<0xDD, "paddusw", uaddsat, v8i16, v16i16,
3390*0b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
3391*0b57cec5SDimitry Andricdefm PMULLW  : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
3392*0b57cec5SDimitry Andric                             SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
3393*0b57cec5SDimitry Andricdefm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16,
3394*0b57cec5SDimitry Andric                             SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
3395*0b57cec5SDimitry Andricdefm PMULHW  : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16,
3396*0b57cec5SDimitry Andric                             SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
3397*0b57cec5SDimitry Andricdefm PSUBB   : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
3398*0b57cec5SDimitry Andric                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
3399*0b57cec5SDimitry Andricdefm PSUBW   : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
3400*0b57cec5SDimitry Andric                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
3401*0b57cec5SDimitry Andricdefm PSUBD   : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
3402*0b57cec5SDimitry Andric                             SchedWriteVecALU, 0, NoVLX>;
3403*0b57cec5SDimitry Andricdefm PSUBQ   : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
3404*0b57cec5SDimitry Andric                             SchedWriteVecALU, 0, NoVLX>;
3405*0b57cec5SDimitry Andricdefm PSUBSB  : PDI_binop_all<0xE8, "psubsb", ssubsat, v16i8, v32i8,
3406*0b57cec5SDimitry Andric                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
3407*0b57cec5SDimitry Andricdefm PSUBSW  : PDI_binop_all<0xE9, "psubsw", ssubsat, v8i16, v16i16,
3408*0b57cec5SDimitry Andric                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
3409*0b57cec5SDimitry Andricdefm PSUBUSB : PDI_binop_all<0xD8, "psubusb", usubsat, v16i8, v32i8,
3410*0b57cec5SDimitry Andric                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
3411*0b57cec5SDimitry Andricdefm PSUBUSW : PDI_binop_all<0xD9, "psubusw", usubsat, v8i16, v16i16,
3412*0b57cec5SDimitry Andric                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
3413*0b57cec5SDimitry Andricdefm PMINUB  : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8,
3414*0b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
3415*0b57cec5SDimitry Andricdefm PMINSW  : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16,
3416*0b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
3417*0b57cec5SDimitry Andricdefm PMAXUB  : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8,
3418*0b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
3419*0b57cec5SDimitry Andricdefm PMAXSW  : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16,
3420*0b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
3421*0b57cec5SDimitry Andricdefm PAVGB   : PDI_binop_all<0xE0, "pavgb", X86avg, v16i8, v32i8,
3422*0b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
3423*0b57cec5SDimitry Andricdefm PAVGW   : PDI_binop_all<0xE3, "pavgw", X86avg, v8i16, v16i16,
3424*0b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
3425*0b57cec5SDimitry Andricdefm PMULUDQ : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64,
3426*0b57cec5SDimitry Andric                             SchedWriteVecIMul, 1, NoVLX>;
3427*0b57cec5SDimitry Andric
3428*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in
3429*0b57cec5SDimitry Andricdefm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
3430*0b57cec5SDimitry Andric                              load, i128mem, SchedWriteVecIMul.XMM, 0>,
3431*0b57cec5SDimitry Andric                              VEX_4V, VEX_WIG;
3432*0b57cec5SDimitry Andric
3433*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
3434*0b57cec5SDimitry Andricdefm VPMADDWDY : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v8i32, v16i16,
3435*0b57cec5SDimitry Andric                               VR256, load, i256mem, SchedWriteVecIMul.YMM,
3436*0b57cec5SDimitry Andric                               0>, VEX_4V, VEX_L, VEX_WIG;
3437*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in
3438*0b57cec5SDimitry Andricdefm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
3439*0b57cec5SDimitry Andric                             memop, i128mem, SchedWriteVecIMul.XMM>;
3440*0b57cec5SDimitry Andric
3441*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in
3442*0b57cec5SDimitry Andricdefm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8, VR128,
3443*0b57cec5SDimitry Andric                             load, i128mem, SchedWritePSADBW.XMM, 0>,
3444*0b57cec5SDimitry Andric                             VEX_4V, VEX_WIG;
3445*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
3446*0b57cec5SDimitry Andricdefm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8, VR256,
3447*0b57cec5SDimitry Andric                             load, i256mem, SchedWritePSADBW.YMM, 0>,
3448*0b57cec5SDimitry Andric                             VEX_4V, VEX_L, VEX_WIG;
3449*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in
3450*0b57cec5SDimitry Andricdefm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128,
3451*0b57cec5SDimitry Andric                            memop, i128mem, SchedWritePSADBW.XMM>;
3452*0b57cec5SDimitry Andric
3453*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3454*0b57cec5SDimitry Andric// SSE2 - Packed Integer Logical Instructions
3455*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3456*0b57cec5SDimitry Andric
3457*0b57cec5SDimitry Andricmulticlass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
3458*0b57cec5SDimitry Andric                         string OpcodeStr, SDNode OpNode,
3459*0b57cec5SDimitry Andric                         SDNode OpNode2, RegisterClass RC,
3460*0b57cec5SDimitry Andric                         X86FoldableSchedWrite sched,
3461*0b57cec5SDimitry Andric                         X86FoldableSchedWrite schedImm,
3462*0b57cec5SDimitry Andric                         ValueType DstVT, ValueType SrcVT,
3463*0b57cec5SDimitry Andric                         PatFrag ld_frag, bit Is2Addr = 1> {
3464*0b57cec5SDimitry Andric  // src2 is always 128-bit
3465*0b57cec5SDimitry Andric  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
3466*0b57cec5SDimitry Andric       (ins RC:$src1, VR128:$src2),
3467*0b57cec5SDimitry Andric       !if(Is2Addr,
3468*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3469*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3470*0b57cec5SDimitry Andric       [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))]>,
3471*0b57cec5SDimitry Andric       Sched<[sched]>;
3472*0b57cec5SDimitry Andric  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
3473*0b57cec5SDimitry Andric       (ins RC:$src1, i128mem:$src2),
3474*0b57cec5SDimitry Andric       !if(Is2Addr,
3475*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3476*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3477*0b57cec5SDimitry Andric       [(set RC:$dst, (DstVT (OpNode RC:$src1,
3478*0b57cec5SDimitry Andric                       (SrcVT (ld_frag addr:$src2)))))]>,
3479*0b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
3480*0b57cec5SDimitry Andric  def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
3481*0b57cec5SDimitry Andric       (ins RC:$src1, u8imm:$src2),
3482*0b57cec5SDimitry Andric       !if(Is2Addr,
3483*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3484*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3485*0b57cec5SDimitry Andric       [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 imm:$src2))))]>,
3486*0b57cec5SDimitry Andric       Sched<[schedImm]>;
3487*0b57cec5SDimitry Andric}
3488*0b57cec5SDimitry Andric
3489*0b57cec5SDimitry Andricmulticlass PDI_binop_rmi_all<bits<8> opc, bits<8> opc2, Format ImmForm,
3490*0b57cec5SDimitry Andric                             string OpcodeStr, SDNode OpNode,
3491*0b57cec5SDimitry Andric                             SDNode OpNode2, ValueType DstVT128,
3492*0b57cec5SDimitry Andric                             ValueType DstVT256, ValueType SrcVT,
3493*0b57cec5SDimitry Andric                             X86SchedWriteWidths sched,
3494*0b57cec5SDimitry Andric                             X86SchedWriteWidths schedImm, Predicate prd> {
3495*0b57cec5SDimitry Andriclet Predicates = [HasAVX, prd] in
3496*0b57cec5SDimitry Andric  defm V#NAME : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr),
3497*0b57cec5SDimitry Andric                              OpNode, OpNode2, VR128, sched.XMM, schedImm.XMM,
3498*0b57cec5SDimitry Andric                              DstVT128, SrcVT, load, 0>, VEX_4V, VEX_WIG;
3499*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, prd] in
3500*0b57cec5SDimitry Andric  defm V#NAME#Y : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr),
3501*0b57cec5SDimitry Andric                                OpNode, OpNode2, VR256, sched.YMM, schedImm.YMM,
3502*0b57cec5SDimitry Andric                                DstVT256, SrcVT, load, 0>, VEX_4V, VEX_L,
3503*0b57cec5SDimitry Andric                                VEX_WIG;
3504*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in
3505*0b57cec5SDimitry Andric  defm NAME : PDI_binop_rmi<opc, opc2, ImmForm, OpcodeStr, OpNode, OpNode2,
3506*0b57cec5SDimitry Andric                            VR128, sched.XMM, schedImm.XMM, DstVT128, SrcVT,
3507*0b57cec5SDimitry Andric                            memop>;
3508*0b57cec5SDimitry Andric}
3509*0b57cec5SDimitry Andric
3510*0b57cec5SDimitry Andricmulticlass PDI_binop_ri<bits<8> opc, Format ImmForm, string OpcodeStr,
3511*0b57cec5SDimitry Andric                        SDNode OpNode, RegisterClass RC, ValueType VT,
3512*0b57cec5SDimitry Andric                        X86FoldableSchedWrite sched, bit Is2Addr = 1> {
3513*0b57cec5SDimitry Andric  def ri : PDIi8<opc, ImmForm, (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
3514*0b57cec5SDimitry Andric       !if(Is2Addr,
3515*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3516*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3517*0b57cec5SDimitry Andric       [(set RC:$dst, (VT (OpNode RC:$src1, (i8 imm:$src2))))]>,
3518*0b57cec5SDimitry Andric       Sched<[sched]>;
3519*0b57cec5SDimitry Andric}
3520*0b57cec5SDimitry Andric
3521*0b57cec5SDimitry Andricmulticlass PDI_binop_ri_all<bits<8> opc, Format ImmForm, string OpcodeStr,
3522*0b57cec5SDimitry Andric                            SDNode OpNode, X86SchedWriteWidths sched> {
3523*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in
3524*0b57cec5SDimitry Andric  defm V#NAME : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode,
3525*0b57cec5SDimitry Andric                             VR128, v16i8, sched.XMM, 0>, VEX_4V, VEX_WIG;
3526*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
3527*0b57cec5SDimitry Andric  defm V#NAME#Y : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode,
3528*0b57cec5SDimitry Andric                               VR256, v32i8, sched.YMM, 0>,
3529*0b57cec5SDimitry Andric                               VEX_4V, VEX_L, VEX_WIG;
3530*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in
3531*0b57cec5SDimitry Andric  defm NAME : PDI_binop_ri<opc, ImmForm, OpcodeStr, OpNode, VR128, v16i8,
3532*0b57cec5SDimitry Andric                           sched.XMM>;
3533*0b57cec5SDimitry Andric}
3534*0b57cec5SDimitry Andric
3535*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in {
3536*0b57cec5SDimitry Andric  defm PSLLW : PDI_binop_rmi_all<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,
3537*0b57cec5SDimitry Andric                                 v8i16, v16i16, v8i16, SchedWriteVecShift,
3538*0b57cec5SDimitry Andric                                 SchedWriteVecShiftImm, NoVLX_Or_NoBWI>;
3539*0b57cec5SDimitry Andric  defm PSLLD : PDI_binop_rmi_all<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli,
3540*0b57cec5SDimitry Andric                                 v4i32, v8i32, v4i32, SchedWriteVecShift,
3541*0b57cec5SDimitry Andric                                 SchedWriteVecShiftImm, NoVLX>;
3542*0b57cec5SDimitry Andric  defm PSLLQ : PDI_binop_rmi_all<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli,
3543*0b57cec5SDimitry Andric                                 v2i64, v4i64, v2i64, SchedWriteVecShift,
3544*0b57cec5SDimitry Andric                                 SchedWriteVecShiftImm, NoVLX>;
3545*0b57cec5SDimitry Andric
3546*0b57cec5SDimitry Andric  defm PSRLW : PDI_binop_rmi_all<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli,
3547*0b57cec5SDimitry Andric                                 v8i16, v16i16, v8i16, SchedWriteVecShift,
3548*0b57cec5SDimitry Andric                                 SchedWriteVecShiftImm, NoVLX_Or_NoBWI>;
3549*0b57cec5SDimitry Andric  defm PSRLD : PDI_binop_rmi_all<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli,
3550*0b57cec5SDimitry Andric                                 v4i32, v8i32, v4i32, SchedWriteVecShift,
3551*0b57cec5SDimitry Andric                                 SchedWriteVecShiftImm, NoVLX>;
3552*0b57cec5SDimitry Andric  defm PSRLQ : PDI_binop_rmi_all<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli,
3553*0b57cec5SDimitry Andric                                 v2i64, v4i64, v2i64, SchedWriteVecShift,
3554*0b57cec5SDimitry Andric                                 SchedWriteVecShiftImm, NoVLX>;
3555*0b57cec5SDimitry Andric
3556*0b57cec5SDimitry Andric  defm PSRAW : PDI_binop_rmi_all<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai,
3557*0b57cec5SDimitry Andric                                 v8i16, v16i16, v8i16, SchedWriteVecShift,
3558*0b57cec5SDimitry Andric                                 SchedWriteVecShiftImm, NoVLX_Or_NoBWI>;
3559*0b57cec5SDimitry Andric  defm PSRAD : PDI_binop_rmi_all<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
3560*0b57cec5SDimitry Andric                                 v4i32, v8i32, v4i32, SchedWriteVecShift,
3561*0b57cec5SDimitry Andric                                 SchedWriteVecShiftImm, NoVLX>;
3562*0b57cec5SDimitry Andric
3563*0b57cec5SDimitry Andric  defm PSLLDQ : PDI_binop_ri_all<0x73, MRM7r, "pslldq", X86vshldq,
3564*0b57cec5SDimitry Andric                                 SchedWriteShuffle>;
3565*0b57cec5SDimitry Andric  defm PSRLDQ : PDI_binop_ri_all<0x73, MRM3r, "psrldq", X86vshrdq,
3566*0b57cec5SDimitry Andric                                 SchedWriteShuffle>;
3567*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
3568*0b57cec5SDimitry Andric
3569*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3570*0b57cec5SDimitry Andric// SSE2 - Packed Integer Comparison Instructions
3571*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3572*0b57cec5SDimitry Andric
3573*0b57cec5SDimitry Andricdefm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8,
3574*0b57cec5SDimitry Andric                             SchedWriteVecALU, 1, TruePredicate>;
3575*0b57cec5SDimitry Andricdefm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16,
3576*0b57cec5SDimitry Andric                             SchedWriteVecALU, 1, TruePredicate>;
3577*0b57cec5SDimitry Andricdefm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32,
3578*0b57cec5SDimitry Andric                             SchedWriteVecALU, 1, TruePredicate>;
3579*0b57cec5SDimitry Andricdefm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8,
3580*0b57cec5SDimitry Andric                             SchedWriteVecALU, 0, TruePredicate>;
3581*0b57cec5SDimitry Andricdefm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16,
3582*0b57cec5SDimitry Andric                             SchedWriteVecALU, 0, TruePredicate>;
3583*0b57cec5SDimitry Andricdefm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
3584*0b57cec5SDimitry Andric                             SchedWriteVecALU, 0, TruePredicate>;
3585*0b57cec5SDimitry Andric
3586*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3587*0b57cec5SDimitry Andric// SSE2 - Packed Integer Shuffle Instructions
3588*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3589*0b57cec5SDimitry Andric
3590*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in {
3591*0b57cec5SDimitry Andricmulticlass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256,
3592*0b57cec5SDimitry Andric                         SDNode OpNode, X86SchedWriteWidths sched,
3593*0b57cec5SDimitry Andric                         Predicate prd> {
3594*0b57cec5SDimitry Andriclet Predicates = [HasAVX, prd] in {
3595*0b57cec5SDimitry Andric  def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst),
3596*0b57cec5SDimitry Andric                      (ins VR128:$src1, u8imm:$src2),
3597*0b57cec5SDimitry Andric                      !strconcat("v", OpcodeStr,
3598*0b57cec5SDimitry Andric                                 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3599*0b57cec5SDimitry Andric                      [(set VR128:$dst,
3600*0b57cec5SDimitry Andric                        (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))]>,
3601*0b57cec5SDimitry Andric                      VEX, Sched<[sched.XMM]>, VEX_WIG;
3602*0b57cec5SDimitry Andric  def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
3603*0b57cec5SDimitry Andric                      (ins i128mem:$src1, u8imm:$src2),
3604*0b57cec5SDimitry Andric                      !strconcat("v", OpcodeStr,
3605*0b57cec5SDimitry Andric                                 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3606*0b57cec5SDimitry Andric                     [(set VR128:$dst,
3607*0b57cec5SDimitry Andric                       (vt128 (OpNode (load addr:$src1),
3608*0b57cec5SDimitry Andric                        (i8 imm:$src2))))]>, VEX,
3609*0b57cec5SDimitry Andric                  Sched<[sched.XMM.Folded]>, VEX_WIG;
3610*0b57cec5SDimitry Andric}
3611*0b57cec5SDimitry Andric
3612*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, prd] in {
3613*0b57cec5SDimitry Andric  def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst),
3614*0b57cec5SDimitry Andric                       (ins VR256:$src1, u8imm:$src2),
3615*0b57cec5SDimitry Andric                       !strconcat("v", OpcodeStr,
3616*0b57cec5SDimitry Andric                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3617*0b57cec5SDimitry Andric                       [(set VR256:$dst,
3618*0b57cec5SDimitry Andric                         (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))]>,
3619*0b57cec5SDimitry Andric                       VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
3620*0b57cec5SDimitry Andric  def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
3621*0b57cec5SDimitry Andric                       (ins i256mem:$src1, u8imm:$src2),
3622*0b57cec5SDimitry Andric                       !strconcat("v", OpcodeStr,
3623*0b57cec5SDimitry Andric                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3624*0b57cec5SDimitry Andric                      [(set VR256:$dst,
3625*0b57cec5SDimitry Andric                        (vt256 (OpNode (load addr:$src1),
3626*0b57cec5SDimitry Andric                         (i8 imm:$src2))))]>, VEX, VEX_L,
3627*0b57cec5SDimitry Andric                   Sched<[sched.YMM.Folded]>, VEX_WIG;
3628*0b57cec5SDimitry Andric}
3629*0b57cec5SDimitry Andric
3630*0b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
3631*0b57cec5SDimitry Andric  def ri : Ii8<0x70, MRMSrcReg,
3632*0b57cec5SDimitry Andric               (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
3633*0b57cec5SDimitry Andric               !strconcat(OpcodeStr,
3634*0b57cec5SDimitry Andric                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3635*0b57cec5SDimitry Andric               [(set VR128:$dst,
3636*0b57cec5SDimitry Andric                 (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))]>,
3637*0b57cec5SDimitry Andric               Sched<[sched.XMM]>;
3638*0b57cec5SDimitry Andric  def mi : Ii8<0x70, MRMSrcMem,
3639*0b57cec5SDimitry Andric               (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2),
3640*0b57cec5SDimitry Andric               !strconcat(OpcodeStr,
3641*0b57cec5SDimitry Andric                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3642*0b57cec5SDimitry Andric               [(set VR128:$dst,
3643*0b57cec5SDimitry Andric                 (vt128 (OpNode (memop addr:$src1),
3644*0b57cec5SDimitry Andric                        (i8 imm:$src2))))]>,
3645*0b57cec5SDimitry Andric               Sched<[sched.XMM.Folded]>;
3646*0b57cec5SDimitry Andric}
3647*0b57cec5SDimitry Andric}
3648*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
3649*0b57cec5SDimitry Andric
3650*0b57cec5SDimitry Andricdefm PSHUFD  : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd,
3651*0b57cec5SDimitry Andric                             SchedWriteShuffle, NoVLX>, PD;
3652*0b57cec5SDimitry Andricdefm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw,
3653*0b57cec5SDimitry Andric                             SchedWriteShuffle, NoVLX_Or_NoBWI>, XS;
3654*0b57cec5SDimitry Andricdefm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw,
3655*0b57cec5SDimitry Andric                             SchedWriteShuffle, NoVLX_Or_NoBWI>, XD;
3656*0b57cec5SDimitry Andric
3657*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3658*0b57cec5SDimitry Andric// Packed Integer Pack Instructions (SSE & AVX)
3659*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3660*0b57cec5SDimitry Andric
3661*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in {
3662*0b57cec5SDimitry Andricmulticlass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
3663*0b57cec5SDimitry Andric                     ValueType ArgVT, SDNode OpNode, RegisterClass RC,
3664*0b57cec5SDimitry Andric                     X86MemOperand x86memop, X86FoldableSchedWrite sched,
3665*0b57cec5SDimitry Andric                     PatFrag ld_frag, bit Is2Addr = 1> {
3666*0b57cec5SDimitry Andric  def rr : PDI<opc, MRMSrcReg,
3667*0b57cec5SDimitry Andric               (outs RC:$dst), (ins RC:$src1, RC:$src2),
3668*0b57cec5SDimitry Andric               !if(Is2Addr,
3669*0b57cec5SDimitry Andric                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3670*0b57cec5SDimitry Andric                   !strconcat(OpcodeStr,
3671*0b57cec5SDimitry Andric                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3672*0b57cec5SDimitry Andric               [(set RC:$dst,
3673*0b57cec5SDimitry Andric                     (OutVT (OpNode (ArgVT RC:$src1), RC:$src2)))]>,
3674*0b57cec5SDimitry Andric               Sched<[sched]>;
3675*0b57cec5SDimitry Andric  def rm : PDI<opc, MRMSrcMem,
3676*0b57cec5SDimitry Andric               (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
3677*0b57cec5SDimitry Andric               !if(Is2Addr,
3678*0b57cec5SDimitry Andric                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3679*0b57cec5SDimitry Andric                   !strconcat(OpcodeStr,
3680*0b57cec5SDimitry Andric                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3681*0b57cec5SDimitry Andric               [(set RC:$dst,
3682*0b57cec5SDimitry Andric                     (OutVT (OpNode (ArgVT RC:$src1),
3683*0b57cec5SDimitry Andric                                    (ld_frag addr:$src2))))]>,
3684*0b57cec5SDimitry Andric               Sched<[sched.Folded, sched.ReadAfterFold]>;
3685*0b57cec5SDimitry Andric}
3686*0b57cec5SDimitry Andric
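/// sse4_pack - Same as sse2_pack, but uses the SSE4.1 encoding class (SS48I)
/// required by PACKUSDW.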
3687*0b57cec5SDimitry Andricmulticlass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
3688*0b57cec5SDimitry Andric                     ValueType ArgVT, SDNode OpNode, RegisterClass RC,
3689*0b57cec5SDimitry Andric                     X86MemOperand x86memop, X86FoldableSchedWrite sched,
3690*0b57cec5SDimitry Andric                     PatFrag ld_frag, bit Is2Addr = 1> {
3691*0b57cec5SDimitry Andric  def rr : SS48I<opc, MRMSrcReg,
3692*0b57cec5SDimitry Andric                 (outs RC:$dst), (ins RC:$src1, RC:$src2),
3693*0b57cec5SDimitry Andric                 !if(Is2Addr,
3694*0b57cec5SDimitry Andric                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3695*0b57cec5SDimitry Andric                     !strconcat(OpcodeStr,
3696*0b57cec5SDimitry Andric                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3697*0b57cec5SDimitry Andric                 [(set RC:$dst,
3698*0b57cec5SDimitry Andric                       (OutVT (OpNode (ArgVT RC:$src1), RC:$src2)))]>,
3699*0b57cec5SDimitry Andric                 Sched<[sched]>;
3700*0b57cec5SDimitry Andric  def rm : SS48I<opc, MRMSrcMem,
3701*0b57cec5SDimitry Andric                 (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
3702*0b57cec5SDimitry Andric                 !if(Is2Addr,
3703*0b57cec5SDimitry Andric                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3704*0b57cec5SDimitry Andric                     !strconcat(OpcodeStr,
3705*0b57cec5SDimitry Andric                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3706*0b57cec5SDimitry Andric                 [(set RC:$dst,
3707*0b57cec5SDimitry Andric                       (OutVT (OpNode (ArgVT RC:$src1),
3708*0b57cec5SDimitry Andric                                      (ld_frag addr:$src2))))]>,
3709*0b57cec5SDimitry Andric                 Sched<[sched.Folded, sched.ReadAfterFold]>;
3710*0b57cec5SDimitry Andric}
3711*0b57cec5SDimitry Andric
3712*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
3713*0b57cec5SDimitry Andric  defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, VR128,
3714*0b57cec5SDimitry Andric                             i128mem, SchedWriteShuffle.XMM, load, 0>,
3715*0b57cec5SDimitry Andric                             VEX_4V, VEX_WIG;
3716*0b57cec5SDimitry Andric  defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, VR128,
3717*0b57cec5SDimitry Andric                             i128mem, SchedWriteShuffle.XMM, load, 0>,
3718*0b57cec5SDimitry Andric                             VEX_4V, VEX_WIG;
3719*0b57cec5SDimitry Andric
3720*0b57cec5SDimitry Andric  defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus, VR128,
3721*0b57cec5SDimitry Andric                             i128mem, SchedWriteShuffle.XMM, load, 0>,
3722*0b57cec5SDimitry Andric                             VEX_4V, VEX_WIG;
3723*0b57cec5SDimitry Andric  defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128,
3724*0b57cec5SDimitry Andric                             i128mem, SchedWriteShuffle.XMM, load, 0>,
3725*0b57cec5SDimitry Andric                             VEX_4V;
3726*0b57cec5SDimitry Andric}
3727*0b57cec5SDimitry Andric
3728*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
3729*0b57cec5SDimitry Andric  defm VPACKSSWBY : sse2_pack<0x63, "vpacksswb", v32i8, v16i16, X86Packss, VR256,
3730*0b57cec5SDimitry Andric                              i256mem, SchedWriteShuffle.YMM, load, 0>,
3731*0b57cec5SDimitry Andric                              VEX_4V, VEX_L, VEX_WIG;
3732*0b57cec5SDimitry Andric  defm VPACKSSDWY : sse2_pack<0x6B, "vpackssdw", v16i16, v8i32, X86Packss, VR256,
3733*0b57cec5SDimitry Andric                              i256mem, SchedWriteShuffle.YMM, load, 0>,
3734*0b57cec5SDimitry Andric                              VEX_4V, VEX_L, VEX_WIG;
3735*0b57cec5SDimitry Andric
3736*0b57cec5SDimitry Andric  defm VPACKUSWBY : sse2_pack<0x67, "vpackuswb", v32i8, v16i16, X86Packus, VR256,
3737*0b57cec5SDimitry Andric                              i256mem, SchedWriteShuffle.YMM, load, 0>,
3738*0b57cec5SDimitry Andric                              VEX_4V, VEX_L, VEX_WIG;
3739*0b57cec5SDimitry Andric  defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, VR256,
3740*0b57cec5SDimitry Andric                              i256mem, SchedWriteShuffle.YMM, load, 0>,
3741*0b57cec5SDimitry Andric                              VEX_4V, VEX_L;
3742*0b57cec5SDimitry Andric}
3743*0b57cec5SDimitry Andric
3744*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
3745*0b57cec5SDimitry Andric  defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss, VR128,
3746*0b57cec5SDimitry Andric                            i128mem, SchedWriteShuffle.XMM, memop>;
3747*0b57cec5SDimitry Andric  defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss, VR128,
3748*0b57cec5SDimitry Andric                            i128mem, SchedWriteShuffle.XMM, memop>;
3749*0b57cec5SDimitry Andric
3750*0b57cec5SDimitry Andric  defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus, VR128,
3751*0b57cec5SDimitry Andric                            i128mem, SchedWriteShuffle.XMM, memop>;
3752*0b57cec5SDimitry Andric
3753*0b57cec5SDimitry Andric  defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus, VR128,
3754*0b57cec5SDimitry Andric                            i128mem, SchedWriteShuffle.XMM, memop>;
3755*0b57cec5SDimitry Andric}
3756*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
3757*0b57cec5SDimitry Andric
3758*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3759*0b57cec5SDimitry Andric// SSE2 - Packed Integer Unpack Instructions
3760*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3761*0b57cec5SDimitry Andric
3762*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in {
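/// sse2_unpack - PUNPCK{L,H}* style multiclass: interleaves elements from the
/// low or high halves of the two sources (per OpNode, X86Unpckl/X86Unpckh), in
/// register-register and register-memory forms.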
3763*0b57cec5SDimitry Andricmulticlass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
3764*0b57cec5SDimitry Andric                       SDNode OpNode, RegisterClass RC, X86MemOperand x86memop,
3765*0b57cec5SDimitry Andric                       X86FoldableSchedWrite sched, PatFrag ld_frag,
3766*0b57cec5SDimitry Andric                       bit Is2Addr = 1> {
3767*0b57cec5SDimitry Andric  def rr : PDI<opc, MRMSrcReg,
3768*0b57cec5SDimitry Andric      (outs RC:$dst), (ins RC:$src1, RC:$src2),
3769*0b57cec5SDimitry Andric      !if(Is2Addr,
3770*0b57cec5SDimitry Andric          !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
3771*0b57cec5SDimitry Andric          !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3772*0b57cec5SDimitry Andric      [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
3773*0b57cec5SDimitry Andric      Sched<[sched]>;
3774*0b57cec5SDimitry Andric  def rm : PDI<opc, MRMSrcMem,
3775*0b57cec5SDimitry Andric      (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
3776*0b57cec5SDimitry Andric      !if(Is2Addr,
3777*0b57cec5SDimitry Andric          !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
3778*0b57cec5SDimitry Andric          !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3779*0b57cec5SDimitry Andric      [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
3780*0b57cec5SDimitry Andric      Sched<[sched.Folded, sched.ReadAfterFold]>;
3781*0b57cec5SDimitry Andric}
3782*0b57cec5SDimitry Andric
3783*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
3784*0b57cec5SDimitry Andric  defm VPUNPCKLBW  : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, VR128,
3785*0b57cec5SDimitry Andric                                 i128mem, SchedWriteShuffle.XMM, load, 0>,
3786*0b57cec5SDimitry Andric                                 VEX_4V, VEX_WIG;
3787*0b57cec5SDimitry Andric  defm VPUNPCKLWD  : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, VR128,
3788*0b57cec5SDimitry Andric                                 i128mem, SchedWriteShuffle.XMM, load, 0>,
3789*0b57cec5SDimitry Andric                                 VEX_4V, VEX_WIG;
3790*0b57cec5SDimitry Andric  defm VPUNPCKHBW  : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, VR128,
3791*0b57cec5SDimitry Andric                                 i128mem, SchedWriteShuffle.XMM, load, 0>,
3792*0b57cec5SDimitry Andric                                 VEX_4V, VEX_WIG;
3793*0b57cec5SDimitry Andric  defm VPUNPCKHWD  : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, VR128,
3794*0b57cec5SDimitry Andric                                 i128mem, SchedWriteShuffle.XMM, load, 0>,
3795*0b57cec5SDimitry Andric                                 VEX_4V, VEX_WIG;
3796*0b57cec5SDimitry Andric}
3797*0b57cec5SDimitry Andric
3798*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
3799*0b57cec5SDimitry Andric  defm VPUNPCKLDQ  : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, VR128,
3800*0b57cec5SDimitry Andric                                 i128mem, SchedWriteShuffle.XMM, load, 0>,
3801*0b57cec5SDimitry Andric                                 VEX_4V, VEX_WIG;
3802*0b57cec5SDimitry Andric  defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, VR128,
3803*0b57cec5SDimitry Andric                                 i128mem, SchedWriteShuffle.XMM, load, 0>,
3804*0b57cec5SDimitry Andric                                 VEX_4V, VEX_WIG;
3805*0b57cec5SDimitry Andric  defm VPUNPCKHDQ  : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, VR128,
3806*0b57cec5SDimitry Andric                                 i128mem, SchedWriteShuffle.XMM, load, 0>,
3807*0b57cec5SDimitry Andric                                 VEX_4V, VEX_WIG;
3808*0b57cec5SDimitry Andric  defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, VR128,
3809*0b57cec5SDimitry Andric                                 i128mem, SchedWriteShuffle.XMM, load, 0>,
3810*0b57cec5SDimitry Andric                                 VEX_4V, VEX_WIG;
3811*0b57cec5SDimitry Andric}
3812*0b57cec5SDimitry Andric
3813*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
3814*0b57cec5SDimitry Andric  defm VPUNPCKLBWY  : sse2_unpack<0x60, "vpunpcklbw", v32i8, X86Unpckl, VR256,
3815*0b57cec5SDimitry Andric                                  i256mem, SchedWriteShuffle.YMM, load, 0>,
3816*0b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
3817*0b57cec5SDimitry Andric  defm VPUNPCKLWDY  : sse2_unpack<0x61, "vpunpcklwd", v16i16, X86Unpckl, VR256,
3818*0b57cec5SDimitry Andric                                  i256mem, SchedWriteShuffle.YMM, load, 0>,
3819*0b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
3820*0b57cec5SDimitry Andric  defm VPUNPCKHBWY  : sse2_unpack<0x68, "vpunpckhbw", v32i8, X86Unpckh, VR256,
3821*0b57cec5SDimitry Andric                                  i256mem, SchedWriteShuffle.YMM, load, 0>,
3822*0b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
3823*0b57cec5SDimitry Andric  defm VPUNPCKHWDY  : sse2_unpack<0x69, "vpunpckhwd", v16i16, X86Unpckh, VR256,
3824*0b57cec5SDimitry Andric                                  i256mem, SchedWriteShuffle.YMM, load, 0>,
3825*0b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
3826*0b57cec5SDimitry Andric}
3827*0b57cec5SDimitry Andric
3828*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
3829*0b57cec5SDimitry Andric  defm VPUNPCKLDQY  : sse2_unpack<0x62, "vpunpckldq", v8i32, X86Unpckl, VR256,
3830*0b57cec5SDimitry Andric                                  i256mem, SchedWriteShuffle.YMM, load, 0>,
3831*0b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
3832*0b57cec5SDimitry Andric  defm VPUNPCKLQDQY : sse2_unpack<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, VR256,
3833*0b57cec5SDimitry Andric                                  i256mem, SchedWriteShuffle.YMM, load, 0>,
3834*0b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
3835*0b57cec5SDimitry Andric  defm VPUNPCKHDQY  : sse2_unpack<0x6A, "vpunpckhdq", v8i32, X86Unpckh, VR256,
3836*0b57cec5SDimitry Andric                                  i256mem, SchedWriteShuffle.YMM, load, 0>,
3837*0b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
3838*0b57cec5SDimitry Andric  defm VPUNPCKHQDQY : sse2_unpack<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, VR256,
3839*0b57cec5SDimitry Andric                                  i256mem, SchedWriteShuffle.YMM, load, 0>,
3840*0b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
3841*0b57cec5SDimitry Andric}
3842*0b57cec5SDimitry Andric
3843*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
3844*0b57cec5SDimitry Andric  defm PUNPCKLBW  : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, VR128,
3845*0b57cec5SDimitry Andric                                i128mem, SchedWriteShuffle.XMM, memop>;
3846*0b57cec5SDimitry Andric  defm PUNPCKLWD  : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, VR128,
3847*0b57cec5SDimitry Andric                                i128mem, SchedWriteShuffle.XMM, memop>;
3848*0b57cec5SDimitry Andric  defm PUNPCKLDQ  : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, VR128,
3849*0b57cec5SDimitry Andric                                i128mem, SchedWriteShuffle.XMM, memop>;
3850*0b57cec5SDimitry Andric  defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, VR128,
3851*0b57cec5SDimitry Andric                                i128mem, SchedWriteShuffle.XMM, memop>;
3852*0b57cec5SDimitry Andric
3853*0b57cec5SDimitry Andric  defm PUNPCKHBW  : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, VR128,
3854*0b57cec5SDimitry Andric                                i128mem, SchedWriteShuffle.XMM, memop>;
3855*0b57cec5SDimitry Andric  defm PUNPCKHWD  : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, VR128,
3856*0b57cec5SDimitry Andric                                i128mem, SchedWriteShuffle.XMM, memop>;
3857*0b57cec5SDimitry Andric  defm PUNPCKHDQ  : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, VR128,
3858*0b57cec5SDimitry Andric                                i128mem, SchedWriteShuffle.XMM, memop>;
3859*0b57cec5SDimitry Andric  defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, VR128,
3860*0b57cec5SDimitry Andric                                i128mem, SchedWriteShuffle.XMM, memop>;
3861*0b57cec5SDimitry Andric}
3862*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
3863*0b57cec5SDimitry Andric
3864*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3865*0b57cec5SDimitry Andric// SSE2 - Packed Integer Extract and Insert
3866*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3867*0b57cec5SDimitry Andric
3868*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in {
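/// sse2_pinsrw - PINSRW/VPINSRW: insert a 16-bit value from a GR32/GR64 (rr) or
/// from a 16-bit memory operand (rm) into the word element selected by the
/// immediate.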
3869*0b57cec5SDimitry Andricmulticlass sse2_pinsrw<bit Is2Addr = 1> {
3870*0b57cec5SDimitry Andric  def rr : Ii8<0xC4, MRMSrcReg,
3871*0b57cec5SDimitry Andric       (outs VR128:$dst), (ins VR128:$src1,
3872*0b57cec5SDimitry Andric        GR32orGR64:$src2, u8imm:$src3),
3873*0b57cec5SDimitry Andric       !if(Is2Addr,
3874*0b57cec5SDimitry Andric           "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
3875*0b57cec5SDimitry Andric           "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
3876*0b57cec5SDimitry Andric       [(set VR128:$dst,
3877*0b57cec5SDimitry Andric         (X86pinsrw VR128:$src1, GR32orGR64:$src2, imm:$src3))]>,
3878*0b57cec5SDimitry Andric       Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
3879*0b57cec5SDimitry Andric  def rm : Ii8<0xC4, MRMSrcMem,
3880*0b57cec5SDimitry Andric                      (outs VR128:$dst), (ins VR128:$src1,
3881*0b57cec5SDimitry Andric                       i16mem:$src2, u8imm:$src3),
3882*0b57cec5SDimitry Andric       !if(Is2Addr,
3883*0b57cec5SDimitry Andric           "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
3884*0b57cec5SDimitry Andric           "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
3885*0b57cec5SDimitry Andric       [(set VR128:$dst,
3886*0b57cec5SDimitry Andric         (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
3887*0b57cec5SDimitry Andric                    imm:$src3))]>,
3888*0b57cec5SDimitry Andric       Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
3889*0b57cec5SDimitry Andric}
3890*0b57cec5SDimitry Andric
3891*0b57cec5SDimitry Andric// Extract
3892*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoBWI] in
3893*0b57cec5SDimitry Andricdef VPEXTRWrr : Ii8<0xC5, MRMSrcReg,
3894*0b57cec5SDimitry Andric                    (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
3895*0b57cec5SDimitry Andric                    "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3896*0b57cec5SDimitry Andric                    [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
3897*0b57cec5SDimitry Andric                                            imm:$src2))]>,
3898*0b57cec5SDimitry Andric                PD, VEX, VEX_WIG, Sched<[WriteVecExtract]>;
3899*0b57cec5SDimitry Andricdef PEXTRWrr : PDIi8<0xC5, MRMSrcReg,
3900*0b57cec5SDimitry Andric                    (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
3901*0b57cec5SDimitry Andric                    "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3902*0b57cec5SDimitry Andric                    [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
3903*0b57cec5SDimitry Andric                                            imm:$src2))]>,
3904*0b57cec5SDimitry Andric               Sched<[WriteVecExtract]>;
3905*0b57cec5SDimitry Andric
3906*0b57cec5SDimitry Andric// Insert
3907*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoBWI] in
3908*0b57cec5SDimitry Andricdefm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V, VEX_WIG;
3909*0b57cec5SDimitry Andric
3910*0b57cec5SDimitry Andriclet Predicates = [UseSSE2], Constraints = "$src1 = $dst" in
3911*0b57cec5SDimitry Andricdefm PINSRW : sse2_pinsrw, PD;
3912*0b57cec5SDimitry Andric
3913*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
3914*0b57cec5SDimitry Andric
3915*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3916*0b57cec5SDimitry Andric// SSE2 - Packed Mask Creation
3917*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3918*0b57cec5SDimitry Andric
3919*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in {
3920*0b57cec5SDimitry Andric
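// PMOVMSKB gathers the most-significant bit of each byte of the source vector
// into the low bits of a general-purpose register (16 bits for the XMM form,
// 32 bits for the YMM form).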
3921*0b57cec5SDimitry Andricdef VPMOVMSKBrr  : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
3922*0b57cec5SDimitry Andric           (ins VR128:$src),
3923*0b57cec5SDimitry Andric           "pmovmskb\t{$src, $dst|$dst, $src}",
3924*0b57cec5SDimitry Andric           [(set GR32orGR64:$dst, (X86movmsk (v16i8 VR128:$src)))]>,
3925*0b57cec5SDimitry Andric           Sched<[WriteVecMOVMSK]>, VEX, VEX_WIG;
3926*0b57cec5SDimitry Andric
3927*0b57cec5SDimitry Andriclet Predicates = [HasAVX2] in {
3928*0b57cec5SDimitry Andricdef VPMOVMSKBYrr  : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
3929*0b57cec5SDimitry Andric           (ins VR256:$src),
3930*0b57cec5SDimitry Andric           "pmovmskb\t{$src, $dst|$dst, $src}",
3931*0b57cec5SDimitry Andric           [(set GR32orGR64:$dst, (X86movmsk (v32i8 VR256:$src)))]>,
3932*0b57cec5SDimitry Andric           Sched<[WriteVecMOVMSKY]>, VEX, VEX_L, VEX_WIG;
3933*0b57cec5SDimitry Andric}
3934*0b57cec5SDimitry Andric
3935*0b57cec5SDimitry Andricdef PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src),
3936*0b57cec5SDimitry Andric           "pmovmskb\t{$src, $dst|$dst, $src}",
3937*0b57cec5SDimitry Andric           [(set GR32orGR64:$dst, (X86movmsk (v16i8 VR128:$src)))]>,
3938*0b57cec5SDimitry Andric           Sched<[WriteVecMOVMSK]>;
3939*0b57cec5SDimitry Andric
3940*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
3941*0b57cec5SDimitry Andric
3942*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3943*0b57cec5SDimitry Andric// SSE2 - Conditional Store
3944*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3945*0b57cec5SDimitry Andric
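// MASKMOVDQU stores the bytes of $src whose corresponding byte in $mask has its
// most-significant bit set; the store address is taken implicitly from EDI
// (32-bit mode) or RDI (64-bit mode).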
3946*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in {
3947*0b57cec5SDimitry Andriclet Uses = [EDI], Predicates = [HasAVX,Not64BitMode] in
3948*0b57cec5SDimitry Andricdef VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
3949*0b57cec5SDimitry Andric           (ins VR128:$src, VR128:$mask),
3950*0b57cec5SDimitry Andric           "maskmovdqu\t{$mask, $src|$src, $mask}",
3951*0b57cec5SDimitry Andric           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
3952*0b57cec5SDimitry Andric           VEX, VEX_WIG;
3953*0b57cec5SDimitry Andriclet Uses = [RDI], Predicates = [HasAVX,In64BitMode] in
3954*0b57cec5SDimitry Andricdef VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
3955*0b57cec5SDimitry Andric           (ins VR128:$src, VR128:$mask),
3956*0b57cec5SDimitry Andric           "maskmovdqu\t{$mask, $src|$src, $mask}",
3957*0b57cec5SDimitry Andric           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>,
3958*0b57cec5SDimitry Andric           VEX, VEX_WIG;
3959*0b57cec5SDimitry Andric
3960*0b57cec5SDimitry Andriclet Uses = [EDI], Predicates = [UseSSE2,Not64BitMode] in
3961*0b57cec5SDimitry Andricdef MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
3962*0b57cec5SDimitry Andric           "maskmovdqu\t{$mask, $src|$src, $mask}",
3963*0b57cec5SDimitry Andric           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
3964*0b57cec5SDimitry Andriclet Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in
3965*0b57cec5SDimitry Andricdef MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
3966*0b57cec5SDimitry Andric           "maskmovdqu\t{$mask, $src|$src, $mask}",
3967*0b57cec5SDimitry Andric           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
3968*0b57cec5SDimitry Andric
3969*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
3970*0b57cec5SDimitry Andric
3971*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3972*0b57cec5SDimitry Andric// SSE2 - Move Doubleword/Quadword
3973*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3974*0b57cec5SDimitry Andric
3975*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
3976*0b57cec5SDimitry Andric// Move Int Doubleword to Packed Double Int
3977*0b57cec5SDimitry Andric//
3978*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in {
3979*0b57cec5SDimitry Andricdef VMOVDI2PDIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
3980*0b57cec5SDimitry Andric                        "movd\t{$src, $dst|$dst, $src}",
3981*0b57cec5SDimitry Andric                        [(set VR128:$dst,
3982*0b57cec5SDimitry Andric                          (v4i32 (scalar_to_vector GR32:$src)))]>,
3983*0b57cec5SDimitry Andric                          VEX, Sched<[WriteVecMoveFromGpr]>;
3984*0b57cec5SDimitry Andricdef VMOVDI2PDIrm : VS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
3985*0b57cec5SDimitry Andric                        "movd\t{$src, $dst|$dst, $src}",
3986*0b57cec5SDimitry Andric                        [(set VR128:$dst,
3987*0b57cec5SDimitry Andric                          (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3988*0b57cec5SDimitry Andric                        VEX, Sched<[WriteVecLoad]>;
3989*0b57cec5SDimitry Andricdef VMOV64toPQIrr : VRS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
3990*0b57cec5SDimitry Andric                          "movq\t{$src, $dst|$dst, $src}",
3991*0b57cec5SDimitry Andric                          [(set VR128:$dst,
3992*0b57cec5SDimitry Andric                            (v2i64 (scalar_to_vector GR64:$src)))]>,
3993*0b57cec5SDimitry Andric                          VEX, Sched<[WriteVecMoveFromGpr]>;
3994*0b57cec5SDimitry Andriclet isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3995*0b57cec5SDimitry Andricdef VMOV64toPQIrm : VRS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
3996*0b57cec5SDimitry Andric                          "movq\t{$src, $dst|$dst, $src}", []>,
3997*0b57cec5SDimitry Andric                          VEX, Sched<[WriteVecLoad]>;
3998*0b57cec5SDimitry Andriclet isCodeGenOnly = 1 in
3999*0b57cec5SDimitry Andricdef VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
4000*0b57cec5SDimitry Andric                         "movq\t{$src, $dst|$dst, $src}",
4001*0b57cec5SDimitry Andric                         [(set FR64:$dst, (bitconvert GR64:$src))]>,
4002*0b57cec5SDimitry Andric                         VEX, Sched<[WriteVecMoveFromGpr]>;
4003*0b57cec5SDimitry Andric
4004*0b57cec5SDimitry Andricdef MOVDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
4005*0b57cec5SDimitry Andric                      "movd\t{$src, $dst|$dst, $src}",
4006*0b57cec5SDimitry Andric                      [(set VR128:$dst,
4007*0b57cec5SDimitry Andric                        (v4i32 (scalar_to_vector GR32:$src)))]>,
4008*0b57cec5SDimitry Andric                      Sched<[WriteVecMoveFromGpr]>;
4009*0b57cec5SDimitry Andricdef MOVDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
4010*0b57cec5SDimitry Andric                      "movd\t{$src, $dst|$dst, $src}",
4011*0b57cec5SDimitry Andric                      [(set VR128:$dst,
4012*0b57cec5SDimitry Andric                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
4013*0b57cec5SDimitry Andric                      Sched<[WriteVecLoad]>;
4014*0b57cec5SDimitry Andricdef MOV64toPQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
4015*0b57cec5SDimitry Andric                        "movq\t{$src, $dst|$dst, $src}",
4016*0b57cec5SDimitry Andric                        [(set VR128:$dst,
4017*0b57cec5SDimitry Andric                          (v2i64 (scalar_to_vector GR64:$src)))]>,
4018*0b57cec5SDimitry Andric                        Sched<[WriteVecMoveFromGpr]>;
4019*0b57cec5SDimitry Andriclet isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
4020*0b57cec5SDimitry Andricdef MOV64toPQIrm : RS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
4021*0b57cec5SDimitry Andric                        "movq\t{$src, $dst|$dst, $src}", []>,
4022*0b57cec5SDimitry Andric                        Sched<[WriteVecLoad]>;
4023*0b57cec5SDimitry Andriclet isCodeGenOnly = 1 in
4024*0b57cec5SDimitry Andricdef MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
4025*0b57cec5SDimitry Andric                       "movq\t{$src, $dst|$dst, $src}",
4026*0b57cec5SDimitry Andric                       [(set FR64:$dst, (bitconvert GR64:$src))]>,
4027*0b57cec5SDimitry Andric                       Sched<[WriteVecMoveFromGpr]>;
4028*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
4029*0b57cec5SDimitry Andric
4030*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4031*0b57cec5SDimitry Andric// Move Int Doubleword to Single Scalar
4032*0b57cec5SDimitry Andric//
4033*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
4034*0b57cec5SDimitry Andric  def VMOVDI2SSrr  : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
4035*0b57cec5SDimitry Andric                        "movd\t{$src, $dst|$dst, $src}",
4036*0b57cec5SDimitry Andric                        [(set FR32:$dst, (bitconvert GR32:$src))]>,
4037*0b57cec5SDimitry Andric                        VEX, Sched<[WriteVecMoveFromGpr]>;
4038*0b57cec5SDimitry Andric
4039*0b57cec5SDimitry Andric  def MOVDI2SSrr  : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
4040*0b57cec5SDimitry Andric                        "movd\t{$src, $dst|$dst, $src}",
4041*0b57cec5SDimitry Andric                        [(set FR32:$dst, (bitconvert GR32:$src))]>,
4042*0b57cec5SDimitry Andric                        Sched<[WriteVecMoveFromGpr]>;
4043*0b57cec5SDimitry Andric
4044*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
4045*0b57cec5SDimitry Andric
4046*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4047*0b57cec5SDimitry Andric// Move Packed Doubleword Int to Packed Double Int
4048*0b57cec5SDimitry Andric//
4049*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in {
4050*0b57cec5SDimitry Andricdef VMOVPDI2DIrr  : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
4051*0b57cec5SDimitry Andric                         "movd\t{$src, $dst|$dst, $src}",
4052*0b57cec5SDimitry Andric                         [(set GR32:$dst, (extractelt (v4i32 VR128:$src),
4053*0b57cec5SDimitry Andric                                          (iPTR 0)))]>, VEX,
4054*0b57cec5SDimitry Andric                         Sched<[WriteVecMoveToGpr]>;
4055*0b57cec5SDimitry Andricdef VMOVPDI2DImr  : VS2I<0x7E, MRMDestMem, (outs),
4056*0b57cec5SDimitry Andric                         (ins i32mem:$dst, VR128:$src),
4057*0b57cec5SDimitry Andric                         "movd\t{$src, $dst|$dst, $src}",
4058*0b57cec5SDimitry Andric                         [(store (i32 (extractelt (v4i32 VR128:$src),
4059*0b57cec5SDimitry Andric                                       (iPTR 0))), addr:$dst)]>,
4060*0b57cec5SDimitry Andric                         VEX, Sched<[WriteVecStore]>;
4061*0b57cec5SDimitry Andricdef MOVPDI2DIrr  : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
4062*0b57cec5SDimitry Andric                       "movd\t{$src, $dst|$dst, $src}",
4063*0b57cec5SDimitry Andric                       [(set GR32:$dst, (extractelt (v4i32 VR128:$src),
4064*0b57cec5SDimitry Andric                                        (iPTR 0)))]>,
4065*0b57cec5SDimitry Andric                   Sched<[WriteVecMoveToGpr]>;
4066*0b57cec5SDimitry Andricdef MOVPDI2DImr  : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
4067*0b57cec5SDimitry Andric                       "movd\t{$src, $dst|$dst, $src}",
4068*0b57cec5SDimitry Andric                       [(store (i32 (extractelt (v4i32 VR128:$src),
4069*0b57cec5SDimitry Andric                                     (iPTR 0))), addr:$dst)]>,
4070*0b57cec5SDimitry Andric                       Sched<[WriteVecStore]>;
4071*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
4072*0b57cec5SDimitry Andric
4073*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4074*0b57cec5SDimitry Andric// Move Packed Doubleword Int first element to Doubleword Int
4075*0b57cec5SDimitry Andric//
4076*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in {
4077*0b57cec5SDimitry Andriclet SchedRW = [WriteVecMoveToGpr] in {
4078*0b57cec5SDimitry Andricdef VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
4079*0b57cec5SDimitry Andric                          "movq\t{$src, $dst|$dst, $src}",
4080*0b57cec5SDimitry Andric                          [(set GR64:$dst, (extractelt (v2i64 VR128:$src),
4081*0b57cec5SDimitry Andric                                                        (iPTR 0)))]>,
4082*0b57cec5SDimitry Andric                      VEX;
4083*0b57cec5SDimitry Andric
4084*0b57cec5SDimitry Andricdef MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
4085*0b57cec5SDimitry Andric                        "movq\t{$src, $dst|$dst, $src}",
4086*0b57cec5SDimitry Andric                        [(set GR64:$dst, (extractelt (v2i64 VR128:$src),
4087*0b57cec5SDimitry Andric                                                         (iPTR 0)))]>;
4088*0b57cec5SDimitry Andric} //SchedRW
4089*0b57cec5SDimitry Andric
4090*0b57cec5SDimitry Andriclet isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
4091*0b57cec5SDimitry Andricdef VMOVPQIto64mr : VRS2I<0x7E, MRMDestMem, (outs),
4092*0b57cec5SDimitry Andric                          (ins i64mem:$dst, VR128:$src),
4093*0b57cec5SDimitry Andric                          "movq\t{$src, $dst|$dst, $src}", []>,
4094*0b57cec5SDimitry Andric                          VEX, Sched<[WriteVecStore]>;
4095*0b57cec5SDimitry Andriclet isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
4096*0b57cec5SDimitry Andricdef MOVPQIto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
4097*0b57cec5SDimitry Andric                        "movq\t{$src, $dst|$dst, $src}", []>,
4098*0b57cec5SDimitry Andric                        Sched<[WriteVecStore]>;
4099*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
4100*0b57cec5SDimitry Andric
4101*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4102*0b57cec5SDimitry Andric// Bitcast FR64 <-> GR64
4103*0b57cec5SDimitry Andric//
4104*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
4105*0b57cec5SDimitry Andric  def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
4106*0b57cec5SDimitry Andric                           "movq\t{$src, $dst|$dst, $src}",
4107*0b57cec5SDimitry Andric                           [(set GR64:$dst, (bitconvert FR64:$src))]>,
4108*0b57cec5SDimitry Andric                           VEX, Sched<[WriteVecMoveToGpr]>;
4109*0b57cec5SDimitry Andric
4110*0b57cec5SDimitry Andric  def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
4111*0b57cec5SDimitry Andric                         "movq\t{$src, $dst|$dst, $src}",
4112*0b57cec5SDimitry Andric                         [(set GR64:$dst, (bitconvert FR64:$src))]>,
4113*0b57cec5SDimitry Andric                         Sched<[WriteVecMoveToGpr]>;
4114*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
4115*0b57cec5SDimitry Andric
4116*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4117*0b57cec5SDimitry Andric// Move Scalar Single to Double Int
4118*0b57cec5SDimitry Andric//
4119*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
4120*0b57cec5SDimitry Andric  def VMOVSS2DIrr  : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
4121*0b57cec5SDimitry Andric                        "movd\t{$src, $dst|$dst, $src}",
4122*0b57cec5SDimitry Andric                        [(set GR32:$dst, (bitconvert FR32:$src))]>,
4123*0b57cec5SDimitry Andric                        VEX, Sched<[WriteVecMoveToGpr]>;
4124*0b57cec5SDimitry Andric  def MOVSS2DIrr  : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
4125*0b57cec5SDimitry Andric                        "movd\t{$src, $dst|$dst, $src}",
4126*0b57cec5SDimitry Andric                        [(set GR32:$dst, (bitconvert FR32:$src))]>,
4127*0b57cec5SDimitry Andric                        Sched<[WriteVecMoveToGpr]>;
4128*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
4129*0b57cec5SDimitry Andric
4130*0b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
4131*0b57cec5SDimitry Andric  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4132*0b57cec5SDimitry Andric            (VMOVDI2PDIrr GR32:$src)>;
4133*0b57cec5SDimitry Andric
4134*0b57cec5SDimitry Andric  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4135*0b57cec5SDimitry Andric            (VMOV64toPQIrr GR64:$src)>;
4136*0b57cec5SDimitry Andric
4137*0b57cec5SDimitry Andric  // AVX 128-bit movd/movq instructions zero the remaining XMM elements and also
4138*0b57cec5SDimitry Andric  // zero the upper 128 bits of the corresponding 256-bit register.
4139*0b57cec5SDimitry Andric  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
4140*0b57cec5SDimitry Andric            (VMOVDI2PDIrm addr:$src)>;
4141*0b57cec5SDimitry Andric  def : Pat<(v4i32 (X86vzload32 addr:$src)),
4142*0b57cec5SDimitry Andric            (VMOVDI2PDIrm addr:$src)>;
4143*0b57cec5SDimitry Andric  def : Pat<(v8i32 (X86vzload32 addr:$src)),
4144*0b57cec5SDimitry Andric            (SUBREG_TO_REG (i64 0), (v4i32 (VMOVDI2PDIrm addr:$src)), sub_xmm)>;
4145*0b57cec5SDimitry Andric}
4146*0b57cec5SDimitry Andric
4147*0b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
4148*0b57cec5SDimitry Andric  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4149*0b57cec5SDimitry Andric            (MOVDI2PDIrr GR32:$src)>;
4150*0b57cec5SDimitry Andric
4151*0b57cec5SDimitry Andric  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4152*0b57cec5SDimitry Andric            (MOV64toPQIrr GR64:$src)>;
4153*0b57cec5SDimitry Andric  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
4154*0b57cec5SDimitry Andric            (MOVDI2PDIrm addr:$src)>;
4155*0b57cec5SDimitry Andric  def : Pat<(v4i32 (X86vzload32 addr:$src)),
4156*0b57cec5SDimitry Andric            (MOVDI2PDIrm addr:$src)>;
4157*0b57cec5SDimitry Andric}
4158*0b57cec5SDimitry Andric
4159*0b57cec5SDimitry Andric// Before the MC layer of LLVM existed, clang emitted "movd" assembly instead of
4160*0b57cec5SDimitry Andric// "movq" due to a MacOS assembler parsing limitation. These aliases are added so
4161*0b57cec5SDimitry Andric// that such old assembly still parses.
4162*0b57cec5SDimitry Andricdef : InstAlias<"movd\t{$src, $dst|$dst, $src}",
4163*0b57cec5SDimitry Andric                (MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
4164*0b57cec5SDimitry Andricdef : InstAlias<"movd\t{$src, $dst|$dst, $src}",
4165*0b57cec5SDimitry Andric                (MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
4166*0b57cec5SDimitry Andric// Allow "vmovd" but print "vmovq" since we don't need compatibility for AVX.
4167*0b57cec5SDimitry Andricdef : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
4168*0b57cec5SDimitry Andric                (VMOV64toPQIrr VR128:$dst, GR64:$src), 0>;
4169*0b57cec5SDimitry Andricdef : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
4170*0b57cec5SDimitry Andric                (VMOVPQIto64rr GR64:$dst, VR128:$src), 0>;
4171*0b57cec5SDimitry Andric
4172*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4173*0b57cec5SDimitry Andric// SSE2 - Move Quadword
4174*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4175*0b57cec5SDimitry Andric
4176*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4177*0b57cec5SDimitry Andric// Move Quadword Int to Packed Quadword Int
4178*0b57cec5SDimitry Andric//
4179*0b57cec5SDimitry Andric
4180*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt, SchedRW = [WriteVecLoad] in {
4181*0b57cec5SDimitry Andricdef VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
4182*0b57cec5SDimitry Andric                    "vmovq\t{$src, $dst|$dst, $src}",
4183*0b57cec5SDimitry Andric                    [(set VR128:$dst,
4184*0b57cec5SDimitry Andric                      (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
4185*0b57cec5SDimitry Andric                    VEX, Requires<[UseAVX]>, VEX_WIG;
4186*0b57cec5SDimitry Andricdef MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
4187*0b57cec5SDimitry Andric                    "movq\t{$src, $dst|$dst, $src}",
4188*0b57cec5SDimitry Andric                    [(set VR128:$dst,
4189*0b57cec5SDimitry Andric                      (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
4190*0b57cec5SDimitry Andric                    XS, Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
4191*0b57cec5SDimitry Andric} // ExeDomain, SchedRW
4192*0b57cec5SDimitry Andric
4193*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4194*0b57cec5SDimitry Andric// Move Packed Quadword Int to Quadword Int
4195*0b57cec5SDimitry Andric//
4196*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt, SchedRW = [WriteVecStore] in {
4197*0b57cec5SDimitry Andricdef VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
4198*0b57cec5SDimitry Andric                        "movq\t{$src, $dst|$dst, $src}",
4199*0b57cec5SDimitry Andric                        [(store (i64 (extractelt (v2i64 VR128:$src),
4200*0b57cec5SDimitry Andric                                      (iPTR 0))), addr:$dst)]>,
4201*0b57cec5SDimitry Andric                        VEX, VEX_WIG;
4202*0b57cec5SDimitry Andricdef MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
4203*0b57cec5SDimitry Andric                      "movq\t{$src, $dst|$dst, $src}",
4204*0b57cec5SDimitry Andric                      [(store (i64 (extractelt (v2i64 VR128:$src),
4205*0b57cec5SDimitry Andric                                    (iPTR 0))), addr:$dst)]>;
4206*0b57cec5SDimitry Andric} // ExeDomain, SchedRW
4207*0b57cec5SDimitry Andric
4208*0b57cec5SDimitry Andric// For disassembler only
4209*0b57cec5SDimitry Andriclet isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
4210*0b57cec5SDimitry Andric    SchedRW = [SchedWriteVecLogic.XMM] in {
4211*0b57cec5SDimitry Andricdef VMOVPQI2QIrr : VS2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
4212*0b57cec5SDimitry Andric                     "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_WIG;
4213*0b57cec5SDimitry Andricdef MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
4214*0b57cec5SDimitry Andric                      "movq\t{$src, $dst|$dst, $src}", []>;
4215*0b57cec5SDimitry Andric}
4216*0b57cec5SDimitry Andric
4217*0b57cec5SDimitry Andricdef : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
4218*0b57cec5SDimitry Andric                (VMOVPQI2QIrr VR128:$dst, VR128:$src), 0>;
4219*0b57cec5SDimitry Andricdef : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
4220*0b57cec5SDimitry Andric                (MOVPQI2QIrr VR128:$dst, VR128:$src), 0>;
4221*0b57cec5SDimitry Andric
4222*0b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
4223*0b57cec5SDimitry Andric  def : Pat<(v2i64 (X86vzload64 addr:$src)),
4224*0b57cec5SDimitry Andric            (VMOVQI2PQIrm addr:$src)>;
4225*0b57cec5SDimitry Andric  def : Pat<(v4i64 (X86vzload64 addr:$src)),
4226*0b57cec5SDimitry Andric            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIrm addr:$src)), sub_xmm)>;
4227*0b57cec5SDimitry Andric
4228*0b57cec5SDimitry Andric  def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
4229*0b57cec5SDimitry Andric            (VMOVPQI2QImr addr:$dst, VR128:$src)>;
4230*0b57cec5SDimitry Andric}
4231*0b57cec5SDimitry Andric
4232*0b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
4233*0b57cec5SDimitry Andric  def : Pat<(v2i64 (X86vzload64 addr:$src)), (MOVQI2PQIrm addr:$src)>;
4234*0b57cec5SDimitry Andric
4235*0b57cec5SDimitry Andric  def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
4236*0b57cec5SDimitry Andric            (MOVPQI2QImr addr:$dst, VR128:$src)>;
4237*0b57cec5SDimitry Andric}
4238*0b57cec5SDimitry Andric
4239*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4240*0b57cec5SDimitry Andric// Move from XMM to XMM and clear the upper 64 bits. Note: there is a bug in the
4241*0b57cec5SDimitry Andric// IA-32 documentation; movq xmm1, xmm2 does clear the high bits.
4242*0b57cec5SDimitry Andric//
4243*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4244*0b57cec5SDimitry Andricdef VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4245*0b57cec5SDimitry Andric                        "vmovq\t{$src, $dst|$dst, $src}",
4246*0b57cec5SDimitry Andric                    [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
4247*0b57cec5SDimitry Andric                         XS, VEX, Requires<[UseAVX]>, VEX_WIG;
4248*0b57cec5SDimitry Andricdef MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4249*0b57cec5SDimitry Andric                        "movq\t{$src, $dst|$dst, $src}",
4250*0b57cec5SDimitry Andric                    [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
4251*0b57cec5SDimitry Andric                        XS, Requires<[UseSSE2]>;
4252*0b57cec5SDimitry Andric} // ExeDomain, SchedRW
4253*0b57cec5SDimitry Andric
4254*0b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
4255*0b57cec5SDimitry Andric  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
4256*0b57cec5SDimitry Andric            (VMOVZPQILo2PQIrr VR128:$src)>;
4257*0b57cec5SDimitry Andric}
4258*0b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
4259*0b57cec5SDimitry Andric  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
4260*0b57cec5SDimitry Andric            (MOVZPQILo2PQIrr VR128:$src)>;
4261*0b57cec5SDimitry Andric}
4262*0b57cec5SDimitry Andric
4263*0b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
4264*0b57cec5SDimitry Andric  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
4265*0b57cec5SDimitry Andric            (SUBREG_TO_REG (i32 0),
4266*0b57cec5SDimitry Andric             (v2f64 (VMOVZPQILo2PQIrr
4267*0b57cec5SDimitry Andric                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)))),
4268*0b57cec5SDimitry Andric             sub_xmm)>;
4269*0b57cec5SDimitry Andric  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
4270*0b57cec5SDimitry Andric            (SUBREG_TO_REG (i32 0),
4271*0b57cec5SDimitry Andric             (v2i64 (VMOVZPQILo2PQIrr
4272*0b57cec5SDimitry Andric                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)))),
4273*0b57cec5SDimitry Andric             sub_xmm)>;
4274*0b57cec5SDimitry Andric}
4275*0b57cec5SDimitry Andric
4276*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4277*0b57cec5SDimitry Andric// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
4278*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4279*0b57cec5SDimitry Andric
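/// sse3_replicate_sfp - MOVSHDUP/MOVSLDUP style multiclass: duplicates the
/// odd-indexed (movshdup) or even-indexed (movsldup) single-precision elements
/// of the source into each adjacent pair of destination elements.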
4280*0b57cec5SDimitry Andricmulticlass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
4281*0b57cec5SDimitry Andric                              ValueType vt, RegisterClass RC, PatFrag mem_frag,
4282*0b57cec5SDimitry Andric                              X86MemOperand x86memop, X86FoldableSchedWrite sched> {
4283*0b57cec5SDimitry Andricdef rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4284*0b57cec5SDimitry Andric                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4285*0b57cec5SDimitry Andric                      [(set RC:$dst, (vt (OpNode RC:$src)))]>,
4286*0b57cec5SDimitry Andric                      Sched<[sched]>;
4287*0b57cec5SDimitry Andricdef rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
4288*0b57cec5SDimitry Andric                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4289*0b57cec5SDimitry Andric                      [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>,
4290*0b57cec5SDimitry Andric                      Sched<[sched.Folded]>;
4291*0b57cec5SDimitry Andric}
4292*0b57cec5SDimitry Andric
4293*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
4294*0b57cec5SDimitry Andric  defm VMOVSHDUP  : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
4295*0b57cec5SDimitry Andric                                       v4f32, VR128, loadv4f32, f128mem,
4296*0b57cec5SDimitry Andric                                       SchedWriteFShuffle.XMM>, VEX, VEX_WIG;
4297*0b57cec5SDimitry Andric  defm VMOVSLDUP  : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
4298*0b57cec5SDimitry Andric                                       v4f32, VR128, loadv4f32, f128mem,
4299*0b57cec5SDimitry Andric                                       SchedWriteFShuffle.XMM>, VEX, VEX_WIG;
4300*0b57cec5SDimitry Andric  defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
4301*0b57cec5SDimitry Andric                                       v8f32, VR256, loadv8f32, f256mem,
4302*0b57cec5SDimitry Andric                                       SchedWriteFShuffle.YMM>, VEX, VEX_L, VEX_WIG;
4303*0b57cec5SDimitry Andric  defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
4304*0b57cec5SDimitry Andric                                       v8f32, VR256, loadv8f32, f256mem,
4305*0b57cec5SDimitry Andric                                       SchedWriteFShuffle.YMM>, VEX, VEX_L, VEX_WIG;
4306*0b57cec5SDimitry Andric}
4307*0b57cec5SDimitry Andricdefm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
4308*0b57cec5SDimitry Andric                                   memopv4f32, f128mem, SchedWriteFShuffle.XMM>;
4309*0b57cec5SDimitry Andricdefm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
4310*0b57cec5SDimitry Andric                                   memopv4f32, f128mem, SchedWriteFShuffle.XMM>;
4311*0b57cec5SDimitry Andric
4312*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
4313*0b57cec5SDimitry Andric  def : Pat<(v4i32 (X86Movshdup VR128:$src)),
4314*0b57cec5SDimitry Andric            (VMOVSHDUPrr VR128:$src)>;
4315*0b57cec5SDimitry Andric  def : Pat<(v4i32 (X86Movshdup (load addr:$src))),
4316*0b57cec5SDimitry Andric            (VMOVSHDUPrm addr:$src)>;
4317*0b57cec5SDimitry Andric  def : Pat<(v4i32 (X86Movsldup VR128:$src)),
4318*0b57cec5SDimitry Andric            (VMOVSLDUPrr VR128:$src)>;
4319*0b57cec5SDimitry Andric  def : Pat<(v4i32 (X86Movsldup (load addr:$src))),
4320*0b57cec5SDimitry Andric            (VMOVSLDUPrm addr:$src)>;
4321*0b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Movshdup VR256:$src)),
4322*0b57cec5SDimitry Andric            (VMOVSHDUPYrr VR256:$src)>;
4323*0b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Movshdup (load addr:$src))),
4324*0b57cec5SDimitry Andric            (VMOVSHDUPYrm addr:$src)>;
4325*0b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Movsldup VR256:$src)),
4326*0b57cec5SDimitry Andric            (VMOVSLDUPYrr VR256:$src)>;
4327*0b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Movsldup (load addr:$src))),
4328*0b57cec5SDimitry Andric            (VMOVSLDUPYrm addr:$src)>;
4329*0b57cec5SDimitry Andric}
4330*0b57cec5SDimitry Andric
4331*0b57cec5SDimitry Andriclet Predicates = [UseSSE3] in {
4332*0b57cec5SDimitry Andric  def : Pat<(v4i32 (X86Movshdup VR128:$src)),
4333*0b57cec5SDimitry Andric            (MOVSHDUPrr VR128:$src)>;
4334*0b57cec5SDimitry Andric  def : Pat<(v4i32 (X86Movshdup (memop addr:$src))),
4335*0b57cec5SDimitry Andric            (MOVSHDUPrm addr:$src)>;
4336*0b57cec5SDimitry Andric  def : Pat<(v4i32 (X86Movsldup VR128:$src)),
4337*0b57cec5SDimitry Andric            (MOVSLDUPrr VR128:$src)>;
4338*0b57cec5SDimitry Andric  def : Pat<(v4i32 (X86Movsldup (memop addr:$src))),
4339*0b57cec5SDimitry Andric            (MOVSLDUPrm addr:$src)>;
4340*0b57cec5SDimitry Andric}
4341*0b57cec5SDimitry Andric
4342*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4343*0b57cec5SDimitry Andric// SSE3 - Replicate Double FP - MOVDDUP
4344*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4345*0b57cec5SDimitry Andric
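/// sse3_replicate_dfp - MOVDDUP: broadcast the low double-precision element of
/// the source register (or a 64-bit memory operand) into both elements of the
/// destination.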
4346*0b57cec5SDimitry Andricmulticlass sse3_replicate_dfp<string OpcodeStr, X86SchedWriteWidths sched> {
4347*0b57cec5SDimitry Andricdef rr  : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4348*0b57cec5SDimitry Andric                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4349*0b57cec5SDimitry Andric                    [(set VR128:$dst, (v2f64 (X86Movddup VR128:$src)))]>,
4350*0b57cec5SDimitry Andric                    Sched<[sched.XMM]>;
4351*0b57cec5SDimitry Andricdef rm  : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
4352*0b57cec5SDimitry Andric                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4353*0b57cec5SDimitry Andric                    [(set VR128:$dst,
4354*0b57cec5SDimitry Andric                      (v2f64 (X86Movddup
4355*0b57cec5SDimitry Andric                              (scalar_to_vector (loadf64 addr:$src)))))]>,
4356*0b57cec5SDimitry Andric                    Sched<[sched.XMM.Folded]>;
4357*0b57cec5SDimitry Andric}
4358*0b57cec5SDimitry Andric
4359*0b57cec5SDimitry Andric// FIXME: Merge with above classes when there are patterns for the ymm version
4360*0b57cec5SDimitry Andricmulticlass sse3_replicate_dfp_y<string OpcodeStr, X86SchedWriteWidths sched> {
4361*0b57cec5SDimitry Andricdef rr  : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
4362*0b57cec5SDimitry Andric                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4363*0b57cec5SDimitry Andric                    [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>,
4364*0b57cec5SDimitry Andric                    Sched<[sched.YMM]>;
4365*0b57cec5SDimitry Andricdef rm  : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
4366*0b57cec5SDimitry Andric                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4367*0b57cec5SDimitry Andric                    [(set VR256:$dst,
4368*0b57cec5SDimitry Andric                      (v4f64 (X86Movddup (loadv4f64 addr:$src))))]>,
4369*0b57cec5SDimitry Andric                    Sched<[sched.YMM.Folded]>;
4370*0b57cec5SDimitry Andric}
4371*0b57cec5SDimitry Andric
4372*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
4373*0b57cec5SDimitry Andric  defm VMOVDDUP  : sse3_replicate_dfp<"vmovddup", SchedWriteFShuffle>,
4374*0b57cec5SDimitry Andric                                      VEX, VEX_WIG;
4375*0b57cec5SDimitry Andric  defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup", SchedWriteFShuffle>,
4376*0b57cec5SDimitry Andric                                        VEX, VEX_L, VEX_WIG;
4377*0b57cec5SDimitry Andric}
4378*0b57cec5SDimitry Andric
4379*0b57cec5SDimitry Andricdefm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>;
4380*0b57cec5SDimitry Andric
4381*0b57cec5SDimitry Andric
4382*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
4383*0b57cec5SDimitry Andric  def : Pat<(X86Movddup (v2f64 (nonvolatile_load addr:$src))),
4384*0b57cec5SDimitry Andric            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
4385*0b57cec5SDimitry Andric  def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
4386*0b57cec5SDimitry Andric            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
4387*0b57cec5SDimitry Andric}
4388*0b57cec5SDimitry Andric
4389*0b57cec5SDimitry Andriclet Predicates = [UseSSE3] in {
4390*0b57cec5SDimitry Andric  // No need for aligned memory here, since this only loads 64 bits.
4391*0b57cec5SDimitry Andric  def : Pat<(X86Movddup (v2f64 (nonvolatile_load addr:$src))),
4392*0b57cec5SDimitry Andric            (MOVDDUPrm addr:$src)>;
4393*0b57cec5SDimitry Andric  def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
4394*0b57cec5SDimitry Andric            (MOVDDUPrm addr:$src)>;
4395*0b57cec5SDimitry Andric}
4396*0b57cec5SDimitry Andric
4397*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4398*0b57cec5SDimitry Andric// SSE3 - Move Unaligned Integer
4399*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4400*0b57cec5SDimitry Andric
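// LDDQU/VLDDQU perform unaligned 128-bit (256-bit for the YMM form) integer
// loads, modeled here through the ldu_dq intrinsics rather than plain load
// nodes.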
4401*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
4402*0b57cec5SDimitry Andric  def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
4403*0b57cec5SDimitry Andric                      "vlddqu\t{$src, $dst|$dst, $src}",
4404*0b57cec5SDimitry Andric                      [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
4405*0b57cec5SDimitry Andric                      Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
4406*0b57cec5SDimitry Andric  def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
4407*0b57cec5SDimitry Andric                       "vlddqu\t{$src, $dst|$dst, $src}",
4408*0b57cec5SDimitry Andric                       [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>,
4409*0b57cec5SDimitry Andric                       Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, VEX_L, VEX_WIG;
4410*0b57cec5SDimitry Andric} // Predicates
4411*0b57cec5SDimitry Andric
4412*0b57cec5SDimitry Andricdef LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
4413*0b57cec5SDimitry Andric                   "lddqu\t{$src, $dst|$dst, $src}",
4414*0b57cec5SDimitry Andric                   [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
4415*0b57cec5SDimitry Andric                   Sched<[SchedWriteVecMoveLS.XMM.RM]>;
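
// For reference: LDDQU/VLDDQU perform an unaligned 128/256-bit integer load,
// functionally like MOVDQU; some implementations may read a wider aligned
// block to avoid cache-line-split penalties. Note that only the explicit
// ldu_dq intrinsics select these instructions here; ordinary loads do not.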
4416*0b57cec5SDimitry Andric
4417*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4418*0b57cec5SDimitry Andric// SSE3 - Arithmetic
4419*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4420*0b57cec5SDimitry Andric
4421*0b57cec5SDimitry Andricmulticlass sse3_addsub<string OpcodeStr, ValueType vt, RegisterClass RC,
4422*0b57cec5SDimitry Andric                       X86MemOperand x86memop, X86FoldableSchedWrite sched,
4423*0b57cec5SDimitry Andric                       PatFrag ld_frag, bit Is2Addr = 1> {
4424*0b57cec5SDimitry Andric  def rr : I<0xD0, MRMSrcReg,
4425*0b57cec5SDimitry Andric       (outs RC:$dst), (ins RC:$src1, RC:$src2),
4426*0b57cec5SDimitry Andric       !if(Is2Addr,
4427*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4428*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4429*0b57cec5SDimitry Andric       [(set RC:$dst, (vt (X86Addsub RC:$src1, RC:$src2)))]>,
4430*0b57cec5SDimitry Andric       Sched<[sched]>;
4431*0b57cec5SDimitry Andric  def rm : I<0xD0, MRMSrcMem,
4432*0b57cec5SDimitry Andric       (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
4433*0b57cec5SDimitry Andric       !if(Is2Addr,
4434*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4435*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4436*0b57cec5SDimitry Andric       [(set RC:$dst, (vt (X86Addsub RC:$src1, (ld_frag addr:$src2))))]>,
4437*0b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
4438*0b57cec5SDimitry Andric}
4439*0b57cec5SDimitry Andric
4440*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
4441*0b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in {
4442*0b57cec5SDimitry Andric    defm VADDSUBPS : sse3_addsub<"vaddsubps", v4f32, VR128, f128mem,
4443*0b57cec5SDimitry Andric                                 SchedWriteFAddSizes.PS.XMM, loadv4f32, 0>,
4444*0b57cec5SDimitry Andric                                 XD, VEX_4V, VEX_WIG;
4445*0b57cec5SDimitry Andric    defm VADDSUBPSY : sse3_addsub<"vaddsubps", v8f32, VR256, f256mem,
4446*0b57cec5SDimitry Andric                                  SchedWriteFAddSizes.PS.YMM, loadv8f32, 0>,
4447*0b57cec5SDimitry Andric                                  XD, VEX_4V, VEX_L, VEX_WIG;
4448*0b57cec5SDimitry Andric  }
4449*0b57cec5SDimitry Andric  let ExeDomain = SSEPackedDouble in {
4450*0b57cec5SDimitry Andric    defm VADDSUBPD : sse3_addsub<"vaddsubpd", v2f64, VR128, f128mem,
4451*0b57cec5SDimitry Andric                                 SchedWriteFAddSizes.PD.XMM, loadv2f64, 0>,
4452*0b57cec5SDimitry Andric                                 PD, VEX_4V, VEX_WIG;
4453*0b57cec5SDimitry Andric    defm VADDSUBPDY : sse3_addsub<"vaddsubpd", v4f64, VR256, f256mem,
4454*0b57cec5SDimitry Andric                                  SchedWriteFAddSizes.PD.YMM, loadv4f64, 0>,
4455*0b57cec5SDimitry Andric                                  PD, VEX_4V, VEX_L, VEX_WIG;
4456*0b57cec5SDimitry Andric  }
4457*0b57cec5SDimitry Andric}
4458*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
4459*0b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in
4460*0b57cec5SDimitry Andric  defm ADDSUBPS : sse3_addsub<"addsubps", v4f32, VR128, f128mem,
4461*0b57cec5SDimitry Andric                              SchedWriteFAddSizes.PS.XMM, memopv4f32>, XD;
4462*0b57cec5SDimitry Andric  let ExeDomain = SSEPackedDouble in
4463*0b57cec5SDimitry Andric  defm ADDSUBPD : sse3_addsub<"addsubpd", v2f64, VR128, f128mem,
4464*0b57cec5SDimitry Andric                              SchedWriteFAddSizes.PD.XMM, memopv2f64>, PD;
4465*0b57cec5SDimitry Andric}
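
// For reference, the X86Addsub node alternates per lane: even lanes subtract
// and odd lanes add, e.g. addsubps produces
//   dst = <a0-b0, a1+b1, a2-b2, a3+b3>
// (the behavior exposed by the _mm_addsub_ps / _mm_addsub_pd intrinsics).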
4466*0b57cec5SDimitry Andric
4467*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4468*0b57cec5SDimitry Andric// SSE3 Instructions
4469*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4470*0b57cec5SDimitry Andric
4471*0b57cec5SDimitry Andric// Horizontal ops
4472*0b57cec5SDimitry Andricmulticlass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
4473*0b57cec5SDimitry Andric                   X86MemOperand x86memop, SDNode OpNode,
4474*0b57cec5SDimitry Andric                   X86FoldableSchedWrite sched, PatFrag ld_frag,
4475*0b57cec5SDimitry Andric                   bit Is2Addr = 1> {
4476*0b57cec5SDimitry Andric  def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
4477*0b57cec5SDimitry Andric       !if(Is2Addr,
4478*0b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4479*0b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4480*0b57cec5SDimitry Andric      [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
4481*0b57cec5SDimitry Andric      Sched<[sched]>;
4482*0b57cec5SDimitry Andric
4483*0b57cec5SDimitry Andric  def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
4484*0b57cec5SDimitry Andric       !if(Is2Addr,
4485*0b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4486*0b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4487*0b57cec5SDimitry Andric      [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
4488*0b57cec5SDimitry Andric      Sched<[sched.Folded, sched.ReadAfterFold]>;
4489*0b57cec5SDimitry Andric}
4490*0b57cec5SDimitry Andricmulticlass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
4491*0b57cec5SDimitry Andric                  X86MemOperand x86memop, SDNode OpNode,
4492*0b57cec5SDimitry Andric                  X86FoldableSchedWrite sched, PatFrag ld_frag,
4493*0b57cec5SDimitry Andric                  bit Is2Addr = 1> {
4494*0b57cec5SDimitry Andric  def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
4495*0b57cec5SDimitry Andric       !if(Is2Addr,
4496*0b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4497*0b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4498*0b57cec5SDimitry Andric      [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
4499*0b57cec5SDimitry Andric        Sched<[sched]>;
4500*0b57cec5SDimitry Andric
4501*0b57cec5SDimitry Andric  def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
4502*0b57cec5SDimitry Andric       !if(Is2Addr,
4503*0b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4504*0b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4505*0b57cec5SDimitry Andric      [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
4506*0b57cec5SDimitry Andric        Sched<[sched.Folded, sched.ReadAfterFold]>;
4507*0b57cec5SDimitry Andric}
4508*0b57cec5SDimitry Andric
4509*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
4510*0b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in {
4511*0b57cec5SDimitry Andric    defm VHADDPS  : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
4512*0b57cec5SDimitry Andric                            X86fhadd, WriteFHAdd, loadv4f32, 0>, VEX_4V, VEX_WIG;
4513*0b57cec5SDimitry Andric    defm VHSUBPS  : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
4514*0b57cec5SDimitry Andric                            X86fhsub, WriteFHAdd, loadv4f32, 0>, VEX_4V, VEX_WIG;
4515*0b57cec5SDimitry Andric    defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
4516*0b57cec5SDimitry Andric                            X86fhadd, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
4517*0b57cec5SDimitry Andric    defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
4518*0b57cec5SDimitry Andric                            X86fhsub, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
4519*0b57cec5SDimitry Andric  }
4520*0b57cec5SDimitry Andric  let ExeDomain = SSEPackedDouble in {
4521*0b57cec5SDimitry Andric    defm VHADDPD  : S3_Int<0x7C, "vhaddpd", v2f64, VR128, f128mem,
4522*0b57cec5SDimitry Andric                           X86fhadd, WriteFHAdd, loadv2f64, 0>, VEX_4V, VEX_WIG;
4523*0b57cec5SDimitry Andric    defm VHSUBPD  : S3_Int<0x7D, "vhsubpd", v2f64, VR128, f128mem,
4524*0b57cec5SDimitry Andric                           X86fhsub, WriteFHAdd, loadv2f64, 0>, VEX_4V, VEX_WIG;
4525*0b57cec5SDimitry Andric    defm VHADDPDY : S3_Int<0x7C, "vhaddpd", v4f64, VR256, f256mem,
4526*0b57cec5SDimitry Andric                           X86fhadd, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
4527*0b57cec5SDimitry Andric    defm VHSUBPDY : S3_Int<0x7D, "vhsubpd", v4f64, VR256, f256mem,
4528*0b57cec5SDimitry Andric                           X86fhsub, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
4529*0b57cec5SDimitry Andric  }
4530*0b57cec5SDimitry Andric}
4531*0b57cec5SDimitry Andric
4532*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
4533*0b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in {
4534*0b57cec5SDimitry Andric    defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd,
4535*0b57cec5SDimitry Andric                          WriteFHAdd, memopv4f32>;
4536*0b57cec5SDimitry Andric    defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub,
4537*0b57cec5SDimitry Andric                          WriteFHAdd, memopv4f32>;
4538*0b57cec5SDimitry Andric  }
4539*0b57cec5SDimitry Andric  let ExeDomain = SSEPackedDouble in {
4540*0b57cec5SDimitry Andric    defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd,
4541*0b57cec5SDimitry Andric                         WriteFHAdd, memopv2f64>;
4542*0b57cec5SDimitry Andric    defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub,
4543*0b57cec5SDimitry Andric                         WriteFHAdd, memopv2f64>;
4544*0b57cec5SDimitry Andric  }
4545*0b57cec5SDimitry Andric}
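
// For reference, the horizontal ops pair adjacent lanes of each source, e.g.
// haddps computes dst = <a0+a1, a2+a3, b0+b1, b2+b3> and haddpd computes
// dst = <a0+a1, b0+b1>; the hsub* forms subtract the odd (higher) lane of
// each pair from the even (lower) one.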
4546*0b57cec5SDimitry Andric
4547*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4548*0b57cec5SDimitry Andric// SSSE3 - Packed Absolute Instructions
4549*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4550*0b57cec5SDimitry Andric
4551*0b57cec5SDimitry Andric/// SS3I_unop_rm - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
4552*0b57cec5SDimitry Andricmulticlass SS3I_unop_rm<bits<8> opc, string OpcodeStr, ValueType vt,
4553*0b57cec5SDimitry Andric                        SDNode OpNode, X86SchedWriteWidths sched, PatFrag ld_frag> {
4554*0b57cec5SDimitry Andric  def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
4555*0b57cec5SDimitry Andric                 (ins VR128:$src),
4556*0b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4557*0b57cec5SDimitry Andric                 [(set VR128:$dst, (vt (OpNode VR128:$src)))]>,
4558*0b57cec5SDimitry Andric                 Sched<[sched.XMM]>;
4559*0b57cec5SDimitry Andric
4560*0b57cec5SDimitry Andric  def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
4561*0b57cec5SDimitry Andric                 (ins i128mem:$src),
4562*0b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4563*0b57cec5SDimitry Andric                 [(set VR128:$dst,
4564*0b57cec5SDimitry Andric                   (vt (OpNode (ld_frag addr:$src))))]>,
4565*0b57cec5SDimitry Andric                 Sched<[sched.XMM.Folded]>;
4566*0b57cec5SDimitry Andric}
4567*0b57cec5SDimitry Andric
4568*0b57cec5SDimitry Andric/// SS3I_unop_rm_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
4569*0b57cec5SDimitry Andricmulticlass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt,
4570*0b57cec5SDimitry Andric                          SDNode OpNode, X86SchedWriteWidths sched> {
4571*0b57cec5SDimitry Andric  def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
4572*0b57cec5SDimitry Andric                  (ins VR256:$src),
4573*0b57cec5SDimitry Andric                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4574*0b57cec5SDimitry Andric                  [(set VR256:$dst, (vt (OpNode VR256:$src)))]>,
4575*0b57cec5SDimitry Andric                  Sched<[sched.YMM]>;
4576*0b57cec5SDimitry Andric
4577*0b57cec5SDimitry Andric  def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
4578*0b57cec5SDimitry Andric                  (ins i256mem:$src),
4579*0b57cec5SDimitry Andric                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4580*0b57cec5SDimitry Andric                  [(set VR256:$dst,
4581*0b57cec5SDimitry Andric                    (vt (OpNode (load addr:$src))))]>,
4582*0b57cec5SDimitry Andric                  Sched<[sched.YMM.Folded]>;
4583*0b57cec5SDimitry Andric}
4584*0b57cec5SDimitry Andric
4585*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
4586*0b57cec5SDimitry Andric  defm VPABSB  : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, SchedWriteVecALU,
4587*0b57cec5SDimitry Andric                              load>, VEX, VEX_WIG;
4588*0b57cec5SDimitry Andric  defm VPABSW  : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, SchedWriteVecALU,
4589*0b57cec5SDimitry Andric                              load>, VEX, VEX_WIG;
4590*0b57cec5SDimitry Andric}
4591*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
4592*0b57cec5SDimitry Andric  defm VPABSD  : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, SchedWriteVecALU,
4593*0b57cec5SDimitry Andric                              load>, VEX, VEX_WIG;
4594*0b57cec5SDimitry Andric}
4595*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
4596*0b57cec5SDimitry Andric  defm VPABSB  : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs, SchedWriteVecALU>,
4597*0b57cec5SDimitry Andric                                VEX, VEX_L, VEX_WIG;
4598*0b57cec5SDimitry Andric  defm VPABSW  : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs, SchedWriteVecALU>,
4599*0b57cec5SDimitry Andric                                VEX, VEX_L, VEX_WIG;
4600*0b57cec5SDimitry Andric}
4601*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
4602*0b57cec5SDimitry Andric  defm VPABSD  : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs, SchedWriteVecALU>,
4603*0b57cec5SDimitry Andric                                VEX, VEX_L, VEX_WIG;
4604*0b57cec5SDimitry Andric}
4605*0b57cec5SDimitry Andric
4606*0b57cec5SDimitry Andricdefm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, SchedWriteVecALU,
4607*0b57cec5SDimitry Andric                          memop>;
4608*0b57cec5SDimitry Andricdefm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, SchedWriteVecALU,
4609*0b57cec5SDimitry Andric                          memop>;
4610*0b57cec5SDimitry Andricdefm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, SchedWriteVecALU,
4611*0b57cec5SDimitry Andric                          memop>;
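
// For reference: PABSB/PABSW/PABSD take the per-element absolute value of
// signed integers (the abs node above), e.g. pabsb on <-1, -128, 5, ...>
// yields <1, 128, 5, ...>; there is no saturation, so abs(-128) keeps the
// 0x80 bit pattern.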
4612*0b57cec5SDimitry Andric
4613*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4614*0b57cec5SDimitry Andric// SSSE3 - Packed Binary Operator Instructions
4615*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4616*0b57cec5SDimitry Andric
4617*0b57cec5SDimitry Andric/// SS3I_binop_rm - Simple SSSE3 bin op
4618*0b57cec5SDimitry Andricmulticlass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4619*0b57cec5SDimitry Andric                         ValueType DstVT, ValueType OpVT, RegisterClass RC,
4620*0b57cec5SDimitry Andric                         PatFrag memop_frag, X86MemOperand x86memop,
4621*0b57cec5SDimitry Andric                         X86FoldableSchedWrite sched, bit Is2Addr = 1> {
4622*0b57cec5SDimitry Andric  let isCommutable = 1 in
4623*0b57cec5SDimitry Andric  def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst),
4624*0b57cec5SDimitry Andric       (ins RC:$src1, RC:$src2),
4625*0b57cec5SDimitry Andric       !if(Is2Addr,
4626*0b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4627*0b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4628*0b57cec5SDimitry Andric       [(set RC:$dst, (DstVT (OpNode (OpVT RC:$src1), RC:$src2)))]>,
4629*0b57cec5SDimitry Andric       Sched<[sched]>;
4630*0b57cec5SDimitry Andric  def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
4631*0b57cec5SDimitry Andric       (ins RC:$src1, x86memop:$src2),
4632*0b57cec5SDimitry Andric       !if(Is2Addr,
4633*0b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4634*0b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4635*0b57cec5SDimitry Andric       [(set RC:$dst,
4636*0b57cec5SDimitry Andric         (DstVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))]>,
4637*0b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
4638*0b57cec5SDimitry Andric}
4639*0b57cec5SDimitry Andric
4640*0b57cec5SDimitry Andric/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
4641*0b57cec5SDimitry Andricmulticlass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
4642*0b57cec5SDimitry Andric                             Intrinsic IntId128, X86FoldableSchedWrite sched,
4643*0b57cec5SDimitry Andric                             PatFrag ld_frag, bit Is2Addr = 1> {
4644*0b57cec5SDimitry Andric  let isCommutable = 1 in
4645*0b57cec5SDimitry Andric  def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
4646*0b57cec5SDimitry Andric       (ins VR128:$src1, VR128:$src2),
4647*0b57cec5SDimitry Andric       !if(Is2Addr,
4648*0b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4649*0b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4650*0b57cec5SDimitry Andric       [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
4651*0b57cec5SDimitry Andric       Sched<[sched]>;
4652*0b57cec5SDimitry Andric  def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
4653*0b57cec5SDimitry Andric       (ins VR128:$src1, i128mem:$src2),
4654*0b57cec5SDimitry Andric       !if(Is2Addr,
4655*0b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4656*0b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4657*0b57cec5SDimitry Andric       [(set VR128:$dst,
4658*0b57cec5SDimitry Andric         (IntId128 VR128:$src1, (ld_frag addr:$src2)))]>,
4659*0b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
4660*0b57cec5SDimitry Andric}
4661*0b57cec5SDimitry Andric
4662*0b57cec5SDimitry Andricmulticlass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
4663*0b57cec5SDimitry Andric                               Intrinsic IntId256,
4664*0b57cec5SDimitry Andric                               X86FoldableSchedWrite sched> {
4665*0b57cec5SDimitry Andric  let isCommutable = 1 in
4666*0b57cec5SDimitry Andric  def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
4667*0b57cec5SDimitry Andric       (ins VR256:$src1, VR256:$src2),
4668*0b57cec5SDimitry Andric       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4669*0b57cec5SDimitry Andric       [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
4670*0b57cec5SDimitry Andric       Sched<[sched]>;
4671*0b57cec5SDimitry Andric  def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
4672*0b57cec5SDimitry Andric       (ins VR256:$src1, i256mem:$src2),
4673*0b57cec5SDimitry Andric       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4674*0b57cec5SDimitry Andric       [(set VR256:$dst,
4675*0b57cec5SDimitry Andric         (IntId256 VR256:$src1, (load addr:$src2)))]>,
4676*0b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
4677*0b57cec5SDimitry Andric}
4678*0b57cec5SDimitry Andric
4679*0b57cec5SDimitry Andriclet ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
4680*0b57cec5SDimitry Andriclet isCommutable = 0 in {
4681*0b57cec5SDimitry Andric  defm VPSHUFB    : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8,
4682*0b57cec5SDimitry Andric                                  VR128, load, i128mem,
4683*0b57cec5SDimitry Andric                                  SchedWriteVarShuffle.XMM, 0>, VEX_4V, VEX_WIG;
4684*0b57cec5SDimitry Andric  defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16,
4685*0b57cec5SDimitry Andric                                  v16i8, VR128, load, i128mem,
4686*0b57cec5SDimitry Andric                                  SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG;
4687*0b57cec5SDimitry Andric}
4688*0b57cec5SDimitry Andricdefm VPMULHRSW    : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, v8i16,
4689*0b57cec5SDimitry Andric                                  VR128, load, i128mem,
4690*0b57cec5SDimitry Andric                                  SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG;
4691*0b57cec5SDimitry Andric}
4692*0b57cec5SDimitry Andric
4693*0b57cec5SDimitry Andriclet ImmT = NoImm, Predicates = [HasAVX] in {
4694*0b57cec5SDimitry Andriclet isCommutable = 0 in {
4695*0b57cec5SDimitry Andric  defm VPHADDW    : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128,
4696*0b57cec5SDimitry Andric                                  load, i128mem,
4697*0b57cec5SDimitry Andric                                  SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
4698*0b57cec5SDimitry Andric  defm VPHADDD    : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128,
4699*0b57cec5SDimitry Andric                                  load, i128mem,
4700*0b57cec5SDimitry Andric                                  SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
4701*0b57cec5SDimitry Andric  defm VPHSUBW    : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128,
4702*0b57cec5SDimitry Andric                                  load, i128mem,
4703*0b57cec5SDimitry Andric                                  SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
4704*0b57cec5SDimitry Andric  defm VPHSUBD    : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
4705*0b57cec5SDimitry Andric                                  load, i128mem,
4706*0b57cec5SDimitry Andric                                  SchedWritePHAdd.XMM, 0>, VEX_4V;
4707*0b57cec5SDimitry Andric  defm VPSIGNB    : SS3I_binop_rm_int<0x08, "vpsignb",
4708*0b57cec5SDimitry Andric                                      int_x86_ssse3_psign_b_128,
4709*0b57cec5SDimitry Andric                                      SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
4710*0b57cec5SDimitry Andric  defm VPSIGNW    : SS3I_binop_rm_int<0x09, "vpsignw",
4711*0b57cec5SDimitry Andric                                      int_x86_ssse3_psign_w_128,
4712*0b57cec5SDimitry Andric                                      SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
4713*0b57cec5SDimitry Andric  defm VPSIGND    : SS3I_binop_rm_int<0x0A, "vpsignd",
4714*0b57cec5SDimitry Andric                                      int_x86_ssse3_psign_d_128,
4715*0b57cec5SDimitry Andric                                      SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
4716*0b57cec5SDimitry Andric  defm VPHADDSW   : SS3I_binop_rm_int<0x03, "vphaddsw",
4717*0b57cec5SDimitry Andric                                      int_x86_ssse3_phadd_sw_128,
4718*0b57cec5SDimitry Andric                                      SchedWritePHAdd.XMM, load, 0>, VEX_4V, VEX_WIG;
4719*0b57cec5SDimitry Andric  defm VPHSUBSW   : SS3I_binop_rm_int<0x07, "vphsubsw",
4720*0b57cec5SDimitry Andric                                      int_x86_ssse3_phsub_sw_128,
4721*0b57cec5SDimitry Andric                                      SchedWritePHAdd.XMM, load, 0>, VEX_4V, VEX_WIG;
4722*0b57cec5SDimitry Andric}
4723*0b57cec5SDimitry Andric}
4724*0b57cec5SDimitry Andric
4725*0b57cec5SDimitry Andriclet ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
4726*0b57cec5SDimitry Andriclet isCommutable = 0 in {
4727*0b57cec5SDimitry Andric  defm VPSHUFBY   : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8,
4728*0b57cec5SDimitry Andric                                  VR256, load, i256mem,
4729*0b57cec5SDimitry Andric                                  SchedWriteVarShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
4730*0b57cec5SDimitry Andric  defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16,
4731*0b57cec5SDimitry Andric                                   v32i8, VR256, load, i256mem,
4732*0b57cec5SDimitry Andric                                   SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
4733*0b57cec5SDimitry Andric}
4734*0b57cec5SDimitry Andricdefm VPMULHRSWY   : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16,
4735*0b57cec5SDimitry Andric                                  VR256, load, i256mem,
4736*0b57cec5SDimitry Andric                                  SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
4737*0b57cec5SDimitry Andric}
4738*0b57cec5SDimitry Andric
4739*0b57cec5SDimitry Andriclet ImmT = NoImm, Predicates = [HasAVX2] in {
4740*0b57cec5SDimitry Andriclet isCommutable = 0 in {
4741*0b57cec5SDimitry Andric  defm VPHADDWY   : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16,
4742*0b57cec5SDimitry Andric                                  VR256, load, i256mem,
4743*0b57cec5SDimitry Andric                                  SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
4744*0b57cec5SDimitry Andric  defm VPHADDDY   : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256,
4745*0b57cec5SDimitry Andric                                  load, i256mem,
4746*0b57cec5SDimitry Andric                                  SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
4747*0b57cec5SDimitry Andric  defm VPHSUBWY   : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16,
4748*0b57cec5SDimitry Andric                                  VR256, load, i256mem,
4749*0b57cec5SDimitry Andric                                  SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
4750*0b57cec5SDimitry Andric  defm VPHSUBDY   : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
4751*0b57cec5SDimitry Andric                                  load, i256mem,
4752*0b57cec5SDimitry Andric                                  SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L;
4753*0b57cec5SDimitry Andric  defm VPSIGNB   : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
4754*0b57cec5SDimitry Andric                                       SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
4755*0b57cec5SDimitry Andric  defm VPSIGNW   : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w,
4756*0b57cec5SDimitry Andric                                       SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
4757*0b57cec5SDimitry Andric  defm VPSIGND   : SS3I_binop_rm_int_y<0x0A, "vpsignd", int_x86_avx2_psign_d,
4758*0b57cec5SDimitry Andric                                       SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
4759*0b57cec5SDimitry Andric  defm VPHADDSW  : SS3I_binop_rm_int_y<0x03, "vphaddsw",
4760*0b57cec5SDimitry Andric                                       int_x86_avx2_phadd_sw,
4761*0b57cec5SDimitry Andric                                       SchedWritePHAdd.YMM>, VEX_4V, VEX_L, VEX_WIG;
4762*0b57cec5SDimitry Andric  defm VPHSUBSW  : SS3I_binop_rm_int_y<0x07, "vphsubsw",
4763*0b57cec5SDimitry Andric                                       int_x86_avx2_phsub_sw,
4764*0b57cec5SDimitry Andric                                       SchedWritePHAdd.YMM>, VEX_4V, VEX_L, VEX_WIG;
4765*0b57cec5SDimitry Andric}
4766*0b57cec5SDimitry Andric}
4767*0b57cec5SDimitry Andric
4768*0b57cec5SDimitry Andric// None of these have i8 immediate fields.
4769*0b57cec5SDimitry Andriclet ImmT = NoImm, Constraints = "$src1 = $dst" in {
4770*0b57cec5SDimitry Andriclet isCommutable = 0 in {
4771*0b57cec5SDimitry Andric  defm PHADDW    : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, v8i16, VR128,
4772*0b57cec5SDimitry Andric                                 memop, i128mem, SchedWritePHAdd.XMM>;
4773*0b57cec5SDimitry Andric  defm PHADDD    : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, v4i32, VR128,
4774*0b57cec5SDimitry Andric                                 memop, i128mem, SchedWritePHAdd.XMM>;
4775*0b57cec5SDimitry Andric  defm PHSUBW    : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, v8i16, VR128,
4776*0b57cec5SDimitry Andric                                 memop, i128mem, SchedWritePHAdd.XMM>;
4777*0b57cec5SDimitry Andric  defm PHSUBD    : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, v4i32, VR128,
4778*0b57cec5SDimitry Andric                                 memop, i128mem, SchedWritePHAdd.XMM>;
4779*0b57cec5SDimitry Andric  defm PSIGNB    : SS3I_binop_rm_int<0x08, "psignb", int_x86_ssse3_psign_b_128,
4780*0b57cec5SDimitry Andric                                     SchedWriteVecALU.XMM, memop>;
4781*0b57cec5SDimitry Andric  defm PSIGNW    : SS3I_binop_rm_int<0x09, "psignw", int_x86_ssse3_psign_w_128,
4782*0b57cec5SDimitry Andric                                     SchedWriteVecALU.XMM, memop>;
4783*0b57cec5SDimitry Andric  defm PSIGND    : SS3I_binop_rm_int<0x0A, "psignd", int_x86_ssse3_psign_d_128,
4784*0b57cec5SDimitry Andric                                     SchedWriteVecALU.XMM, memop>;
4785*0b57cec5SDimitry Andric  defm PSHUFB    : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, v16i8, VR128,
4786*0b57cec5SDimitry Andric                                 memop, i128mem, SchedWriteVarShuffle.XMM>;
4787*0b57cec5SDimitry Andric  defm PHADDSW   : SS3I_binop_rm_int<0x03, "phaddsw",
4788*0b57cec5SDimitry Andric                                     int_x86_ssse3_phadd_sw_128,
4789*0b57cec5SDimitry Andric                                     SchedWritePHAdd.XMM, memop>;
4790*0b57cec5SDimitry Andric  defm PHSUBSW   : SS3I_binop_rm_int<0x07, "phsubsw",
4791*0b57cec5SDimitry Andric                                     int_x86_ssse3_phsub_sw_128,
4792*0b57cec5SDimitry Andric                                     SchedWritePHAdd.XMM, memop>;
4793*0b57cec5SDimitry Andric  defm PMADDUBSW : SS3I_binop_rm<0x04, "pmaddubsw", X86vpmaddubsw, v8i16,
4794*0b57cec5SDimitry Andric                                 v16i8, VR128, memop, i128mem,
4795*0b57cec5SDimitry Andric                                 SchedWriteVecIMul.XMM>;
4796*0b57cec5SDimitry Andric}
4797*0b57cec5SDimitry Andricdefm PMULHRSW    : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, v8i16,
4798*0b57cec5SDimitry Andric                                 VR128, memop, i128mem, SchedWriteVecIMul.XMM>;
4799*0b57cec5SDimitry Andric}
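
// For reference (illustrative only): PSHUFB selects bytes of the first source
// using the low 4 bits of each control byte in the second source and zeroes a
// lane when the control byte's high bit is set (per 128-bit lane for the YMM
// form); PMULHRSW returns the rounded high half of the signed 16-bit product,
// roughly (a*b + 0x4000) >> 15 per lane.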
4800*0b57cec5SDimitry Andric
4801*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4802*0b57cec5SDimitry Andric// SSSE3 - Packed Align Instruction Patterns
4803*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4804*0b57cec5SDimitry Andric
4805*0b57cec5SDimitry Andricmulticlass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
4806*0b57cec5SDimitry Andric                         PatFrag memop_frag, X86MemOperand x86memop,
4807*0b57cec5SDimitry Andric                         X86FoldableSchedWrite sched, bit Is2Addr = 1> {
4808*0b57cec5SDimitry Andric  let hasSideEffects = 0 in {
4809*0b57cec5SDimitry Andric  def rri : SS3AI<0x0F, MRMSrcReg, (outs RC:$dst),
4810*0b57cec5SDimitry Andric      (ins RC:$src1, RC:$src2, u8imm:$src3),
4811*0b57cec5SDimitry Andric      !if(Is2Addr,
4812*0b57cec5SDimitry Andric        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
4813*0b57cec5SDimitry Andric        !strconcat(asm,
4814*0b57cec5SDimitry Andric                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
4815*0b57cec5SDimitry Andric      [(set RC:$dst, (VT (X86PAlignr RC:$src1, RC:$src2, (i8 imm:$src3))))]>,
4816*0b57cec5SDimitry Andric      Sched<[sched]>;
4817*0b57cec5SDimitry Andric  let mayLoad = 1 in
4818*0b57cec5SDimitry Andric  def rmi : SS3AI<0x0F, MRMSrcMem, (outs RC:$dst),
4819*0b57cec5SDimitry Andric      (ins RC:$src1, x86memop:$src2, u8imm:$src3),
4820*0b57cec5SDimitry Andric      !if(Is2Addr,
4821*0b57cec5SDimitry Andric        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
4822*0b57cec5SDimitry Andric        !strconcat(asm,
4823*0b57cec5SDimitry Andric                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
4824*0b57cec5SDimitry Andric      [(set RC:$dst, (VT (X86PAlignr RC:$src1,
4825*0b57cec5SDimitry Andric                                     (memop_frag addr:$src2),
4826*0b57cec5SDimitry Andric                                     (i8 imm:$src3))))]>,
4827*0b57cec5SDimitry Andric      Sched<[sched.Folded, sched.ReadAfterFold]>;
4828*0b57cec5SDimitry Andric  }
4829*0b57cec5SDimitry Andric}
4830*0b57cec5SDimitry Andric
4831*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in
4832*0b57cec5SDimitry Andric  defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, load, i128mem,
4833*0b57cec5SDimitry Andric                                SchedWriteShuffle.XMM, 0>, VEX_4V, VEX_WIG;
4834*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
4835*0b57cec5SDimitry Andric  defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, load, i256mem,
4836*0b57cec5SDimitry Andric                                 SchedWriteShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
4837*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
4838*0b57cec5SDimitry Andric  defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memop, i128mem,
4839*0b57cec5SDimitry Andric                               SchedWriteShuffle.XMM>;
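
// For reference: PALIGNR concatenates src1 (high) with src2 (low) into a
// double-width value, shifts it right by imm bytes, and keeps the low half,
// e.g. with imm=4 the result is bytes src2[4..15] followed by src1[0..3]
// (the _mm_alignr_epi8 intrinsic; the YMM form operates per 128-bit lane).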
4840*0b57cec5SDimitry Andric
4841*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4842*0b57cec5SDimitry Andric// SSSE3 - Thread synchronization
4843*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
4844*0b57cec5SDimitry Andric
4845*0b57cec5SDimitry Andriclet SchedRW = [WriteSystem] in {
4846*0b57cec5SDimitry Andriclet Uses = [EAX, ECX, EDX] in
4847*0b57cec5SDimitry Andricdef MONITOR32rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
4848*0b57cec5SDimitry Andric                     TB, Requires<[HasSSE3, Not64BitMode]>;
4849*0b57cec5SDimitry Andriclet Uses = [RAX, ECX, EDX] in
4850*0b57cec5SDimitry Andricdef MONITOR64rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
4851*0b57cec5SDimitry Andric                     TB, Requires<[HasSSE3, In64BitMode]>;
4852*0b57cec5SDimitry Andric
4853*0b57cec5SDimitry Andriclet Uses = [ECX, EAX] in
4854*0b57cec5SDimitry Andricdef MWAITrr   : I<0x01, MRM_C9, (outs), (ins), "mwait",
4855*0b57cec5SDimitry Andric                  [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
4856*0b57cec5SDimitry Andric} // SchedRW
4857*0b57cec5SDimitry Andric
4858*0b57cec5SDimitry Andricdef : InstAlias<"mwait\t{%eax, %ecx|ecx, eax}", (MWAITrr)>, Requires<[Not64BitMode]>;
4859*0b57cec5SDimitry Andricdef : InstAlias<"mwait\t{%rax, %rcx|rcx, rax}", (MWAITrr)>, Requires<[In64BitMode]>;
4860*0b57cec5SDimitry Andric
4861*0b57cec5SDimitry Andricdef : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITOR32rrr)>,
4862*0b57cec5SDimitry Andric      Requires<[Not64BitMode]>;
4863*0b57cec5SDimitry Andricdef : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITOR64rrr)>,
4864*0b57cec5SDimitry Andric      Requires<[In64BitMode]>;
4865*0b57cec5SDimitry Andric
4866*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
4867*0b57cec5SDimitry Andric// SSE4.1 - Packed Move with Sign/Zero Extend
4868*0b57cec5SDimitry Andric// NOTE: Any Extend is promoted to Zero Extend in X86ISelDAGToDAG.cpp
4869*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
4870*0b57cec5SDimitry Andric
4871*0b57cec5SDimitry Andricmulticlass SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
4872*0b57cec5SDimitry Andric                            RegisterClass OutRC, RegisterClass InRC,
4873*0b57cec5SDimitry Andric                            X86FoldableSchedWrite sched> {
4874*0b57cec5SDimitry Andric  def rr : SS48I<opc, MRMSrcReg, (outs OutRC:$dst), (ins InRC:$src),
4875*0b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
4876*0b57cec5SDimitry Andric                 Sched<[sched]>;
4877*0b57cec5SDimitry Andric
4878*0b57cec5SDimitry Andric  def rm : SS48I<opc, MRMSrcMem, (outs OutRC:$dst), (ins MemOp:$src),
4879*0b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
4880*0b57cec5SDimitry Andric                 Sched<[sched.Folded]>;
4881*0b57cec5SDimitry Andric}
4882*0b57cec5SDimitry Andric
4883*0b57cec5SDimitry Andricmulticlass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,
4884*0b57cec5SDimitry Andric                              X86MemOperand MemOp, X86MemOperand MemYOp,
4885*0b57cec5SDimitry Andric                              Predicate prd> {
4886*0b57cec5SDimitry Andric  defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128,
4887*0b57cec5SDimitry Andric                               SchedWriteShuffle.XMM>;
4888*0b57cec5SDimitry Andric  let Predicates = [HasAVX, prd] in
4889*0b57cec5SDimitry Andric    defm V#NAME   : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
4890*0b57cec5SDimitry Andric                                     VR128, VR128, SchedWriteShuffle.XMM>,
4891*0b57cec5SDimitry Andric                                     VEX, VEX_WIG;
4892*0b57cec5SDimitry Andric  let Predicates = [HasAVX2, prd] in
4893*0b57cec5SDimitry Andric    defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
4894*0b57cec5SDimitry Andric                                     VR256, VR128, WriteShuffle256>,
4895*0b57cec5SDimitry Andric                                     VEX, VEX_L, VEX_WIG;
4896*0b57cec5SDimitry Andric}
4897*0b57cec5SDimitry Andric
4898*0b57cec5SDimitry Andricmulticlass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
4899*0b57cec5SDimitry Andric                          X86MemOperand MemYOp, Predicate prd> {
4900*0b57cec5SDimitry Andric  defm PMOVSX#NAME : SS41I_pmovx_rm_all<opc, !strconcat("pmovsx", OpcodeStr),
4901*0b57cec5SDimitry Andric                                        MemOp, MemYOp, prd>;
4902*0b57cec5SDimitry Andric  defm PMOVZX#NAME : SS41I_pmovx_rm_all<!add(opc, 0x10),
4903*0b57cec5SDimitry Andric                                        !strconcat("pmovzx", OpcodeStr),
4904*0b57cec5SDimitry Andric                                        MemOp, MemYOp, prd>;
4905*0b57cec5SDimitry Andric}
4906*0b57cec5SDimitry Andric
4907*0b57cec5SDimitry Andricdefm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem, NoVLX_Or_NoBWI>;
4908*0b57cec5SDimitry Andricdefm WD : SS41I_pmovx_rm<0x23, "wd", i64mem, i128mem, NoVLX>;
4909*0b57cec5SDimitry Andricdefm DQ : SS41I_pmovx_rm<0x25, "dq", i64mem, i128mem, NoVLX>;
4910*0b57cec5SDimitry Andric
4911*0b57cec5SDimitry Andricdefm BD : SS41I_pmovx_rm<0x21, "bd", i32mem, i64mem, NoVLX>;
4912*0b57cec5SDimitry Andricdefm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem, NoVLX>;
4913*0b57cec5SDimitry Andric
4914*0b57cec5SDimitry Andricdefm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem, NoVLX>;
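
// For reference, each defm above expands (via SS41I_pmovx_rm_all) into the
// SSE, VEX-128 and VEX-256 variants, e.g. the BW instantiation produces
// PMOVSXBW{rr,rm}, VPMOVSXBW{rr,rm} and VPMOVSXBWY{rr,rm} plus the PMOVZX
// equivalents; pmovzxbw zero-extends the low 8 bytes to 8 words, while
// pmovsxbw sign-extends them.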
4915*0b57cec5SDimitry Andric
4916*0b57cec5SDimitry Andric// AVX2 Patterns
4917*0b57cec5SDimitry Andricmulticlass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
4918*0b57cec5SDimitry Andric                                     SDNode ExtOp, SDNode InVecOp> {
4919*0b57cec5SDimitry Andric  // Register-Register patterns
4920*0b57cec5SDimitry Andric  let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
4921*0b57cec5SDimitry Andric  def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
4922*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
4923*0b57cec5SDimitry Andric  }
4924*0b57cec5SDimitry Andric  let Predicates = [HasAVX2, NoVLX] in {
4925*0b57cec5SDimitry Andric  def : Pat<(v8i32 (InVecOp (v16i8 VR128:$src))),
4926*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BDYrr) VR128:$src)>;
4927*0b57cec5SDimitry Andric  def : Pat<(v4i64 (InVecOp (v16i8 VR128:$src))),
4928*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BQYrr) VR128:$src)>;
4929*0b57cec5SDimitry Andric
4930*0b57cec5SDimitry Andric  def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
4931*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
4932*0b57cec5SDimitry Andric  def : Pat<(v4i64 (InVecOp (v8i16 VR128:$src))),
4933*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WQYrr) VR128:$src)>;
4934*0b57cec5SDimitry Andric
4935*0b57cec5SDimitry Andric  def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
4936*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
4937*0b57cec5SDimitry Andric  }
4938*0b57cec5SDimitry Andric
4939*0b57cec5SDimitry Andric  // Simple Register-Memory patterns
4940*0b57cec5SDimitry Andric  let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
4941*0b57cec5SDimitry Andric  def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
4942*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
4943*0b57cec5SDimitry Andric
4944*0b57cec5SDimitry Andric  def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
4945*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
4946*0b57cec5SDimitry Andric  }
4947*0b57cec5SDimitry Andric
4948*0b57cec5SDimitry Andric  let Predicates = [HasAVX2, NoVLX] in {
4949*0b57cec5SDimitry Andric  def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
4950*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
4951*0b57cec5SDimitry Andric  def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
4952*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
4953*0b57cec5SDimitry Andric
4954*0b57cec5SDimitry Andric  def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
4955*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
4956*0b57cec5SDimitry Andric  def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
4957*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
4958*0b57cec5SDimitry Andric
4959*0b57cec5SDimitry Andric  def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
4960*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
4961*0b57cec5SDimitry Andric  }
4962*0b57cec5SDimitry Andric
4963*0b57cec5SDimitry Andric  // AVX2 Register-Memory patterns
4964*0b57cec5SDimitry Andric  let Predicates = [HasAVX2, NoVLX] in {
4965*0b57cec5SDimitry Andric  def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
4966*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
4967*0b57cec5SDimitry Andric
4968*0b57cec5SDimitry Andric  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
4969*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
4970*0b57cec5SDimitry Andric  def : Pat<(v8i32 (InVecOp (v16i8 (X86vzload64 addr:$src)))),
4971*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
4972*0b57cec5SDimitry Andric
4973*0b57cec5SDimitry Andric  def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
4974*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
4975*0b57cec5SDimitry Andric
4976*0b57cec5SDimitry Andric  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
4977*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
4978*0b57cec5SDimitry Andric  def : Pat<(v4i64 (InVecOp (v16i8 (X86vzload64 addr:$src)))),
4979*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
4980*0b57cec5SDimitry Andric
4981*0b57cec5SDimitry Andric  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
4982*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
4983*0b57cec5SDimitry Andric  def : Pat<(v4i64 (InVecOp (v8i16 (X86vzload64 addr:$src)))),
4984*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
4985*0b57cec5SDimitry Andric  }
4986*0b57cec5SDimitry Andric}
4987*0b57cec5SDimitry Andric
4988*0b57cec5SDimitry Andricdefm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", sext, sext_invec>;
4989*0b57cec5SDimitry Andricdefm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", zext, zext_invec>;
4990*0b57cec5SDimitry Andric
4991*0b57cec5SDimitry Andric// SSE4.1/AVX patterns.
4992*0b57cec5SDimitry Andricmulticlass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
4993*0b57cec5SDimitry Andric                                SDNode ExtOp> {
4994*0b57cec5SDimitry Andric  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
4995*0b57cec5SDimitry Andric  def : Pat<(v8i16 (ExtOp (v16i8 VR128:$src))),
4996*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BWrr) VR128:$src)>;
4997*0b57cec5SDimitry Andric  }
4998*0b57cec5SDimitry Andric  let Predicates = [HasAVX, NoVLX] in {
4999*0b57cec5SDimitry Andric  def : Pat<(v4i32 (ExtOp (v16i8 VR128:$src))),
5000*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BDrr) VR128:$src)>;
5001*0b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (v16i8 VR128:$src))),
5002*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BQrr) VR128:$src)>;
5003*0b57cec5SDimitry Andric
5004*0b57cec5SDimitry Andric  def : Pat<(v4i32 (ExtOp (v8i16 VR128:$src))),
5005*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WDrr) VR128:$src)>;
5006*0b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (v8i16 VR128:$src))),
5007*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WQrr) VR128:$src)>;
5008*0b57cec5SDimitry Andric
5009*0b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (v4i32 VR128:$src))),
5010*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#DQrr) VR128:$src)>;
5011*0b57cec5SDimitry Andric  }
5012*0b57cec5SDimitry Andric  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
5013*0b57cec5SDimitry Andric  def : Pat<(v8i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
5014*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
5015*0b57cec5SDimitry Andric  }
5016*0b57cec5SDimitry Andric  let Predicates = [HasAVX, NoVLX] in {
5017*0b57cec5SDimitry Andric  def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
5018*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
5019*0b57cec5SDimitry Andric  def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
5020*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
5021*0b57cec5SDimitry Andric
5022*0b57cec5SDimitry Andric  def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
5023*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
5024*0b57cec5SDimitry Andric  def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
5025*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
5026*0b57cec5SDimitry Andric
5027*0b57cec5SDimitry Andric  def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
5028*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
5029*0b57cec5SDimitry Andric  }
5030*0b57cec5SDimitry Andric  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
5031*0b57cec5SDimitry Andric  def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
5032*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
5033*0b57cec5SDimitry Andric  def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
5034*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
5035*0b57cec5SDimitry Andric  def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
5036*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
5037*0b57cec5SDimitry Andric  def : Pat<(v8i16 (ExtOp (loadv16i8 addr:$src))),
5038*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
5039*0b57cec5SDimitry Andric  }
5040*0b57cec5SDimitry Andric  let Predicates = [HasAVX, NoVLX] in {
5041*0b57cec5SDimitry Andric  def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
5042*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
5043*0b57cec5SDimitry Andric  def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
5044*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
5045*0b57cec5SDimitry Andric  def : Pat<(v4i32 (ExtOp (loadv16i8 addr:$src))),
5046*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
5047*0b57cec5SDimitry Andric
5048*0b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
5049*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
5050*0b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (loadv16i8 addr:$src))),
5051*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
5052*0b57cec5SDimitry Andric
5053*0b57cec5SDimitry Andric  def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
5054*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
5055*0b57cec5SDimitry Andric  def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
5056*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
5057*0b57cec5SDimitry Andric  def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
5058*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
5059*0b57cec5SDimitry Andric  def : Pat<(v4i32 (ExtOp (loadv8i16 addr:$src))),
5060*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
5061*0b57cec5SDimitry Andric
5062*0b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
5063*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
5064*0b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
5065*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
5066*0b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (loadv8i16 addr:$src))),
5067*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
5068*0b57cec5SDimitry Andric
5069*0b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
5070*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
5071*0b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
5072*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
5073*0b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
5074*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
5075*0b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (loadv4i32 addr:$src))),
5076*0b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
5077*0b57cec5SDimitry Andric  }
5078*0b57cec5SDimitry Andric}
5079*0b57cec5SDimitry Andric
5080*0b57cec5SDimitry Andricdefm : SS41I_pmovx_patterns<"VPMOVSX", "s", sext_invec>;
5081*0b57cec5SDimitry Andricdefm : SS41I_pmovx_patterns<"VPMOVZX", "z", zext_invec>;
5082*0b57cec5SDimitry Andric
5083*0b57cec5SDimitry Andriclet Predicates = [UseSSE41] in {
5084*0b57cec5SDimitry Andric  defm : SS41I_pmovx_patterns<"PMOVSX", "s", sext_invec>;
5085*0b57cec5SDimitry Andric  defm : SS41I_pmovx_patterns<"PMOVZX", "z", zext_invec>;
5086*0b57cec5SDimitry Andric}
5087*0b57cec5SDimitry Andric
5088*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
5089*0b57cec5SDimitry Andric// SSE4.1 - Extract Instructions
5090*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
5091*0b57cec5SDimitry Andric
5092*0b57cec5SDimitry Andric/// SS41I_extract8 - SSE 4.1 extract 8 bits to a 32-bit reg or 8-bit mem
5093*0b57cec5SDimitry Andricmulticlass SS41I_extract8<bits<8> opc, string OpcodeStr> {
5094*0b57cec5SDimitry Andric  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
5095*0b57cec5SDimitry Andric                 (ins VR128:$src1, u8imm:$src2),
5096*0b57cec5SDimitry Andric                 !strconcat(OpcodeStr,
5097*0b57cec5SDimitry Andric                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5098*0b57cec5SDimitry Andric                 [(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1),
5099*0b57cec5SDimitry Andric                                         imm:$src2))]>,
5100*0b57cec5SDimitry Andric                  Sched<[WriteVecExtract]>;
5101*0b57cec5SDimitry Andric  let hasSideEffects = 0, mayStore = 1 in
5102*0b57cec5SDimitry Andric  def mr : SS4AIi8<opc, MRMDestMem, (outs),
5103*0b57cec5SDimitry Andric                 (ins i8mem:$dst, VR128:$src1, u8imm:$src2),
5104*0b57cec5SDimitry Andric                 !strconcat(OpcodeStr,
5105*0b57cec5SDimitry Andric                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5106*0b57cec5SDimitry Andric                 [(store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))),
5107*0b57cec5SDimitry Andric                          addr:$dst)]>, Sched<[WriteVecExtractSt]>;
5108*0b57cec5SDimitry Andric}
5109*0b57cec5SDimitry Andric
5110*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoBWI] in
5111*0b57cec5SDimitry Andric  defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX, VEX_WIG;
5112*0b57cec5SDimitry Andric
5113*0b57cec5SDimitry Andricdefm PEXTRB      : SS41I_extract8<0x14, "pextrb">;
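
// For reference: PEXTRB copies byte $src2 of the vector either into a GPR
// (zero-extended, hence the GR32orGR64 destination above) or directly to an
// 8-bit memory location (the mr form).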
5114*0b57cec5SDimitry Andric
5115*0b57cec5SDimitry Andric
5116*0b57cec5SDimitry Andric/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
5117*0b57cec5SDimitry Andricmulticlass SS41I_extract16<bits<8> opc, string OpcodeStr> {
5118*0b57cec5SDimitry Andric  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
5119*0b57cec5SDimitry Andric  def rr_REV : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
5120*0b57cec5SDimitry Andric                   (ins VR128:$src1, u8imm:$src2),
5121*0b57cec5SDimitry Andric                   !strconcat(OpcodeStr,
5122*0b57cec5SDimitry Andric                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
5123*0b57cec5SDimitry Andric                   Sched<[WriteVecExtract]>, FoldGenData<NAME#rr>;
5124*0b57cec5SDimitry Andric
5125*0b57cec5SDimitry Andric  let hasSideEffects = 0, mayStore = 1 in
5126*0b57cec5SDimitry Andric  def mr : SS4AIi8<opc, MRMDestMem, (outs),
5127*0b57cec5SDimitry Andric                 (ins i16mem:$dst, VR128:$src1, u8imm:$src2),
5128*0b57cec5SDimitry Andric                 !strconcat(OpcodeStr,
5129*0b57cec5SDimitry Andric                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5130*0b57cec5SDimitry Andric                 [(store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), imm:$src2))),
5131*0b57cec5SDimitry Andric                          addr:$dst)]>, Sched<[WriteVecExtractSt]>;
5132*0b57cec5SDimitry Andric}
5133*0b57cec5SDimitry Andric
5134*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoBWI] in
5135*0b57cec5SDimitry Andric  defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX, VEX_WIG;
5136*0b57cec5SDimitry Andric
5137*0b57cec5SDimitry Andricdefm PEXTRW      : SS41I_extract16<0x15, "pextrw">;
5138*0b57cec5SDimitry Andric
5139*0b57cec5SDimitry Andric
5140*0b57cec5SDimitry Andric/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
5141*0b57cec5SDimitry Andricmulticlass SS41I_extract32<bits<8> opc, string OpcodeStr> {
5142*0b57cec5SDimitry Andric  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
5143*0b57cec5SDimitry Andric                 (ins VR128:$src1, u8imm:$src2),
5144*0b57cec5SDimitry Andric                 !strconcat(OpcodeStr,
5145*0b57cec5SDimitry Andric                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5146*0b57cec5SDimitry Andric                 [(set GR32:$dst,
5147*0b57cec5SDimitry Andric                  (extractelt (v4i32 VR128:$src1), imm:$src2))]>,
5148*0b57cec5SDimitry Andric                  Sched<[WriteVecExtract]>;
5149*0b57cec5SDimitry Andric  def mr : SS4AIi8<opc, MRMDestMem, (outs),
5150*0b57cec5SDimitry Andric                 (ins i32mem:$dst, VR128:$src1, u8imm:$src2),
5151*0b57cec5SDimitry Andric                 !strconcat(OpcodeStr,
5152*0b57cec5SDimitry Andric                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5153*0b57cec5SDimitry Andric                 [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
5154*0b57cec5SDimitry Andric                          addr:$dst)]>, Sched<[WriteVecExtractSt]>;
5155*0b57cec5SDimitry Andric}
5156*0b57cec5SDimitry Andric
5157*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoDQI] in
5158*0b57cec5SDimitry Andric  defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;
5159*0b57cec5SDimitry Andric
5160*0b57cec5SDimitry Andricdefm PEXTRD      : SS41I_extract32<0x16, "pextrd">;
5161*0b57cec5SDimitry Andric
5162*0b57cec5SDimitry Andric/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
5163*0b57cec5SDimitry Andricmulticlass SS41I_extract64<bits<8> opc, string OpcodeStr> {
5164*0b57cec5SDimitry Andric  def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
5165*0b57cec5SDimitry Andric                 (ins VR128:$src1, u8imm:$src2),
5166*0b57cec5SDimitry Andric                 !strconcat(OpcodeStr,
5167*0b57cec5SDimitry Andric                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5168*0b57cec5SDimitry Andric                 [(set GR64:$dst,
5169*0b57cec5SDimitry Andric                  (extractelt (v2i64 VR128:$src1), imm:$src2))]>,
5170*0b57cec5SDimitry Andric                  Sched<[WriteVecExtract]>;
5171*0b57cec5SDimitry Andric  def mr : SS4AIi8<opc, MRMDestMem, (outs),
5172*0b57cec5SDimitry Andric                 (ins i64mem:$dst, VR128:$src1, u8imm:$src2),
5173*0b57cec5SDimitry Andric                 !strconcat(OpcodeStr,
5174*0b57cec5SDimitry Andric                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5175*0b57cec5SDimitry Andric                 [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
5176*0b57cec5SDimitry Andric                          addr:$dst)]>, Sched<[WriteVecExtractSt]>;
5177*0b57cec5SDimitry Andric}
5178*0b57cec5SDimitry Andric
5179*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoDQI] in
5180*0b57cec5SDimitry Andric  defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
5181*0b57cec5SDimitry Andric
5182*0b57cec5SDimitry Andricdefm PEXTRQ      : SS41I_extract64<0x16, "pextrq">, REX_W;
5183*0b57cec5SDimitry Andric
5184*0b57cec5SDimitry Andric/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
5185*0b57cec5SDimitry Andric/// destination
5186*0b57cec5SDimitry Andricmulticlass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
5187*0b57cec5SDimitry Andric  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
5188*0b57cec5SDimitry Andric                   (ins VR128:$src1, u8imm:$src2),
5189*0b57cec5SDimitry Andric                   !strconcat(OpcodeStr,
5190*0b57cec5SDimitry Andric                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5191*0b57cec5SDimitry Andric                   [(set GR32orGR64:$dst,
5192*0b57cec5SDimitry Andric                      (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
5193*0b57cec5SDimitry Andric                   Sched<[WriteVecExtract]>;
5194*0b57cec5SDimitry Andric  def mr : SS4AIi8<opc, MRMDestMem, (outs),
5195*0b57cec5SDimitry Andric                   (ins f32mem:$dst, VR128:$src1, u8imm:$src2),
5196*0b57cec5SDimitry Andric                   !strconcat(OpcodeStr,
5197*0b57cec5SDimitry Andric                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5198*0b57cec5SDimitry Andric                   [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
5199*0b57cec5SDimitry Andric                            addr:$dst)]>, Sched<[WriteVecExtractSt]>;
5200*0b57cec5SDimitry Andric}
5201*0b57cec5SDimitry Andric
5202*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in {
5203*0b57cec5SDimitry Andric  let Predicates = [UseAVX] in
5204*0b57cec5SDimitry Andric    defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX, VEX_WIG;
5205*0b57cec5SDimitry Andric  defm EXTRACTPS   : SS41I_extractf32<0x17, "extractps">;
5206*0b57cec5SDimitry Andric}
5207*0b57cec5SDimitry Andric
5208*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
5209*0b57cec5SDimitry Andric// SSE4.1 - Insert Instructions
5210*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
5211*0b57cec5SDimitry Andric
5212*0b57cec5SDimitry Andricmulticlass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
5213*0b57cec5SDimitry Andric  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
5214*0b57cec5SDimitry Andric      (ins VR128:$src1, GR32orGR64:$src2, u8imm:$src3),
5215*0b57cec5SDimitry Andric      !if(Is2Addr,
5216*0b57cec5SDimitry Andric        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5217*0b57cec5SDimitry Andric        !strconcat(asm,
5218*0b57cec5SDimitry Andric                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5219*0b57cec5SDimitry Andric      [(set VR128:$dst,
5220*0b57cec5SDimitry Andric        (X86pinsrb VR128:$src1, GR32orGR64:$src2, imm:$src3))]>,
5221*0b57cec5SDimitry Andric      Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
5222*0b57cec5SDimitry Andric  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
5223*0b57cec5SDimitry Andric      (ins VR128:$src1, i8mem:$src2, u8imm:$src3),
5224*0b57cec5SDimitry Andric      !if(Is2Addr,
5225*0b57cec5SDimitry Andric        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5226*0b57cec5SDimitry Andric        !strconcat(asm,
5227*0b57cec5SDimitry Andric                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5228*0b57cec5SDimitry Andric      [(set VR128:$dst,
5229*0b57cec5SDimitry Andric        (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), imm:$src3))]>,
5230*0b57cec5SDimitry Andric                   Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
5231*0b57cec5SDimitry Andric}
5232*0b57cec5SDimitry Andric
5233*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoBWI] in
5234*0b57cec5SDimitry Andric  defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V, VEX_WIG;
5235*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in
5236*0b57cec5SDimitry Andric  defm PINSRB  : SS41I_insert8<0x20, "pinsrb">;
5237*0b57cec5SDimitry Andric
5238*0b57cec5SDimitry Andricmulticlass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
5239*0b57cec5SDimitry Andric  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
5240*0b57cec5SDimitry Andric      (ins VR128:$src1, GR32:$src2, u8imm:$src3),
5241*0b57cec5SDimitry Andric      !if(Is2Addr,
5242*0b57cec5SDimitry Andric        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5243*0b57cec5SDimitry Andric        !strconcat(asm,
5244*0b57cec5SDimitry Andric                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5245*0b57cec5SDimitry Andric      [(set VR128:$dst,
5246*0b57cec5SDimitry Andric        (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
5247*0b57cec5SDimitry Andric      Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
5248*0b57cec5SDimitry Andric  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
5249*0b57cec5SDimitry Andric      (ins VR128:$src1, i32mem:$src2, u8imm:$src3),
5250*0b57cec5SDimitry Andric      !if(Is2Addr,
5251*0b57cec5SDimitry Andric        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5252*0b57cec5SDimitry Andric        !strconcat(asm,
5253*0b57cec5SDimitry Andric                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5254*0b57cec5SDimitry Andric      [(set VR128:$dst,
5255*0b57cec5SDimitry Andric        (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), imm:$src3)))]>,
5256*0b57cec5SDimitry Andric                   Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
5257*0b57cec5SDimitry Andric}
5258*0b57cec5SDimitry Andric
5259*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoDQI] in
5260*0b57cec5SDimitry Andric  defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
5261*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in
5262*0b57cec5SDimitry Andric  defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
5263*0b57cec5SDimitry Andric
5264*0b57cec5SDimitry Andricmulticlass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
5265*0b57cec5SDimitry Andric  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
5266*0b57cec5SDimitry Andric      (ins VR128:$src1, GR64:$src2, u8imm:$src3),
5267*0b57cec5SDimitry Andric      !if(Is2Addr,
5268*0b57cec5SDimitry Andric        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5269*0b57cec5SDimitry Andric        !strconcat(asm,
5270*0b57cec5SDimitry Andric                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5271*0b57cec5SDimitry Andric      [(set VR128:$dst,
5272*0b57cec5SDimitry Andric        (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
5273*0b57cec5SDimitry Andric      Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
5274*0b57cec5SDimitry Andric  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
5275*0b57cec5SDimitry Andric      (ins VR128:$src1, i64mem:$src2, u8imm:$src3),
5276*0b57cec5SDimitry Andric      !if(Is2Addr,
5277*0b57cec5SDimitry Andric        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5278*0b57cec5SDimitry Andric        !strconcat(asm,
5279*0b57cec5SDimitry Andric                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5280*0b57cec5SDimitry Andric      [(set VR128:$dst,
5281*0b57cec5SDimitry Andric        (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), imm:$src3)))]>,
5282*0b57cec5SDimitry Andric                   Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
5283*0b57cec5SDimitry Andric}
5284*0b57cec5SDimitry Andric
5285*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoDQI] in
5286*0b57cec5SDimitry Andric  defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
5287*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in
5288*0b57cec5SDimitry Andric  defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;
5289*0b57cec5SDimitry Andric
5290*0b57cec5SDimitry Andric// insertps has a few different modes. The first two below are optimized
5291*0b57cec5SDimitry Andric// inserts that won't zero arbitrary elements in the destination vector.
5292*0b57cec5SDimitry Andric// The next one matches the intrinsic and may zero arbitrary elements in
5293*0b57cec5SDimitry Andric// the target vector.
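//
// As an illustrative reminder of the imm8 layout (taken from the Intel ISA
// reference, not from the patterns below): bits [7:6] select the source
// element, bits [5:4] select the destination element, and bits [3:0] are a
// zero mask. For example, an immediate of 0x1C (0b00011100) copies source
// element 0 into destination element 1 and zeroes destination elements 2
// and 3.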
5294*0b57cec5SDimitry Andricmulticlass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
5295*0b57cec5SDimitry Andric  let isCommutable = 1 in
5296*0b57cec5SDimitry Andric  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
5297*0b57cec5SDimitry Andric      (ins VR128:$src1, VR128:$src2, u8imm:$src3),
5298*0b57cec5SDimitry Andric      !if(Is2Addr,
5299*0b57cec5SDimitry Andric        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5300*0b57cec5SDimitry Andric        !strconcat(asm,
5301*0b57cec5SDimitry Andric                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5302*0b57cec5SDimitry Andric      [(set VR128:$dst,
5303*0b57cec5SDimitry Andric        (X86insertps VR128:$src1, VR128:$src2, imm:$src3))]>,
5304*0b57cec5SDimitry Andric      Sched<[SchedWriteFShuffle.XMM]>;
5305*0b57cec5SDimitry Andric  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
5306*0b57cec5SDimitry Andric      (ins VR128:$src1, f32mem:$src2, u8imm:$src3),
5307*0b57cec5SDimitry Andric      !if(Is2Addr,
5308*0b57cec5SDimitry Andric        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5309*0b57cec5SDimitry Andric        !strconcat(asm,
5310*0b57cec5SDimitry Andric                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5311*0b57cec5SDimitry Andric      [(set VR128:$dst,
5312*0b57cec5SDimitry Andric        (X86insertps VR128:$src1,
5313*0b57cec5SDimitry Andric                   (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
5314*0b57cec5SDimitry Andric                    imm:$src3))]>,
5315*0b57cec5SDimitry Andric      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
5316*0b57cec5SDimitry Andric}
5317*0b57cec5SDimitry Andric
5318*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in {
5319*0b57cec5SDimitry Andric  let Predicates = [UseAVX] in
5320*0b57cec5SDimitry Andric    defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>,
5321*0b57cec5SDimitry Andric                     VEX_4V, VEX_WIG;
5322*0b57cec5SDimitry Andric  let Constraints = "$src1 = $dst" in
5323*0b57cec5SDimitry Andric    defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1>;
5324*0b57cec5SDimitry Andric}
5325*0b57cec5SDimitry Andric
5326*0b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
5327*0b57cec5SDimitry Andric  // If we're inserting an element from a vbroadcast of a load, fold the
5328*0b57cec5SDimitry Andric  // load into the X86insertps instruction.
5329*0b57cec5SDimitry Andric  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1),
5330*0b57cec5SDimitry Andric                (X86VBroadcast (loadf32 addr:$src2)), imm:$src3)),
5331*0b57cec5SDimitry Andric            (VINSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
5332*0b57cec5SDimitry Andric  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1),
5333*0b57cec5SDimitry Andric                (X86VBroadcast (loadv4f32 addr:$src2)), imm:$src3)),
5334*0b57cec5SDimitry Andric            (VINSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
5335*0b57cec5SDimitry Andric}
5336*0b57cec5SDimitry Andric
5337*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
5338*0b57cec5SDimitry Andric// SSE4.1 - Round Instructions
5339*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
5340*0b57cec5SDimitry Andric
5341*0b57cec5SDimitry Andricmulticlass sse41_fp_unop_p<bits<8> opc, string OpcodeStr,
5342*0b57cec5SDimitry Andric                           X86MemOperand x86memop, RegisterClass RC,
5343*0b57cec5SDimitry Andric                           ValueType VT, PatFrag mem_frag, SDNode OpNode,
5344*0b57cec5SDimitry Andric                           X86FoldableSchedWrite sched> {
5345*0b57cec5SDimitry Andric  // Vector intrinsic operation, reg
5347*0b57cec5SDimitry Andric  def r : SS4AIi8<opc, MRMSrcReg,
5348*0b57cec5SDimitry Andric                  (outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
5349*0b57cec5SDimitry Andric                  !strconcat(OpcodeStr,
5350*0b57cec5SDimitry Andric                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5351*0b57cec5SDimitry Andric                  [(set RC:$dst, (VT (OpNode RC:$src1, imm:$src2)))]>,
5352*0b57cec5SDimitry Andric                  Sched<[sched]>;
5353*0b57cec5SDimitry Andric
5354*0b57cec5SDimitry Andric  // Vector intrinsic operation, mem
5355*0b57cec5SDimitry Andric  def m : SS4AIi8<opc, MRMSrcMem,
5356*0b57cec5SDimitry Andric                  (outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
5357*0b57cec5SDimitry Andric                  !strconcat(OpcodeStr,
5358*0b57cec5SDimitry Andric                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5359*0b57cec5SDimitry Andric                  [(set RC:$dst,
5360*0b57cec5SDimitry Andric                        (VT (OpNode (mem_frag addr:$src1),imm:$src2)))]>,
5361*0b57cec5SDimitry Andric                  Sched<[sched.Folded]>;
5362*0b57cec5SDimitry Andric}
5363*0b57cec5SDimitry Andric
5364*0b57cec5SDimitry Andricmulticlass avx_fp_unop_rm<bits<8> opcss, bits<8> opcsd,
5365*0b57cec5SDimitry Andric                          string OpcodeStr, X86FoldableSchedWrite sched> {
5366*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle, hasSideEffects = 0 in {
5367*0b57cec5SDimitry Andric  def SSr : SS4AIi8<opcss, MRMSrcReg,
5368*0b57cec5SDimitry Andric        (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32u8imm:$src3),
5369*0b57cec5SDimitry Andric        !strconcat(OpcodeStr,
5370*0b57cec5SDimitry Andric            "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
5371*0b57cec5SDimitry Andric      []>, Sched<[sched]>;
5372*0b57cec5SDimitry Andric
5373*0b57cec5SDimitry Andric  let mayLoad = 1 in
5374*0b57cec5SDimitry Andric  def SSm : SS4AIi8<opcss, MRMSrcMem,
5375*0b57cec5SDimitry Andric        (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, i32u8imm:$src3),
5376*0b57cec5SDimitry Andric        !strconcat(OpcodeStr,
5377*0b57cec5SDimitry Andric             "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
5378*0b57cec5SDimitry Andric        []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
5379*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedSingle, hasSideEffects = 0
5380*0b57cec5SDimitry Andric
5381*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
5382*0b57cec5SDimitry Andric  def SDr : SS4AIi8<opcsd, MRMSrcReg,
5383*0b57cec5SDimitry Andric        (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32u8imm:$src3),
5384*0b57cec5SDimitry Andric        !strconcat(OpcodeStr,
5385*0b57cec5SDimitry Andric              "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
5386*0b57cec5SDimitry Andric        []>, Sched<[sched]>;
5387*0b57cec5SDimitry Andric
5388*0b57cec5SDimitry Andric  let mayLoad = 1 in
5389*0b57cec5SDimitry Andric  def SDm : SS4AIi8<opcsd, MRMSrcMem,
5390*0b57cec5SDimitry Andric        (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, i32u8imm:$src3),
5391*0b57cec5SDimitry Andric        !strconcat(OpcodeStr,
5392*0b57cec5SDimitry Andric             "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
5393*0b57cec5SDimitry Andric        []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
5394*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedDouble, hasSideEffects = 0
5395*0b57cec5SDimitry Andric}
5396*0b57cec5SDimitry Andric
5397*0b57cec5SDimitry Andricmulticlass sse41_fp_unop_s<bits<8> opcss, bits<8> opcsd,
5398*0b57cec5SDimitry Andric                           string OpcodeStr, X86FoldableSchedWrite sched> {
5399*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle, hasSideEffects = 0 in {
5400*0b57cec5SDimitry Andric  def SSr : SS4AIi8<opcss, MRMSrcReg,
5401*0b57cec5SDimitry Andric                    (outs FR32:$dst), (ins FR32:$src1, i32u8imm:$src2),
5402*0b57cec5SDimitry Andric                    !strconcat(OpcodeStr,
5403*0b57cec5SDimitry Andric                               "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5404*0b57cec5SDimitry Andric                    []>, Sched<[sched]>;
5405*0b57cec5SDimitry Andric
5406*0b57cec5SDimitry Andric  let mayLoad = 1 in
5407*0b57cec5SDimitry Andric  def SSm : SS4AIi8<opcss, MRMSrcMem,
5408*0b57cec5SDimitry Andric                    (outs FR32:$dst), (ins f32mem:$src1, i32u8imm:$src2),
5409*0b57cec5SDimitry Andric                    !strconcat(OpcodeStr,
5410*0b57cec5SDimitry Andric                               "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5411*0b57cec5SDimitry Andric                    []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
5412*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedSingle, hasSideEffects = 0
5413*0b57cec5SDimitry Andric
5414*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
5415*0b57cec5SDimitry Andric  def SDr : SS4AIi8<opcsd, MRMSrcReg,
5416*0b57cec5SDimitry Andric                    (outs FR64:$dst), (ins FR64:$src1, i32u8imm:$src2),
5417*0b57cec5SDimitry Andric                    !strconcat(OpcodeStr,
5418*0b57cec5SDimitry Andric                               "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5419*0b57cec5SDimitry Andric                    []>, Sched<[sched]>;
5420*0b57cec5SDimitry Andric
5421*0b57cec5SDimitry Andric  let mayLoad = 1 in
5422*0b57cec5SDimitry Andric  def SDm : SS4AIi8<opcsd, MRMSrcMem,
5423*0b57cec5SDimitry Andric                    (outs FR64:$dst), (ins f64mem:$src1, i32u8imm:$src2),
5424*0b57cec5SDimitry Andric                    !strconcat(OpcodeStr,
5425*0b57cec5SDimitry Andric                               "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5426*0b57cec5SDimitry Andric                    []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
5427*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedDouble, hasSideEffects = 0
5428*0b57cec5SDimitry Andric}
5429*0b57cec5SDimitry Andric
5430*0b57cec5SDimitry Andricmulticlass sse41_fp_binop_s<bits<8> opcss, bits<8> opcsd,
5431*0b57cec5SDimitry Andric                            string OpcodeStr, X86FoldableSchedWrite sched,
5432*0b57cec5SDimitry Andric                            ValueType VT32, ValueType VT64,
5433*0b57cec5SDimitry Andric                            SDNode OpNode, bit Is2Addr = 1> {
5434*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 in {
5435*0b57cec5SDimitry Andric  def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
5436*0b57cec5SDimitry Andric        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
5437*0b57cec5SDimitry Andric        !if(Is2Addr,
5438*0b57cec5SDimitry Andric            !strconcat(OpcodeStr,
5439*0b57cec5SDimitry Andric                "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5440*0b57cec5SDimitry Andric            !strconcat(OpcodeStr,
5441*0b57cec5SDimitry Andric                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5442*0b57cec5SDimitry Andric        [(set VR128:$dst, (VT32 (OpNode VR128:$src1, VR128:$src2, imm:$src3)))]>,
5443*0b57cec5SDimitry Andric        Sched<[sched]>;
5444*0b57cec5SDimitry Andric
5445*0b57cec5SDimitry Andric  def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
5446*0b57cec5SDimitry Andric        (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32u8imm:$src3),
5447*0b57cec5SDimitry Andric        !if(Is2Addr,
5448*0b57cec5SDimitry Andric            !strconcat(OpcodeStr,
5449*0b57cec5SDimitry Andric                "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5450*0b57cec5SDimitry Andric            !strconcat(OpcodeStr,
5451*0b57cec5SDimitry Andric                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5452*0b57cec5SDimitry Andric        [(set VR128:$dst,
5453*0b57cec5SDimitry Andric             (OpNode VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
5454*0b57cec5SDimitry Andric        Sched<[sched.Folded, sched.ReadAfterFold]>;
5455*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedSingle, isCodeGenOnly = 1
5456*0b57cec5SDimitry Andric
5457*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in {
5458*0b57cec5SDimitry Andric  def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
5459*0b57cec5SDimitry Andric        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
5460*0b57cec5SDimitry Andric        !if(Is2Addr,
5461*0b57cec5SDimitry Andric            !strconcat(OpcodeStr,
5462*0b57cec5SDimitry Andric                "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5463*0b57cec5SDimitry Andric            !strconcat(OpcodeStr,
5464*0b57cec5SDimitry Andric                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5465*0b57cec5SDimitry Andric        [(set VR128:$dst, (VT64 (OpNode VR128:$src1, VR128:$src2, imm:$src3)))]>,
5466*0b57cec5SDimitry Andric        Sched<[sched]>;
5467*0b57cec5SDimitry Andric
5468*0b57cec5SDimitry Andric  def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
5469*0b57cec5SDimitry Andric        (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32u8imm:$src3),
5470*0b57cec5SDimitry Andric        !if(Is2Addr,
5471*0b57cec5SDimitry Andric            !strconcat(OpcodeStr,
5472*0b57cec5SDimitry Andric                "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5473*0b57cec5SDimitry Andric            !strconcat(OpcodeStr,
5474*0b57cec5SDimitry Andric                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5475*0b57cec5SDimitry Andric        [(set VR128:$dst,
5476*0b57cec5SDimitry Andric              (OpNode VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
5477*0b57cec5SDimitry Andric        Sched<[sched.Folded, sched.ReadAfterFold]>;
5478*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedDouble, isCodeGenOnly = 1
5479*0b57cec5SDimitry Andric}
5480*0b57cec5SDimitry Andric
5481*0b57cec5SDimitry Andric// FP round - roundss, roundps, roundsd, roundpd
5482*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
5483*0b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in {
5484*0b57cec5SDimitry Andric    // Intrinsic form
5485*0b57cec5SDimitry Andric    defm VROUNDPS  : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
5486*0b57cec5SDimitry Andric                                     loadv4f32, X86VRndScale, SchedWriteFRnd.XMM>,
5487*0b57cec5SDimitry Andric                                   VEX, VEX_WIG;
5488*0b57cec5SDimitry Andric    defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32,
5489*0b57cec5SDimitry Andric                                     loadv8f32, X86VRndScale, SchedWriteFRnd.YMM>,
5490*0b57cec5SDimitry Andric                                   VEX, VEX_L, VEX_WIG;
5491*0b57cec5SDimitry Andric  }
5492*0b57cec5SDimitry Andric
5493*0b57cec5SDimitry Andric  let ExeDomain = SSEPackedDouble in {
5494*0b57cec5SDimitry Andric    defm VROUNDPD  : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
5495*0b57cec5SDimitry Andric                                     loadv2f64, X86VRndScale, SchedWriteFRnd.XMM>,
5496*0b57cec5SDimitry Andric                                   VEX, VEX_WIG;
5497*0b57cec5SDimitry Andric    defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64,
5498*0b57cec5SDimitry Andric                                     loadv4f64, X86VRndScale, SchedWriteFRnd.YMM>,
5499*0b57cec5SDimitry Andric                                   VEX, VEX_L, VEX_WIG;
5500*0b57cec5SDimitry Andric  }
5501*0b57cec5SDimitry Andric}
5502*0b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
5503*0b57cec5SDimitry Andric  defm VROUND  : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl,
5504*0b57cec5SDimitry Andric                                  v4f32, v2f64, X86RndScales, 0>,
5505*0b57cec5SDimitry Andric                                  VEX_4V, VEX_LIG, VEX_WIG;
5506*0b57cec5SDimitry Andric  defm VROUND  : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>,
5507*0b57cec5SDimitry Andric                                VEX_4V, VEX_LIG, VEX_WIG;
5508*0b57cec5SDimitry Andric}
5509*0b57cec5SDimitry Andric
5510*0b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
5511*0b57cec5SDimitry Andric  def : Pat<(X86VRndScale FR32:$src1, imm:$src2),
5512*0b57cec5SDimitry Andric            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, imm:$src2)>;
5513*0b57cec5SDimitry Andric  def : Pat<(X86VRndScale FR64:$src1, imm:$src2),
5514*0b57cec5SDimitry Andric            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, imm:$src2)>;
5515*0b57cec5SDimitry Andric}
5516*0b57cec5SDimitry Andric
5517*0b57cec5SDimitry Andriclet Predicates = [UseAVX, OptForSize] in {
5518*0b57cec5SDimitry Andric  def : Pat<(X86VRndScale (loadf32 addr:$src1), imm:$src2),
5519*0b57cec5SDimitry Andric            (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>;
5520*0b57cec5SDimitry Andric  def : Pat<(X86VRndScale (loadf64 addr:$src1), imm:$src2),
5521*0b57cec5SDimitry Andric            (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>;
5522*0b57cec5SDimitry Andric}
5523*0b57cec5SDimitry Andric
5524*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in
5525*0b57cec5SDimitry Andricdefm ROUNDPS  : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32,
5526*0b57cec5SDimitry Andric                                memopv4f32, X86VRndScale, SchedWriteFRnd.XMM>;
5527*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in
5528*0b57cec5SDimitry Andricdefm ROUNDPD  : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64,
5529*0b57cec5SDimitry Andric                                memopv2f64, X86VRndScale, SchedWriteFRnd.XMM>;
5530*0b57cec5SDimitry Andric
5531*0b57cec5SDimitry Andricdefm ROUND  : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl>;
5532*0b57cec5SDimitry Andric
5533*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in
5534*0b57cec5SDimitry Andricdefm ROUND  : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl,
5535*0b57cec5SDimitry Andric                               v4f32, v2f64, X86RndScales>;
5536*0b57cec5SDimitry Andric
5537*0b57cec5SDimitry Andriclet Predicates = [UseSSE41] in {
5538*0b57cec5SDimitry Andric  def : Pat<(X86VRndScale FR32:$src1, imm:$src2),
5539*0b57cec5SDimitry Andric            (ROUNDSSr FR32:$src1, imm:$src2)>;
5540*0b57cec5SDimitry Andric  def : Pat<(X86VRndScale FR64:$src1, imm:$src2),
5541*0b57cec5SDimitry Andric            (ROUNDSDr FR64:$src1, imm:$src2)>;
5542*0b57cec5SDimitry Andric}
5543*0b57cec5SDimitry Andric
5544*0b57cec5SDimitry Andriclet Predicates = [UseSSE41, OptForSize] in {
5545*0b57cec5SDimitry Andric  def : Pat<(X86VRndScale (loadf32 addr:$src1), imm:$src2),
5546*0b57cec5SDimitry Andric            (ROUNDSSm addr:$src1, imm:$src2)>;
5547*0b57cec5SDimitry Andric  def : Pat<(X86VRndScale (loadf64 addr:$src1), imm:$src2),
5548*0b57cec5SDimitry Andric            (ROUNDSDm addr:$src1, imm:$src2)>;
5549*0b57cec5SDimitry Andric}
5550*0b57cec5SDimitry Andric
5551*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
5552*0b57cec5SDimitry Andric// SSE4.1 - Packed Bit Test
5553*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
5554*0b57cec5SDimitry Andric
5555*0b57cec5SDimitry Andric// ptest - the X86ptest node matched below is produced in X86ISelLowering,
5556*0b57cec5SDimitry Andric// primarily when lowering the corresponding Intel intrinsic.
5557*0b57cec5SDimitry Andriclet Defs = [EFLAGS], Predicates = [HasAVX] in {
5558*0b57cec5SDimitry Andricdef VPTESTrr  : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
5559*0b57cec5SDimitry Andric                "vptest\t{$src2, $src1|$src1, $src2}",
5560*0b57cec5SDimitry Andric                [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
5561*0b57cec5SDimitry Andric                Sched<[SchedWriteVecTest.XMM]>, VEX, VEX_WIG;
5562*0b57cec5SDimitry Andricdef VPTESTrm  : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
5563*0b57cec5SDimitry Andric                "vptest\t{$src2, $src1|$src1, $src2}",
5564*0b57cec5SDimitry Andric                [(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
5565*0b57cec5SDimitry Andric                Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>,
5566*0b57cec5SDimitry Andric                VEX, VEX_WIG;
5567*0b57cec5SDimitry Andric
5568*0b57cec5SDimitry Andricdef VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
5569*0b57cec5SDimitry Andric                "vptest\t{$src2, $src1|$src1, $src2}",
5570*0b57cec5SDimitry Andric                [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
5571*0b57cec5SDimitry Andric                Sched<[SchedWriteVecTest.YMM]>, VEX, VEX_L, VEX_WIG;
5572*0b57cec5SDimitry Andricdef VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
5573*0b57cec5SDimitry Andric                "vptest\t{$src2, $src1|$src1, $src2}",
5574*0b57cec5SDimitry Andric                [(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
5575*0b57cec5SDimitry Andric                Sched<[SchedWriteVecTest.YMM.Folded, SchedWriteVecTest.YMM.ReadAfterFold]>,
5576*0b57cec5SDimitry Andric                VEX, VEX_L, VEX_WIG;
5577*0b57cec5SDimitry Andric}
5578*0b57cec5SDimitry Andric
5579*0b57cec5SDimitry Andriclet Defs = [EFLAGS] in {
5580*0b57cec5SDimitry Andricdef PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
5581*0b57cec5SDimitry Andric              "ptest\t{$src2, $src1|$src1, $src2}",
5582*0b57cec5SDimitry Andric              [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
5583*0b57cec5SDimitry Andric              Sched<[SchedWriteVecTest.XMM]>;
5584*0b57cec5SDimitry Andricdef PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
5585*0b57cec5SDimitry Andric              "ptest\t{$src2, $src1|$src1, $src2}",
5586*0b57cec5SDimitry Andric              [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
5587*0b57cec5SDimitry Andric              Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>;
5588*0b57cec5SDimitry Andric}
5589*0b57cec5SDimitry Andric
5590*0b57cec5SDimitry Andric// The bit test instructions below are AVX only
5591*0b57cec5SDimitry Andricmulticlass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
5592*0b57cec5SDimitry Andric                       X86MemOperand x86memop, PatFrag mem_frag, ValueType vt,
5593*0b57cec5SDimitry Andric                       X86FoldableSchedWrite sched> {
5594*0b57cec5SDimitry Andric  def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
5595*0b57cec5SDimitry Andric            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
5596*0b57cec5SDimitry Andric            [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>,
5597*0b57cec5SDimitry Andric            Sched<[sched]>, VEX;
5598*0b57cec5SDimitry Andric  def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
5599*0b57cec5SDimitry Andric            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
5600*0b57cec5SDimitry Andric            [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
5601*0b57cec5SDimitry Andric            Sched<[sched.Folded, sched.ReadAfterFold]>, VEX;
5602*0b57cec5SDimitry Andric}
5603*0b57cec5SDimitry Andric
5604*0b57cec5SDimitry Andriclet Defs = [EFLAGS], Predicates = [HasAVX] in {
5605*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in {
5606*0b57cec5SDimitry Andricdefm VTESTPS  : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32, v4f32,
5607*0b57cec5SDimitry Andric                            SchedWriteFTest.XMM>;
5608*0b57cec5SDimitry Andricdefm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32, v8f32,
5609*0b57cec5SDimitry Andric                            SchedWriteFTest.YMM>, VEX_L;
5610*0b57cec5SDimitry Andric}
5611*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in {
5612*0b57cec5SDimitry Andricdefm VTESTPD  : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64, v2f64,
5613*0b57cec5SDimitry Andric                            SchedWriteFTest.XMM>;
5614*0b57cec5SDimitry Andricdefm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64,
5615*0b57cec5SDimitry Andric                            SchedWriteFTest.YMM>, VEX_L;
5616*0b57cec5SDimitry Andric}
5617*0b57cec5SDimitry Andric}
5618*0b57cec5SDimitry Andric
5619*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
5620*0b57cec5SDimitry Andric// SSE4.1 - Misc Instructions
5621*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
5622*0b57cec5SDimitry Andric
5623*0b57cec5SDimitry Andriclet Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
5624*0b57cec5SDimitry Andric  def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
5625*0b57cec5SDimitry Andric                     "popcnt{w}\t{$src, $dst|$dst, $src}",
5626*0b57cec5SDimitry Andric                     [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>,
5627*0b57cec5SDimitry Andric                     Sched<[WritePOPCNT]>, OpSize16, XS;
5628*0b57cec5SDimitry Andric  def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
5629*0b57cec5SDimitry Andric                     "popcnt{w}\t{$src, $dst|$dst, $src}",
5630*0b57cec5SDimitry Andric                     [(set GR16:$dst, (ctpop (loadi16 addr:$src))),
5631*0b57cec5SDimitry Andric                      (implicit EFLAGS)]>,
5632*0b57cec5SDimitry Andric                      Sched<[WritePOPCNT.Folded]>, OpSize16, XS;
5633*0b57cec5SDimitry Andric
5634*0b57cec5SDimitry Andric  def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
5635*0b57cec5SDimitry Andric                     "popcnt{l}\t{$src, $dst|$dst, $src}",
5636*0b57cec5SDimitry Andric                     [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>,
5637*0b57cec5SDimitry Andric                     Sched<[WritePOPCNT]>, OpSize32, XS;
5638*0b57cec5SDimitry Andric
5639*0b57cec5SDimitry Andric  def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
5640*0b57cec5SDimitry Andric                     "popcnt{l}\t{$src, $dst|$dst, $src}",
5641*0b57cec5SDimitry Andric                     [(set GR32:$dst, (ctpop (loadi32 addr:$src))),
5642*0b57cec5SDimitry Andric                      (implicit EFLAGS)]>,
5643*0b57cec5SDimitry Andric                      Sched<[WritePOPCNT.Folded]>, OpSize32, XS;
5644*0b57cec5SDimitry Andric
5645*0b57cec5SDimitry Andric  def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
5646*0b57cec5SDimitry Andric                      "popcnt{q}\t{$src, $dst|$dst, $src}",
5647*0b57cec5SDimitry Andric                      [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>,
5648*0b57cec5SDimitry Andric                      Sched<[WritePOPCNT]>, XS;
5649*0b57cec5SDimitry Andric  def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
5650*0b57cec5SDimitry Andric                      "popcnt{q}\t{$src, $dst|$dst, $src}",
5651*0b57cec5SDimitry Andric                      [(set GR64:$dst, (ctpop (loadi64 addr:$src))),
5652*0b57cec5SDimitry Andric                       (implicit EFLAGS)]>,
5653*0b57cec5SDimitry Andric                       Sched<[WritePOPCNT.Folded]>, XS;
5654*0b57cec5SDimitry Andric}
5655*0b57cec5SDimitry Andric
5656*0b57cec5SDimitry Andric// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
5657*0b57cec5SDimitry Andricmulticlass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
5658*0b57cec5SDimitry Andric                                 SDNode OpNode, PatFrag ld_frag,
5659*0b57cec5SDimitry Andric                                 X86FoldableSchedWrite Sched> {
5660*0b57cec5SDimitry Andric  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
5661*0b57cec5SDimitry Andric                 (ins VR128:$src),
5662*0b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5663*0b57cec5SDimitry Andric                 [(set VR128:$dst, (v8i16 (OpNode (v8i16 VR128:$src))))]>,
5664*0b57cec5SDimitry Andric                 Sched<[Sched]>;
5665*0b57cec5SDimitry Andric  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
5666*0b57cec5SDimitry Andric                  (ins i128mem:$src),
5667*0b57cec5SDimitry Andric                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5668*0b57cec5SDimitry Andric                  [(set VR128:$dst,
5669*0b57cec5SDimitry Andric                    (v8i16 (OpNode (ld_frag addr:$src))))]>,
5670*0b57cec5SDimitry Andric                 Sched<[Sched.Folded]>;
5671*0b57cec5SDimitry Andric}
5672*0b57cec5SDimitry Andric
5673*0b57cec5SDimitry Andric// PHMIN has the same profile as PSAD, thus we use the same scheduling
5674*0b57cec5SDimitry Andric// model, although the naming is misleading.
5675*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in
5676*0b57cec5SDimitry Andricdefm VPHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "vphminposuw",
5677*0b57cec5SDimitry Andric                                         X86phminpos, load,
5678*0b57cec5SDimitry Andric                                         WritePHMINPOS>, VEX, VEX_WIG;
5679*0b57cec5SDimitry Andricdefm PHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "phminposuw",
5680*0b57cec5SDimitry Andric                                         X86phminpos, memop,
5681*0b57cec5SDimitry Andric                                         WritePHMINPOS>;
5682*0b57cec5SDimitry Andric
5683*0b57cec5SDimitry Andric/// SS48I_binop_rm - Simple SSE41 binary operator.
5684*0b57cec5SDimitry Andricmulticlass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5685*0b57cec5SDimitry Andric                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
5686*0b57cec5SDimitry Andric                          X86MemOperand x86memop, X86FoldableSchedWrite sched,
5687*0b57cec5SDimitry Andric                          bit Is2Addr = 1> {
5688*0b57cec5SDimitry Andric  let isCommutable = 1 in
5689*0b57cec5SDimitry Andric  def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
5690*0b57cec5SDimitry Andric       (ins RC:$src1, RC:$src2),
5691*0b57cec5SDimitry Andric       !if(Is2Addr,
5692*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
5693*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
5694*0b57cec5SDimitry Andric       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
5695*0b57cec5SDimitry Andric       Sched<[sched]>;
5696*0b57cec5SDimitry Andric  def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
5697*0b57cec5SDimitry Andric       (ins RC:$src1, x86memop:$src2),
5698*0b57cec5SDimitry Andric       !if(Is2Addr,
5699*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
5700*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
5701*0b57cec5SDimitry Andric       [(set RC:$dst,
5702*0b57cec5SDimitry Andric         (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
5703*0b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
5704*0b57cec5SDimitry Andric}
5705*0b57cec5SDimitry Andric
5706*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
5707*0b57cec5SDimitry Andric  defm VPMINSD   : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128,
5708*0b57cec5SDimitry Andric                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
5709*0b57cec5SDimitry Andric                                  VEX_4V, VEX_WIG;
5710*0b57cec5SDimitry Andric  defm VPMINUD   : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128,
5711*0b57cec5SDimitry Andric                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
5712*0b57cec5SDimitry Andric                                  VEX_4V, VEX_WIG;
5713*0b57cec5SDimitry Andric  defm VPMAXSD   : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128,
5714*0b57cec5SDimitry Andric                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
5715*0b57cec5SDimitry Andric                                  VEX_4V, VEX_WIG;
5716*0b57cec5SDimitry Andric  defm VPMAXUD   : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128,
5717*0b57cec5SDimitry Andric                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
5718*0b57cec5SDimitry Andric                                  VEX_4V, VEX_WIG;
5719*0b57cec5SDimitry Andric  defm VPMULDQ   : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v2i64, VR128,
5720*0b57cec5SDimitry Andric                                  load, i128mem, SchedWriteVecIMul.XMM, 0>,
5721*0b57cec5SDimitry Andric                                  VEX_4V, VEX_WIG;
5722*0b57cec5SDimitry Andric}
5723*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
5724*0b57cec5SDimitry Andric  defm VPMINSB   : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128,
5725*0b57cec5SDimitry Andric                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
5726*0b57cec5SDimitry Andric                                  VEX_4V, VEX_WIG;
5727*0b57cec5SDimitry Andric  defm VPMINUW   : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128,
5728*0b57cec5SDimitry Andric                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
5729*0b57cec5SDimitry Andric                                  VEX_4V, VEX_WIG;
5730*0b57cec5SDimitry Andric  defm VPMAXSB   : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128,
5731*0b57cec5SDimitry Andric                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
5732*0b57cec5SDimitry Andric                                  VEX_4V, VEX_WIG;
5733*0b57cec5SDimitry Andric  defm VPMAXUW   : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128,
5734*0b57cec5SDimitry Andric                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
5735*0b57cec5SDimitry Andric                                  VEX_4V, VEX_WIG;
5736*0b57cec5SDimitry Andric}
5737*0b57cec5SDimitry Andric
5738*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
5739*0b57cec5SDimitry Andric  defm VPMINSDY  : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256,
5740*0b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
5741*0b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
5742*0b57cec5SDimitry Andric  defm VPMINUDY  : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256,
5743*0b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
5744*0b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
5745*0b57cec5SDimitry Andric  defm VPMAXSDY  : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256,
5746*0b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
5747*0b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
5748*0b57cec5SDimitry Andric  defm VPMAXUDY  : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256,
5749*0b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
5750*0b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
5751*0b57cec5SDimitry Andric  defm VPMULDQY  : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v4i64, VR256,
5752*0b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecIMul.YMM, 0>,
5753*0b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
5754*0b57cec5SDimitry Andric}
5755*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
5756*0b57cec5SDimitry Andric  defm VPMINSBY  : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256,
5757*0b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
5758*0b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
5759*0b57cec5SDimitry Andric  defm VPMINUWY  : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256,
5760*0b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
5761*0b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
5762*0b57cec5SDimitry Andric  defm VPMAXSBY  : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256,
5763*0b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
5764*0b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
5765*0b57cec5SDimitry Andric  defm VPMAXUWY  : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256,
5766*0b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
5767*0b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
5768*0b57cec5SDimitry Andric}
5769*0b57cec5SDimitry Andric
5770*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
5771*0b57cec5SDimitry Andric  defm PMINSB   : SS48I_binop_rm<0x38, "pminsb", smin, v16i8, VR128,
5772*0b57cec5SDimitry Andric                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
5773*0b57cec5SDimitry Andric  defm PMINSD   : SS48I_binop_rm<0x39, "pminsd", smin, v4i32, VR128,
5774*0b57cec5SDimitry Andric                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
5775*0b57cec5SDimitry Andric  defm PMINUD   : SS48I_binop_rm<0x3B, "pminud", umin, v4i32, VR128,
5776*0b57cec5SDimitry Andric                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
5777*0b57cec5SDimitry Andric  defm PMINUW   : SS48I_binop_rm<0x3A, "pminuw", umin, v8i16, VR128,
5778*0b57cec5SDimitry Andric                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
5779*0b57cec5SDimitry Andric  defm PMAXSB   : SS48I_binop_rm<0x3C, "pmaxsb", smax, v16i8, VR128,
5780*0b57cec5SDimitry Andric                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
5781*0b57cec5SDimitry Andric  defm PMAXSD   : SS48I_binop_rm<0x3D, "pmaxsd", smax, v4i32, VR128,
5782*0b57cec5SDimitry Andric                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
5783*0b57cec5SDimitry Andric  defm PMAXUD   : SS48I_binop_rm<0x3F, "pmaxud", umax, v4i32, VR128,
5784*0b57cec5SDimitry Andric                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
5785*0b57cec5SDimitry Andric  defm PMAXUW   : SS48I_binop_rm<0x3E, "pmaxuw", umax, v8i16, VR128,
5786*0b57cec5SDimitry Andric                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
5787*0b57cec5SDimitry Andric  defm PMULDQ   : SS48I_binop_rm<0x28, "pmuldq", X86pmuldq, v2i64, VR128,
5788*0b57cec5SDimitry Andric                                 memop, i128mem, SchedWriteVecIMul.XMM, 1>;
5789*0b57cec5SDimitry Andric}
5790*0b57cec5SDimitry Andric
5791*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in
5792*0b57cec5SDimitry Andric  defm VPMULLD  : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
5793*0b57cec5SDimitry Andric                                 load, i128mem, SchedWritePMULLD.XMM, 0>,
5794*0b57cec5SDimitry Andric                                 VEX_4V, VEX_WIG;
5795*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in
5796*0b57cec5SDimitry Andric  defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
5797*0b57cec5SDimitry Andric                                 load, i128mem, SchedWriteVecALU.XMM, 0>,
5798*0b57cec5SDimitry Andric                                 VEX_4V, VEX_WIG;
5799*0b57cec5SDimitry Andric
5800*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in
5801*0b57cec5SDimitry Andric  defm VPMULLDY  : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
5802*0b57cec5SDimitry Andric                                  load, i256mem, SchedWritePMULLD.YMM, 0>,
5803*0b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
5804*0b57cec5SDimitry Andriclet Predicates = [HasAVX2] in
5805*0b57cec5SDimitry Andric  defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
5806*0b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
5807*0b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
5808*0b57cec5SDimitry Andric
5809*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
5810*0b57cec5SDimitry Andric  defm PMULLD  : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
5811*0b57cec5SDimitry Andric                                memop, i128mem, SchedWritePMULLD.XMM, 1>;
5812*0b57cec5SDimitry Andric  defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
5813*0b57cec5SDimitry Andric                                memop, i128mem, SchedWriteVecALU.XMM, 1>;
5814*0b57cec5SDimitry Andric}
5815*0b57cec5SDimitry Andric
5816*0b57cec5SDimitry Andric/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
5817*0b57cec5SDimitry Andricmulticlass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
5818*0b57cec5SDimitry Andric                 Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
5819*0b57cec5SDimitry Andric                 X86MemOperand x86memop, bit Is2Addr,
5820*0b57cec5SDimitry Andric                 X86FoldableSchedWrite sched> {
5821*0b57cec5SDimitry Andric  let isCommutable = 1 in
5822*0b57cec5SDimitry Andric  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
5823*0b57cec5SDimitry Andric        (ins RC:$src1, RC:$src2, u8imm:$src3),
5824*0b57cec5SDimitry Andric        !if(Is2Addr,
5825*0b57cec5SDimitry Andric            !strconcat(OpcodeStr,
5826*0b57cec5SDimitry Andric                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5827*0b57cec5SDimitry Andric            !strconcat(OpcodeStr,
5828*0b57cec5SDimitry Andric                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5829*0b57cec5SDimitry Andric        [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
5830*0b57cec5SDimitry Andric        Sched<[sched]>;
5831*0b57cec5SDimitry Andric  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
5832*0b57cec5SDimitry Andric        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
5833*0b57cec5SDimitry Andric        !if(Is2Addr,
5834*0b57cec5SDimitry Andric            !strconcat(OpcodeStr,
5835*0b57cec5SDimitry Andric                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5836*0b57cec5SDimitry Andric            !strconcat(OpcodeStr,
5837*0b57cec5SDimitry Andric                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5838*0b57cec5SDimitry Andric        [(set RC:$dst,
5839*0b57cec5SDimitry Andric          (IntId RC:$src1, (memop_frag addr:$src2), imm:$src3))]>,
5840*0b57cec5SDimitry Andric        Sched<[sched.Folded, sched.ReadAfterFold]>;
5841*0b57cec5SDimitry Andric}
5842*0b57cec5SDimitry Andric
5843*0b57cec5SDimitry Andric/// SS41I_binop_rmi - SSE 4.1 binary operator with 8-bit immediate
5844*0b57cec5SDimitry Andricmulticlass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
5845*0b57cec5SDimitry Andric                           ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
5846*0b57cec5SDimitry Andric                           X86MemOperand x86memop, bit Is2Addr,
5847*0b57cec5SDimitry Andric                           X86FoldableSchedWrite sched> {
5848*0b57cec5SDimitry Andric  let isCommutable = 1 in
5849*0b57cec5SDimitry Andric  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
5850*0b57cec5SDimitry Andric        (ins RC:$src1, RC:$src2, u8imm:$src3),
5851*0b57cec5SDimitry Andric        !if(Is2Addr,
5852*0b57cec5SDimitry Andric            !strconcat(OpcodeStr,
5853*0b57cec5SDimitry Andric                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5854*0b57cec5SDimitry Andric            !strconcat(OpcodeStr,
5855*0b57cec5SDimitry Andric                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5856*0b57cec5SDimitry Andric        [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
5857*0b57cec5SDimitry Andric        Sched<[sched]>;
5858*0b57cec5SDimitry Andric  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
5859*0b57cec5SDimitry Andric        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
5860*0b57cec5SDimitry Andric        !if(Is2Addr,
5861*0b57cec5SDimitry Andric            !strconcat(OpcodeStr,
5862*0b57cec5SDimitry Andric                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5863*0b57cec5SDimitry Andric            !strconcat(OpcodeStr,
5864*0b57cec5SDimitry Andric                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5865*0b57cec5SDimitry Andric        [(set RC:$dst,
5866*0b57cec5SDimitry Andric          (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), imm:$src3)))]>,
5867*0b57cec5SDimitry Andric        Sched<[sched.Folded, sched.ReadAfterFold]>;
5868*0b57cec5SDimitry Andric}
5869*0b57cec5SDimitry Andric
5870*0b57cec5SDimitry Andricdef BlendCommuteImm2 : SDNodeXForm<imm, [{
5871*0b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue() & 0x03;
5872*0b57cec5SDimitry Andric  return getI8Imm(Imm ^ 0x03, SDLoc(N));
5873*0b57cec5SDimitry Andric}]>;
5874*0b57cec5SDimitry Andric
5875*0b57cec5SDimitry Andricdef BlendCommuteImm4 : SDNodeXForm<imm, [{
5876*0b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue() & 0x0f;
5877*0b57cec5SDimitry Andric  return getI8Imm(Imm ^ 0x0f, SDLoc(N));
5878*0b57cec5SDimitry Andric}]>;
5879*0b57cec5SDimitry Andric
5880*0b57cec5SDimitry Andricdef BlendCommuteImm8 : SDNodeXForm<imm, [{
5881*0b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue() & 0xff;
5882*0b57cec5SDimitry Andric  return getI8Imm(Imm ^ 0xff, SDLoc(N));
5883*0b57cec5SDimitry Andric}]>;
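
// A worked example of the commute transforms above (illustrative, not taken
// from this file): commuting the two blend operands means every selected lane
// must now come from the other source, so the immediate is inverted within
// its width; BlendCommuteImm4, for instance, maps 0b0101 to 0b1010.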
5884*0b57cec5SDimitry Andric
5885*0b57cec5SDimitry Andric// Turn a 4-bit blendi immediate to 8-bit for use with pblendw.
5886*0b57cec5SDimitry Andricdef BlendScaleImm4 : SDNodeXForm<imm, [{
5887*0b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue();
5888*0b57cec5SDimitry Andric  uint8_t NewImm = 0;
5889*0b57cec5SDimitry Andric  for (unsigned i = 0; i != 4; ++i) {
5890*0b57cec5SDimitry Andric    if (Imm & (1 << i))
5891*0b57cec5SDimitry Andric      NewImm |= 0x3 << (i * 2);
5892*0b57cec5SDimitry Andric  }
5893*0b57cec5SDimitry Andric  return getI8Imm(NewImm, SDLoc(N));
5894*0b57cec5SDimitry Andric}]>;
5895*0b57cec5SDimitry Andric
5896*0b57cec5SDimitry Andric// Turn a 2-bit blendi immediate to 8-bit for use with pblendw.
5897*0b57cec5SDimitry Andricdef BlendScaleImm2 : SDNodeXForm<imm, [{
5898*0b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue();
5899*0b57cec5SDimitry Andric  uint8_t NewImm = 0;
5900*0b57cec5SDimitry Andric  for (unsigned i = 0; i != 2; ++i) {
5901*0b57cec5SDimitry Andric    if (Imm & (1 << i))
5902*0b57cec5SDimitry Andric      NewImm |= 0xf << (i * 4);
5903*0b57cec5SDimitry Andric  }
5904*0b57cec5SDimitry Andric  return getI8Imm(NewImm, SDLoc(N));
5905*0b57cec5SDimitry Andric}]>;
5906*0b57cec5SDimitry Andric
5907*0b57cec5SDimitry Andric// Turn a 2-bit blendi immediate to 4-bit for use with pblendd.
5908*0b57cec5SDimitry Andricdef BlendScaleImm2to4 : SDNodeXForm<imm, [{
5909*0b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue();
5910*0b57cec5SDimitry Andric  uint8_t NewImm = 0;
5911*0b57cec5SDimitry Andric  for (unsigned i = 0; i != 2; ++i) {
5912*0b57cec5SDimitry Andric    if (Imm & (1 << i))
5913*0b57cec5SDimitry Andric      NewImm |= 0x3 << (i * 2);
5914*0b57cec5SDimitry Andric  }
5915*0b57cec5SDimitry Andric  return getI8Imm(NewImm, SDLoc(N));
5916*0b57cec5SDimitry Andric}]>;
5917*0b57cec5SDimitry Andric
5918*0b57cec5SDimitry Andric// Turn a 4-bit blendi immediate to 8-bit for use with pblendw and invert it.
5919*0b57cec5SDimitry Andricdef BlendScaleCommuteImm4 : SDNodeXForm<imm, [{
5920*0b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue();
5921*0b57cec5SDimitry Andric  uint8_t NewImm = 0;
5922*0b57cec5SDimitry Andric  for (unsigned i = 0; i != 4; ++i) {
5923*0b57cec5SDimitry Andric    if (Imm & (1 << i))
5924*0b57cec5SDimitry Andric      NewImm |= 0x3 << (i * 2);
5925*0b57cec5SDimitry Andric  }
5926*0b57cec5SDimitry Andric  return getI8Imm(NewImm ^ 0xff, SDLoc(N));
5927*0b57cec5SDimitry Andric}]>;
5928*0b57cec5SDimitry Andric
5929*0b57cec5SDimitry Andric// Turn a 2-bit blendi immediate to 8-bit for use with pblendw and invert it.
5930*0b57cec5SDimitry Andricdef BlendScaleCommuteImm2 : SDNodeXForm<imm, [{
5931*0b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue();
5932*0b57cec5SDimitry Andric  uint8_t NewImm = 0;
5933*0b57cec5SDimitry Andric  for (unsigned i = 0; i != 2; ++i) {
5934*0b57cec5SDimitry Andric    if (Imm & (1 << i))
5935*0b57cec5SDimitry Andric      NewImm |= 0xf << (i * 4);
5936*0b57cec5SDimitry Andric  }
5937*0b57cec5SDimitry Andric  return getI8Imm(NewImm ^ 0xff, SDLoc(N));
5938*0b57cec5SDimitry Andric}]>;
5939*0b57cec5SDimitry Andric
5940*0b57cec5SDimitry Andric// Turn a 2-bit blendi immediate to 4-bit for use with pblendd and invert it.
5941*0b57cec5SDimitry Andricdef BlendScaleCommuteImm2to4 : SDNodeXForm<imm, [{
5942*0b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue();
5943*0b57cec5SDimitry Andric  uint8_t NewImm = 0;
5944*0b57cec5SDimitry Andric  for (unsigned i = 0; i != 2; ++i) {
5945*0b57cec5SDimitry Andric    if (Imm & (1 << i))
5946*0b57cec5SDimitry Andric      NewImm |= 0x3 << (i * 2);
5947*0b57cec5SDimitry Andric  }
5948*0b57cec5SDimitry Andric  return getI8Imm(NewImm ^ 0xf, SDLoc(N));
5949*0b57cec5SDimitry Andric}]>;
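// Worked example (illustrative): a v4i32 blend mask 0b0101 emulated with
// pblendw must cover two 16-bit lanes per element, giving 0b00110011
// (BlendScaleImm4); a v2i64 mask 0b01 scales to 0b00001111 for pblendw
// (BlendScaleImm2) or to 0b0011 for pblendd (BlendScaleImm2to4). The
// *Commute* variants additionally invert the scaled mask for swapped sources.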
5950*0b57cec5SDimitry Andric
5951*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
5952*0b57cec5SDimitry Andric  let isCommutable = 0 in {
5953*0b57cec5SDimitry Andric    defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
5954*0b57cec5SDimitry Andric                                        VR128, load, i128mem, 0,
5955*0b57cec5SDimitry Andric                                        SchedWriteMPSAD.XMM>, VEX_4V, VEX_WIG;
5956*0b57cec5SDimitry Andric  }
5957*0b57cec5SDimitry Andric
5958*0b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in
5959*0b57cec5SDimitry Andric  defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
5960*0b57cec5SDimitry Andric                                   VR128, load, f128mem, 0,
5961*0b57cec5SDimitry Andric                                   SchedWriteDPPS.XMM>, VEX_4V, VEX_WIG;
5962*0b57cec5SDimitry Andric  let ExeDomain = SSEPackedDouble in
5963*0b57cec5SDimitry Andric  defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
5964*0b57cec5SDimitry Andric                                   VR128, load, f128mem, 0,
5965*0b57cec5SDimitry Andric                                   SchedWriteDPPD.XMM>, VEX_4V, VEX_WIG;
5966*0b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in
5967*0b57cec5SDimitry Andric  defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
5968*0b57cec5SDimitry Andric                                    VR256, load, i256mem, 0,
5969*0b57cec5SDimitry Andric                                    SchedWriteDPPS.YMM>, VEX_4V, VEX_L, VEX_WIG;
5970*0b57cec5SDimitry Andric}
5971*0b57cec5SDimitry Andric
5972*0b57cec5SDimitry Andriclet Predicates = [HasAVX2] in {
5973*0b57cec5SDimitry Andric  let isCommutable = 0 in {
5974*0b57cec5SDimitry Andric  defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
5975*0b57cec5SDimitry Andric                                  VR256, load, i256mem, 0,
5976*0b57cec5SDimitry Andric                                  SchedWriteMPSAD.YMM>, VEX_4V, VEX_L, VEX_WIG;
5977*0b57cec5SDimitry Andric  }
5978*0b57cec5SDimitry Andric}
5979*0b57cec5SDimitry Andric
5980*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
5981*0b57cec5SDimitry Andric  let isCommutable = 0 in {
5982*0b57cec5SDimitry Andric  defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
5983*0b57cec5SDimitry Andric                                     VR128, memop, i128mem, 1,
5984*0b57cec5SDimitry Andric                                     SchedWriteMPSAD.XMM>;
5985*0b57cec5SDimitry Andric  }
5986*0b57cec5SDimitry Andric
5987*0b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in
5988*0b57cec5SDimitry Andric  defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
5989*0b57cec5SDimitry Andric                                  VR128, memop, f128mem, 1,
5990*0b57cec5SDimitry Andric                                  SchedWriteDPPS.XMM>;
5991*0b57cec5SDimitry Andric  let ExeDomain = SSEPackedDouble in
5992*0b57cec5SDimitry Andric  defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
5993*0b57cec5SDimitry Andric                                  VR128, memop, f128mem, 1,
5994*0b57cec5SDimitry Andric                                  SchedWriteDPPD.XMM>;
5995*0b57cec5SDimitry Andric}
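// For reference (a summary of the immediate encodings, not asserted by these
// definitions): dpps/dppd use the high nibble of the immediate to select which
// source elements enter the dot product and the low nibble to select which
// destination elements receive the result; mpsadbw's immediate selects the
// 32-bit block offsets in the two sources for its eight 4-byte SADs.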
5996*0b57cec5SDimitry Andric
5997*0b57cec5SDimitry Andric/// SS41I_blend_rmi - SSE 4.1 blend with 8-bit immediate
5998*0b57cec5SDimitry Andricmulticlass SS41I_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
5999*0b57cec5SDimitry Andric                           ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
6000*0b57cec5SDimitry Andric                           X86MemOperand x86memop, bit Is2Addr, Domain d,
6001*0b57cec5SDimitry Andric                           X86FoldableSchedWrite sched, SDNodeXForm commuteXForm> {
6002*0b57cec5SDimitry Andriclet ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in {
6003*0b57cec5SDimitry Andric  let isCommutable = 1 in
6004*0b57cec5SDimitry Andric  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
6005*0b57cec5SDimitry Andric        (ins RC:$src1, RC:$src2, u8imm:$src3),
6006*0b57cec5SDimitry Andric        !if(Is2Addr,
6007*0b57cec5SDimitry Andric            !strconcat(OpcodeStr,
6008*0b57cec5SDimitry Andric                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6009*0b57cec5SDimitry Andric            !strconcat(OpcodeStr,
6010*0b57cec5SDimitry Andric                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
6011*0b57cec5SDimitry Andric        [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
6012*0b57cec5SDimitry Andric        Sched<[sched]>;
6013*0b57cec5SDimitry Andric  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
6014*0b57cec5SDimitry Andric        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
6015*0b57cec5SDimitry Andric        !if(Is2Addr,
6016*0b57cec5SDimitry Andric            !strconcat(OpcodeStr,
6017*0b57cec5SDimitry Andric                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6018*0b57cec5SDimitry Andric            !strconcat(OpcodeStr,
6019*0b57cec5SDimitry Andric                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
6020*0b57cec5SDimitry Andric        [(set RC:$dst,
6021*0b57cec5SDimitry Andric          (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), imm:$src3)))]>,
6022*0b57cec5SDimitry Andric        Sched<[sched.Folded, sched.ReadAfterFold]>;
6023*0b57cec5SDimitry Andric}
6024*0b57cec5SDimitry Andric
6025*0b57cec5SDimitry Andric  // Pattern to commute if the load is in the first source.
6026*0b57cec5SDimitry Andric  def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, imm:$src3)),
6027*0b57cec5SDimitry Andric            (!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
6028*0b57cec5SDimitry Andric                                            (commuteXForm imm:$src3))>;
6029*0b57cec5SDimitry Andric}
6030*0b57cec5SDimitry Andric
6031*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
6032*0b57cec5SDimitry Andric  defm VBLENDPS : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v4f32,
6033*0b57cec5SDimitry Andric                                  VR128, load, f128mem, 0, SSEPackedSingle,
6034*0b57cec5SDimitry Andric                                  SchedWriteFBlend.XMM, BlendCommuteImm4>,
6035*0b57cec5SDimitry Andric                                  VEX_4V, VEX_WIG;
6036*0b57cec5SDimitry Andric  defm VBLENDPSY : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v8f32,
6037*0b57cec5SDimitry Andric                                   VR256, load, f256mem, 0, SSEPackedSingle,
6038*0b57cec5SDimitry Andric                                   SchedWriteFBlend.YMM, BlendCommuteImm8>,
6039*0b57cec5SDimitry Andric                                   VEX_4V, VEX_L, VEX_WIG;
6040*0b57cec5SDimitry Andric  defm VBLENDPD : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v2f64,
6041*0b57cec5SDimitry Andric                                  VR128, load, f128mem, 0, SSEPackedDouble,
6042*0b57cec5SDimitry Andric                                  SchedWriteFBlend.XMM, BlendCommuteImm2>,
6043*0b57cec5SDimitry Andric                                  VEX_4V, VEX_WIG;
6044*0b57cec5SDimitry Andric  defm VBLENDPDY : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v4f64,
6045*0b57cec5SDimitry Andric                                   VR256, load, f256mem, 0, SSEPackedDouble,
6046*0b57cec5SDimitry Andric                                   SchedWriteFBlend.YMM, BlendCommuteImm4>,
6047*0b57cec5SDimitry Andric                                   VEX_4V, VEX_L, VEX_WIG;
6048*0b57cec5SDimitry Andric  defm VPBLENDW : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v8i16,
6049*0b57cec5SDimitry Andric                                  VR128, load, i128mem, 0, SSEPackedInt,
6050*0b57cec5SDimitry Andric                                  SchedWriteBlend.XMM, BlendCommuteImm8>,
6051*0b57cec5SDimitry Andric                                  VEX_4V, VEX_WIG;
6052*0b57cec5SDimitry Andric}
6053*0b57cec5SDimitry Andric
6054*0b57cec5SDimitry Andriclet Predicates = [HasAVX2] in {
6055*0b57cec5SDimitry Andric  defm VPBLENDWY : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v16i16,
6056*0b57cec5SDimitry Andric                                   VR256, load, i256mem, 0, SSEPackedInt,
6057*0b57cec5SDimitry Andric                                   SchedWriteBlend.YMM, BlendCommuteImm8>,
6058*0b57cec5SDimitry Andric                                   VEX_4V, VEX_L, VEX_WIG;
6059*0b57cec5SDimitry Andric}
6060*0b57cec5SDimitry Andric
6061*0b57cec5SDimitry Andric// Emulate vXi32/vXi64 blends with vXf32/vXf64 or pblendw.
6062*0b57cec5SDimitry Andric// ExecutionDomainFixPass will clean up domains later on.
6063*0b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in {
6064*0b57cec5SDimitry Andricdef : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3),
6065*0b57cec5SDimitry Andric          (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$src3)>;
6066*0b57cec5SDimitry Andricdef : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3),
6067*0b57cec5SDimitry Andric          (VBLENDPDYrmi VR256:$src1, addr:$src2, imm:$src3)>;
6068*0b57cec5SDimitry Andricdef : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3),
6069*0b57cec5SDimitry Andric          (VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 imm:$src3))>;
6070*0b57cec5SDimitry Andric
6071*0b57cec5SDimitry Andric// Use pblendw for 128-bit integer blends to keep them in the integer domain and
6072*0b57cec5SDimitry Andric// prevent them from becoming movsd via commuting under optsize.
6073*0b57cec5SDimitry Andricdef : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
6074*0b57cec5SDimitry Andric          (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>;
6075*0b57cec5SDimitry Andricdef : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3),
6076*0b57cec5SDimitry Andric          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>;
6077*0b57cec5SDimitry Andricdef : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3),
6078*0b57cec5SDimitry Andric          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>;
6079*0b57cec5SDimitry Andric
6080*0b57cec5SDimitry Andricdef : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), imm:$src3),
6081*0b57cec5SDimitry Andric          (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$src3)>;
6082*0b57cec5SDimitry Andricdef : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), imm:$src3),
6083*0b57cec5SDimitry Andric          (VBLENDPSYrmi VR256:$src1, addr:$src2, imm:$src3)>;
6084*0b57cec5SDimitry Andricdef : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, imm:$src3),
6085*0b57cec5SDimitry Andric          (VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 imm:$src3))>;
6086*0b57cec5SDimitry Andric
6087*0b57cec5SDimitry Andric// Use pblendw for 128-bit integer blends to keep them in the integer domain and
6088*0b57cec5SDimitry Andric// prevent them from becoming movss via commuting under optsize.
6089*0b57cec5SDimitry Andricdef : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
6090*0b57cec5SDimitry Andric          (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>;
6091*0b57cec5SDimitry Andricdef : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), imm:$src3),
6092*0b57cec5SDimitry Andric          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
6093*0b57cec5SDimitry Andricdef : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, imm:$src3),
6094*0b57cec5SDimitry Andric          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
6095*0b57cec5SDimitry Andric}
6096*0b57cec5SDimitry Andric
6097*0b57cec5SDimitry Andricdefm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32,
6098*0b57cec5SDimitry Andric                               VR128, memop, f128mem, 1, SSEPackedSingle,
6099*0b57cec5SDimitry Andric                               SchedWriteFBlend.XMM, BlendCommuteImm4>;
6100*0b57cec5SDimitry Andricdefm BLENDPD : SS41I_blend_rmi<0x0D, "blendpd", X86Blendi, v2f64,
6101*0b57cec5SDimitry Andric                               VR128, memop, f128mem, 1, SSEPackedDouble,
6102*0b57cec5SDimitry Andric                               SchedWriteFBlend.XMM, BlendCommuteImm2>;
6103*0b57cec5SDimitry Andricdefm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16,
6104*0b57cec5SDimitry Andric                               VR128, memop, i128mem, 1, SSEPackedInt,
6105*0b57cec5SDimitry Andric                               SchedWriteBlend.XMM, BlendCommuteImm8>;
6106*0b57cec5SDimitry Andric
6107*0b57cec5SDimitry Andriclet Predicates = [UseSSE41] in {
6108*0b57cec5SDimitry Andric// Use pblendw for 128-bit integer blends to keep them in the integer domain and
6109*0b57cec5SDimitry Andric// prevent them from becoming movss/movsd via commuting under optsize.
6110*0b57cec5SDimitry Andricdef : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
6111*0b57cec5SDimitry Andric          (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>;
6112*0b57cec5SDimitry Andricdef : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), imm:$src3),
6113*0b57cec5SDimitry Andric          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>;
6114*0b57cec5SDimitry Andricdef : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, imm:$src3),
6115*0b57cec5SDimitry Andric          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>;
6116*0b57cec5SDimitry Andric
6117*0b57cec5SDimitry Andricdef : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
6118*0b57cec5SDimitry Andric          (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>;
6119*0b57cec5SDimitry Andricdef : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), imm:$src3),
6120*0b57cec5SDimitry Andric          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
6121*0b57cec5SDimitry Andricdef : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, imm:$src3),
6122*0b57cec5SDimitry Andric          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
6123*0b57cec5SDimitry Andric}
6124*0b57cec5SDimitry Andric
6125*0b57cec5SDimitry Andric// For insertion into the zero index (low half) of a 256-bit vector, it is
6126*0b57cec5SDimitry Andric// more efficient to generate a blend with an immediate instead of an insert*128.
6127*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
6128*0b57cec5SDimitry Andricdef : Pat<(insert_subvector (v4f64 VR256:$src1), (v2f64 VR128:$src2), (iPTR 0)),
6129*0b57cec5SDimitry Andric          (VBLENDPDYrri VR256:$src1,
6130*0b57cec5SDimitry Andric                        (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
6131*0b57cec5SDimitry Andric                                       VR128:$src2, sub_xmm), 0x3)>;
6132*0b57cec5SDimitry Andricdef : Pat<(insert_subvector (v8f32 VR256:$src1), (v4f32 VR128:$src2), (iPTR 0)),
6133*0b57cec5SDimitry Andric          (VBLENDPSYrri VR256:$src1,
6134*0b57cec5SDimitry Andric                        (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
6135*0b57cec5SDimitry Andric                                       VR128:$src2, sub_xmm), 0xf)>;
6136*0b57cec5SDimitry Andric
6137*0b57cec5SDimitry Andricdef : Pat<(insert_subvector (loadv4f64 addr:$src2), (v2f64 VR128:$src1), (iPTR 0)),
6138*0b57cec5SDimitry Andric          (VBLENDPDYrmi (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
6139*0b57cec5SDimitry Andric                                       VR128:$src1, sub_xmm), addr:$src2, 0xc)>;
6140*0b57cec5SDimitry Andricdef : Pat<(insert_subvector (loadv8f32 addr:$src2), (v4f32 VR128:$src1), (iPTR 0)),
6141*0b57cec5SDimitry Andric          (VBLENDPSYrmi (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
6142*0b57cec5SDimitry Andric                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
6143*0b57cec5SDimitry Andric}
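// Illustrative example: for (insert_subvector (v4f64 %a), (v2f64 %b), 0) the
// vblendpd immediate 0x3 takes elements 0-1 from the widened %b and elements
// 2-3 from %a, which is exactly an insertion of %b into the low 128 bits. The
// memory patterns use the complementary masks (0xc / 0xf0) because the loaded
// vector supplies the high elements there.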
6144*0b57cec5SDimitry Andric
6145*0b57cec5SDimitry Andric/// SS41I_quaternary_avx - AVX SSE 4.1 with 4 operands
6146*0b57cec5SDimitry Andricmulticlass SS41I_quaternary_avx<bits<8> opc, string OpcodeStr, RegisterClass RC,
6147*0b57cec5SDimitry Andric                                X86MemOperand x86memop, ValueType VT,
6148*0b57cec5SDimitry Andric                                PatFrag mem_frag, SDNode OpNode,
6149*0b57cec5SDimitry Andric                                X86FoldableSchedWrite sched> {
6150*0b57cec5SDimitry Andric  def rr : Ii8Reg<opc, MRMSrcReg, (outs RC:$dst),
6151*0b57cec5SDimitry Andric                  (ins RC:$src1, RC:$src2, RC:$src3),
6152*0b57cec5SDimitry Andric                  !strconcat(OpcodeStr,
6153*0b57cec5SDimitry Andric                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
6154*0b57cec5SDimitry Andric                  [(set RC:$dst, (VT (OpNode RC:$src3, RC:$src2, RC:$src1)))],
6155*0b57cec5SDimitry Andric                  SSEPackedInt>, TAPD, VEX_4V,
6156*0b57cec5SDimitry Andric                Sched<[sched]>;
6157*0b57cec5SDimitry Andric
6158*0b57cec5SDimitry Andric  def rm : Ii8Reg<opc, MRMSrcMem, (outs RC:$dst),
6159*0b57cec5SDimitry Andric                  (ins RC:$src1, x86memop:$src2, RC:$src3),
6160*0b57cec5SDimitry Andric                  !strconcat(OpcodeStr,
6161*0b57cec5SDimitry Andric                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
6162*0b57cec5SDimitry Andric                  [(set RC:$dst,
6163*0b57cec5SDimitry Andric                        (OpNode RC:$src3, (mem_frag addr:$src2),
6164*0b57cec5SDimitry Andric                                RC:$src1))], SSEPackedInt>, TAPD, VEX_4V,
6165*0b57cec5SDimitry Andric                Sched<[sched.Folded, sched.ReadAfterFold,
6166*0b57cec5SDimitry Andric                       // x86memop:$src2
6167*0b57cec5SDimitry Andric                       ReadDefault, ReadDefault, ReadDefault, ReadDefault,
6168*0b57cec5SDimitry Andric                       ReadDefault,
6169*0b57cec5SDimitry Andric                       // RC:$src3
6170*0b57cec5SDimitry Andric                       sched.ReadAfterFold]>;
6171*0b57cec5SDimitry Andric}
6172*0b57cec5SDimitry Andric
6173*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
6174*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in {
6175*0b57cec5SDimitry Andricdefm VBLENDVPD  : SS41I_quaternary_avx<0x4B, "vblendvpd", VR128, f128mem,
6176*0b57cec5SDimitry Andric                                       v2f64, loadv2f64, X86Blendv,
6177*0b57cec5SDimitry Andric                                       SchedWriteFVarBlend.XMM>;
6178*0b57cec5SDimitry Andricdefm VBLENDVPDY : SS41I_quaternary_avx<0x4B, "vblendvpd", VR256, f256mem,
6179*0b57cec5SDimitry Andric                                       v4f64, loadv4f64, X86Blendv,
6180*0b57cec5SDimitry Andric                                       SchedWriteFVarBlend.YMM>, VEX_L;
6181*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedDouble
6182*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in {
6183*0b57cec5SDimitry Andricdefm VBLENDVPS  : SS41I_quaternary_avx<0x4A, "vblendvps", VR128, f128mem,
6184*0b57cec5SDimitry Andric                                       v4f32, loadv4f32, X86Blendv,
6185*0b57cec5SDimitry Andric                                       SchedWriteFVarBlend.XMM>;
6186*0b57cec5SDimitry Andricdefm VBLENDVPSY : SS41I_quaternary_avx<0x4A, "vblendvps", VR256, f256mem,
6187*0b57cec5SDimitry Andric                                       v8f32, loadv8f32, X86Blendv,
6188*0b57cec5SDimitry Andric                                       SchedWriteFVarBlend.YMM>, VEX_L;
6189*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedSingle
6190*0b57cec5SDimitry Andricdefm VPBLENDVB  : SS41I_quaternary_avx<0x4C, "vpblendvb", VR128, i128mem,
6191*0b57cec5SDimitry Andric                                       v16i8, loadv16i8, X86Blendv,
6192*0b57cec5SDimitry Andric                                       SchedWriteVarBlend.XMM>;
6193*0b57cec5SDimitry Andric}
6194*0b57cec5SDimitry Andric
6195*0b57cec5SDimitry Andriclet Predicates = [HasAVX2] in {
6196*0b57cec5SDimitry Andricdefm VPBLENDVBY : SS41I_quaternary_avx<0x4C, "vpblendvb", VR256, i256mem,
6197*0b57cec5SDimitry Andric                                       v32i8, loadv32i8, X86Blendv,
6198*0b57cec5SDimitry Andric                                       SchedWriteVarBlend.YMM>, VEX_L;
6199*0b57cec5SDimitry Andric}
6200*0b57cec5SDimitry Andric
6201*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
6202*0b57cec5SDimitry Andric  def : Pat<(v4i32 (X86Blendv (v4i32 VR128:$mask), (v4i32 VR128:$src1),
6203*0b57cec5SDimitry Andric                              (v4i32 VR128:$src2))),
6204*0b57cec5SDimitry Andric            (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
6205*0b57cec5SDimitry Andric  def : Pat<(v2i64 (X86Blendv (v2i64 VR128:$mask), (v2i64 VR128:$src1),
6206*0b57cec5SDimitry Andric                              (v2i64 VR128:$src2))),
6207*0b57cec5SDimitry Andric            (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
6208*0b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Blendv (v8i32 VR256:$mask), (v8i32 VR256:$src1),
6209*0b57cec5SDimitry Andric                              (v8i32 VR256:$src2))),
6210*0b57cec5SDimitry Andric            (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
6211*0b57cec5SDimitry Andric  def : Pat<(v4i64 (X86Blendv (v4i64 VR256:$mask), (v4i64 VR256:$src1),
6212*0b57cec5SDimitry Andric                              (v4i64 VR256:$src2))),
6213*0b57cec5SDimitry Andric            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
6214*0b57cec5SDimitry Andric}
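// These patterns reuse the FP variable blends for 32/64-bit integer lanes
// because [v]blendvps/[v]blendvpd select on the sign bit of each mask lane,
// which matches the per-lane mask semantics X86Blendv models; only
// byte-granular blends need [v]pblendvb.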
6215*0b57cec5SDimitry Andric
6216*0b57cec5SDimitry Andric// Prefer a movss or movsd over a blendps when optimizing for size. These were
6217*0b57cec5SDimitry Andric// changed to use blends because blends have better throughput on Sandy Bridge
6218*0b57cec5SDimitry Andric// and Haswell, but movs[s/d] are 1-2 bytes shorter.
6219*0b57cec5SDimitry Andriclet Predicates = [HasAVX, OptForSpeed] in {
6220*0b57cec5SDimitry Andric  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
6221*0b57cec5SDimitry Andric            (VBLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
6222*0b57cec5SDimitry Andric  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
6223*0b57cec5SDimitry Andric            (VPBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;
6224*0b57cec5SDimitry Andric
6225*0b57cec5SDimitry Andric  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
6226*0b57cec5SDimitry Andric            (VBLENDPSrri VR128:$src1, VR128:$src2, (i8 1))>;
6227*0b57cec5SDimitry Andric  def : Pat<(v4f32 (X86Movss VR128:$src1, (loadv4f32 addr:$src2))),
6228*0b57cec5SDimitry Andric            (VBLENDPSrmi VR128:$src1, addr:$src2, (i8 1))>;
6229*0b57cec5SDimitry Andric  def : Pat<(v4f32 (X86Movss (loadv4f32 addr:$src2), VR128:$src1)),
6230*0b57cec5SDimitry Andric            (VBLENDPSrmi VR128:$src1, addr:$src2, (i8 0xe))>;
6231*0b57cec5SDimitry Andric
6232*0b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
6233*0b57cec5SDimitry Andric            (VBLENDPDrri VR128:$src1, VR128:$src2, (i8 1))>;
6234*0b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Movsd VR128:$src1, (loadv2f64 addr:$src2))),
6235*0b57cec5SDimitry Andric            (VBLENDPDrmi VR128:$src1, addr:$src2, (i8 1))>;
6236*0b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Movsd (loadv2f64 addr:$src2), VR128:$src1)),
6237*0b57cec5SDimitry Andric            (VBLENDPDrmi VR128:$src1, addr:$src2, (i8 2))>;
6238*0b57cec5SDimitry Andric
6239*0b57cec5SDimitry Andric  // Move low f32 and clear high bits.
6240*0b57cec5SDimitry Andric  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
6241*0b57cec5SDimitry Andric            (SUBREG_TO_REG (i32 0),
6242*0b57cec5SDimitry Andric             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
6243*0b57cec5SDimitry Andric                          (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)),
6244*0b57cec5SDimitry Andric                          (i8 1))), sub_xmm)>;
6245*0b57cec5SDimitry Andric  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
6246*0b57cec5SDimitry Andric            (SUBREG_TO_REG (i32 0),
6247*0b57cec5SDimitry Andric             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
6248*0b57cec5SDimitry Andric                          (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)),
6249*0b57cec5SDimitry Andric                          (i8 3))), sub_xmm)>;
6250*0b57cec5SDimitry Andric}
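// For reference: X86Movss/X86Movsd take the low element from their second
// operand, so the register forms above use blend immediate 1 (low lane from
// the second source); when the load ends up as the first operand, the
// commuted forms instead pull the remaining lanes from memory (0xe for ps,
// 2 for pd).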
6251*0b57cec5SDimitry Andric
6252*0b57cec5SDimitry Andric// Prefer a movss or movsd over a blendps when optimizing for size. These were
6253*0b57cec5SDimitry Andric// changed to use blends because blends have better throughput on Sandy Bridge
6254*0b57cec5SDimitry Andric// and Haswell, but movs[s/d] are 1-2 bytes shorter.
6255*0b57cec5SDimitry Andriclet Predicates = [UseSSE41, OptForSpeed] in {
6256*0b57cec5SDimitry Andric  // With SSE41 we can use blends for these patterns.
6257*0b57cec5SDimitry Andric  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
6258*0b57cec5SDimitry Andric            (BLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
6259*0b57cec5SDimitry Andric  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
6260*0b57cec5SDimitry Andric            (PBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;
6261*0b57cec5SDimitry Andric
6262*0b57cec5SDimitry Andric  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
6263*0b57cec5SDimitry Andric            (BLENDPSrri VR128:$src1, VR128:$src2, (i8 1))>;
6264*0b57cec5SDimitry Andric  def : Pat<(v4f32 (X86Movss VR128:$src1, (memopv4f32 addr:$src2))),
6265*0b57cec5SDimitry Andric            (BLENDPSrmi VR128:$src1, addr:$src2, (i8 1))>;
6266*0b57cec5SDimitry Andric  def : Pat<(v4f32 (X86Movss (memopv4f32 addr:$src2), VR128:$src1)),
6267*0b57cec5SDimitry Andric            (BLENDPSrmi VR128:$src1, addr:$src2, (i8 0xe))>;
6268*0b57cec5SDimitry Andric
6269*0b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
6270*0b57cec5SDimitry Andric            (BLENDPDrri VR128:$src1, VR128:$src2, (i8 1))>;
6271*0b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Movsd VR128:$src1, (memopv2f64 addr:$src2))),
6272*0b57cec5SDimitry Andric            (BLENDPDrmi VR128:$src1, addr:$src2, (i8 1))>;
6273*0b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Movsd (memopv2f64 addr:$src2), VR128:$src1)),
6274*0b57cec5SDimitry Andric            (BLENDPDrmi VR128:$src1, addr:$src2, (i8 2))>;
6275*0b57cec5SDimitry Andric}
6276*0b57cec5SDimitry Andric
6277*0b57cec5SDimitry Andric
6278*0b57cec5SDimitry Andric/// SS41I_ternary - SSE 4.1 ternary operator
6279*0b57cec5SDimitry Andriclet Uses = [XMM0], Constraints = "$src1 = $dst" in {
6280*0b57cec5SDimitry Andric  multiclass SS41I_ternary<bits<8> opc, string OpcodeStr, ValueType VT,
6281*0b57cec5SDimitry Andric                           PatFrag mem_frag, X86MemOperand x86memop,
6282*0b57cec5SDimitry Andric                           SDNode OpNode, X86FoldableSchedWrite sched> {
6283*0b57cec5SDimitry Andric    def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
6284*0b57cec5SDimitry Andric                    (ins VR128:$src1, VR128:$src2),
6285*0b57cec5SDimitry Andric                    !strconcat(OpcodeStr,
6286*0b57cec5SDimitry Andric                     "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
6287*0b57cec5SDimitry Andric                    [(set VR128:$dst,
6288*0b57cec5SDimitry Andric                      (VT (OpNode XMM0, VR128:$src2, VR128:$src1)))]>,
6289*0b57cec5SDimitry Andric                    Sched<[sched]>;
6290*0b57cec5SDimitry Andric
6291*0b57cec5SDimitry Andric    def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
6292*0b57cec5SDimitry Andric                    (ins VR128:$src1, x86memop:$src2),
6293*0b57cec5SDimitry Andric                    !strconcat(OpcodeStr,
6294*0b57cec5SDimitry Andric                     "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
6295*0b57cec5SDimitry Andric                    [(set VR128:$dst,
6296*0b57cec5SDimitry Andric                      (OpNode XMM0, (mem_frag addr:$src2), VR128:$src1))]>,
6297*0b57cec5SDimitry Andric                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6298*0b57cec5SDimitry Andric  }
6299*0b57cec5SDimitry Andric}
6300*0b57cec5SDimitry Andric
6301*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in
6302*0b57cec5SDimitry Andricdefm BLENDVPD : SS41I_ternary<0x15, "blendvpd", v2f64, memopv2f64, f128mem,
6303*0b57cec5SDimitry Andric                              X86Blendv, SchedWriteFVarBlend.XMM>;
6304*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in
6305*0b57cec5SDimitry Andricdefm BLENDVPS : SS41I_ternary<0x14, "blendvps", v4f32, memopv4f32, f128mem,
6306*0b57cec5SDimitry Andric                              X86Blendv, SchedWriteFVarBlend.XMM>;
6307*0b57cec5SDimitry Andricdefm PBLENDVB : SS41I_ternary<0x10, "pblendvb", v16i8, memopv16i8, i128mem,
6308*0b57cec5SDimitry Andric                              X86Blendv, SchedWriteVarBlend.XMM>;
6309*0b57cec5SDimitry Andric
6310*0b57cec5SDimitry Andric// Aliases with the implicit xmm0 argument
6311*0b57cec5SDimitry Andricdef : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}",
6312*0b57cec5SDimitry Andric                (BLENDVPDrr0 VR128:$dst, VR128:$src2), 0>;
6313*0b57cec5SDimitry Andricdef : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}",
6314*0b57cec5SDimitry Andric                (BLENDVPDrm0 VR128:$dst, f128mem:$src2), 0>;
6315*0b57cec5SDimitry Andricdef : InstAlias<"blendvps\t{$src2, $dst|$dst, $src2}",
6316*0b57cec5SDimitry Andric                (BLENDVPSrr0 VR128:$dst, VR128:$src2), 0>;
6317*0b57cec5SDimitry Andricdef : InstAlias<"blendvps\t{$src2, $dst|$dst, $src2}",
6318*0b57cec5SDimitry Andric                (BLENDVPSrm0 VR128:$dst, f128mem:$src2), 0>;
6319*0b57cec5SDimitry Andricdef : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}",
6320*0b57cec5SDimitry Andric                (PBLENDVBrr0 VR128:$dst, VR128:$src2), 0>;
6321*0b57cec5SDimitry Andricdef : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}",
6322*0b57cec5SDimitry Andric                (PBLENDVBrm0 VR128:$dst, i128mem:$src2), 0>;
6323*0b57cec5SDimitry Andric
6324*0b57cec5SDimitry Andriclet Predicates = [UseSSE41] in {
6325*0b57cec5SDimitry Andric  def : Pat<(v4i32 (X86Blendv (v4i32 XMM0), (v4i32 VR128:$src1),
6326*0b57cec5SDimitry Andric                              (v4i32 VR128:$src2))),
6327*0b57cec5SDimitry Andric            (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
6328*0b57cec5SDimitry Andric  def : Pat<(v2i64 (X86Blendv (v2i64 XMM0), (v2i64 VR128:$src1),
6329*0b57cec5SDimitry Andric                              (v2i64 VR128:$src2))),
6330*0b57cec5SDimitry Andric            (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
6331*0b57cec5SDimitry Andric}
6332*0b57cec5SDimitry Andric
6333*0b57cec5SDimitry Andriclet AddedComplexity = 400 in { // Prefer non-temporal versions
6334*0b57cec5SDimitry Andric
6335*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in
6336*0b57cec5SDimitry Andricdef VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
6337*0b57cec5SDimitry Andric                        "vmovntdqa\t{$src, $dst|$dst, $src}", []>,
6338*0b57cec5SDimitry Andric                        Sched<[SchedWriteVecMoveLSNT.XMM.RM]>, VEX, VEX_WIG;
6339*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in
6340*0b57cec5SDimitry Andricdef VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
6341*0b57cec5SDimitry Andric                         "vmovntdqa\t{$src, $dst|$dst, $src}", []>,
6342*0b57cec5SDimitry Andric                         Sched<[SchedWriteVecMoveLSNT.YMM.RM]>, VEX, VEX_L, VEX_WIG;
6343*0b57cec5SDimitry Andricdef MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
6344*0b57cec5SDimitry Andric                       "movntdqa\t{$src, $dst|$dst, $src}", []>,
6345*0b57cec5SDimitry Andric                       Sched<[SchedWriteVecMoveLSNT.XMM.RM]>;
6346*0b57cec5SDimitry Andric
6347*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
6348*0b57cec5SDimitry Andric  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
6349*0b57cec5SDimitry Andric            (VMOVNTDQAYrm addr:$src)>;
6350*0b57cec5SDimitry Andric  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
6351*0b57cec5SDimitry Andric            (VMOVNTDQAYrm addr:$src)>;
6352*0b57cec5SDimitry Andric  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
6353*0b57cec5SDimitry Andric            (VMOVNTDQAYrm addr:$src)>;
6354*0b57cec5SDimitry Andric  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
6355*0b57cec5SDimitry Andric            (VMOVNTDQAYrm addr:$src)>;
6356*0b57cec5SDimitry Andric  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
6357*0b57cec5SDimitry Andric            (VMOVNTDQAYrm addr:$src)>;
6358*0b57cec5SDimitry Andric  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
6359*0b57cec5SDimitry Andric            (VMOVNTDQAYrm addr:$src)>;
6360*0b57cec5SDimitry Andric}
6361*0b57cec5SDimitry Andric
6362*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
6363*0b57cec5SDimitry Andric  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
6364*0b57cec5SDimitry Andric            (VMOVNTDQArm addr:$src)>;
6365*0b57cec5SDimitry Andric  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
6366*0b57cec5SDimitry Andric            (VMOVNTDQArm addr:$src)>;
6367*0b57cec5SDimitry Andric  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
6368*0b57cec5SDimitry Andric            (VMOVNTDQArm addr:$src)>;
6369*0b57cec5SDimitry Andric  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
6370*0b57cec5SDimitry Andric            (VMOVNTDQArm addr:$src)>;
6371*0b57cec5SDimitry Andric  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
6372*0b57cec5SDimitry Andric            (VMOVNTDQArm addr:$src)>;
6373*0b57cec5SDimitry Andric  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
6374*0b57cec5SDimitry Andric            (VMOVNTDQArm addr:$src)>;
6375*0b57cec5SDimitry Andric}
6376*0b57cec5SDimitry Andric
6377*0b57cec5SDimitry Andriclet Predicates = [UseSSE41] in {
6378*0b57cec5SDimitry Andric  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
6379*0b57cec5SDimitry Andric            (MOVNTDQArm addr:$src)>;
6380*0b57cec5SDimitry Andric  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
6381*0b57cec5SDimitry Andric            (MOVNTDQArm addr:$src)>;
6382*0b57cec5SDimitry Andric  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
6383*0b57cec5SDimitry Andric            (MOVNTDQArm addr:$src)>;
6384*0b57cec5SDimitry Andric  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
6385*0b57cec5SDimitry Andric            (MOVNTDQArm addr:$src)>;
6386*0b57cec5SDimitry Andric  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
6387*0b57cec5SDimitry Andric            (MOVNTDQArm addr:$src)>;
6388*0b57cec5SDimitry Andric  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
6389*0b57cec5SDimitry Andric            (MOVNTDQArm addr:$src)>;
6390*0b57cec5SDimitry Andric}
6391*0b57cec5SDimitry Andric
6392*0b57cec5SDimitry Andric} // AddedComplexity
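// Usage sketch (for reference, not asserted by this file): the patterns above
// fire only for aligned non-temporal loads, matching movntdqa's requirement
// of a 16-byte (xmm) or 32-byte (ymm) aligned source; in C this corresponds
// to _mm_stream_load_si128 / _mm256_stream_load_si256 on suitably aligned
// pointers.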
6393*0b57cec5SDimitry Andric
6394*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6395*0b57cec5SDimitry Andric// SSE4.2 - Compare Instructions
6396*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6397*0b57cec5SDimitry Andric
6398*0b57cec5SDimitry Andric/// SS42I_binop_rm - Simple SSE 4.2 binary operator
6399*0b57cec5SDimitry Andricmulticlass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6400*0b57cec5SDimitry Andric                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
6401*0b57cec5SDimitry Andric                          X86MemOperand x86memop, X86FoldableSchedWrite sched,
6402*0b57cec5SDimitry Andric                          bit Is2Addr = 1> {
6403*0b57cec5SDimitry Andric  def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst),
6404*0b57cec5SDimitry Andric       (ins RC:$src1, RC:$src2),
6405*0b57cec5SDimitry Andric       !if(Is2Addr,
6406*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
6407*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
6408*0b57cec5SDimitry Andric       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
6409*0b57cec5SDimitry Andric       Sched<[sched]>;
6410*0b57cec5SDimitry Andric  def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst),
6411*0b57cec5SDimitry Andric       (ins RC:$src1, x86memop:$src2),
6412*0b57cec5SDimitry Andric       !if(Is2Addr,
6413*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
6414*0b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
6415*0b57cec5SDimitry Andric       [(set RC:$dst,
6416*0b57cec5SDimitry Andric         (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
6417*0b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
6418*0b57cec5SDimitry Andric}
6419*0b57cec5SDimitry Andric
6420*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in
6421*0b57cec5SDimitry Andric  defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
6422*0b57cec5SDimitry Andric                                 load, i128mem, SchedWriteVecALU.XMM, 0>,
6423*0b57cec5SDimitry Andric                                 VEX_4V, VEX_WIG;
6424*0b57cec5SDimitry Andric
6425*0b57cec5SDimitry Andriclet Predicates = [HasAVX2] in
6426*0b57cec5SDimitry Andric  defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
6427*0b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
6428*0b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
6429*0b57cec5SDimitry Andric
6430*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in
6431*0b57cec5SDimitry Andric  defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
6432*0b57cec5SDimitry Andric                                memop, i128mem, SchedWriteVecALU.XMM>;
6433*0b57cec5SDimitry Andric
6434*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6435*0b57cec5SDimitry Andric// SSE4.2 - String/text Processing Instructions
6436*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6437*0b57cec5SDimitry Andric
6438*0b57cec5SDimitry Andricmulticlass pcmpistrm_SS42AI<string asm> {
6439*0b57cec5SDimitry Andric  def rr : SS42AI<0x62, MRMSrcReg, (outs),
6440*0b57cec5SDimitry Andric    (ins VR128:$src1, VR128:$src2, u8imm:$src3),
6441*0b57cec5SDimitry Andric    !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
6442*0b57cec5SDimitry Andric    []>, Sched<[WritePCmpIStrM]>;
6443*0b57cec5SDimitry Andric  let mayLoad = 1 in
6444*0b57cec5SDimitry Andric  def rm :SS42AI<0x62, MRMSrcMem, (outs),
6445*0b57cec5SDimitry Andric    (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
6446*0b57cec5SDimitry Andric    !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
6447*0b57cec5SDimitry Andric    []>, Sched<[WritePCmpIStrM.Folded, WritePCmpIStrM.ReadAfterFold]>;
6448*0b57cec5SDimitry Andric}
6449*0b57cec5SDimitry Andric
6450*0b57cec5SDimitry Andriclet Defs = [XMM0, EFLAGS], hasSideEffects = 0 in {
6451*0b57cec5SDimitry Andric  let Predicates = [HasAVX] in
6452*0b57cec5SDimitry Andric  defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX;
6453*0b57cec5SDimitry Andric  defm PCMPISTRM  : pcmpistrm_SS42AI<"pcmpistrm"> ;
6454*0b57cec5SDimitry Andric}
6455*0b57cec5SDimitry Andric
6456*0b57cec5SDimitry Andricmulticlass SS42AI_pcmpestrm<string asm> {
6457*0b57cec5SDimitry Andric  def rr : SS42AI<0x60, MRMSrcReg, (outs),
6458*0b57cec5SDimitry Andric    (ins VR128:$src1, VR128:$src3, u8imm:$src5),
6459*0b57cec5SDimitry Andric    !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
6460*0b57cec5SDimitry Andric    []>, Sched<[WritePCmpEStrM]>;
6461*0b57cec5SDimitry Andric  let mayLoad = 1 in
6462*0b57cec5SDimitry Andric  def rm : SS42AI<0x60, MRMSrcMem, (outs),
6463*0b57cec5SDimitry Andric    (ins VR128:$src1, i128mem:$src3, u8imm:$src5),
6464*0b57cec5SDimitry Andric    !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
6465*0b57cec5SDimitry Andric    []>, Sched<[WritePCmpEStrM.Folded, WritePCmpEStrM.ReadAfterFold]>;
6466*0b57cec5SDimitry Andric}
6467*0b57cec5SDimitry Andric
6468*0b57cec5SDimitry Andriclet Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
6469*0b57cec5SDimitry Andric  let Predicates = [HasAVX] in
6470*0b57cec5SDimitry Andric  defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX;
6471*0b57cec5SDimitry Andric  defm PCMPESTRM :  SS42AI_pcmpestrm<"pcmpestrm">;
6472*0b57cec5SDimitry Andric}
6473*0b57cec5SDimitry Andric
6474*0b57cec5SDimitry Andricmulticlass SS42AI_pcmpistri<string asm> {
6475*0b57cec5SDimitry Andric  def rr : SS42AI<0x63, MRMSrcReg, (outs),
6476*0b57cec5SDimitry Andric    (ins VR128:$src1, VR128:$src2, u8imm:$src3),
6477*0b57cec5SDimitry Andric    !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
6478*0b57cec5SDimitry Andric    []>, Sched<[WritePCmpIStrI]>;
6479*0b57cec5SDimitry Andric  let mayLoad = 1 in
6480*0b57cec5SDimitry Andric  def rm : SS42AI<0x63, MRMSrcMem, (outs),
6481*0b57cec5SDimitry Andric    (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
6482*0b57cec5SDimitry Andric    !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
6483*0b57cec5SDimitry Andric    []>, Sched<[WritePCmpIStrI.Folded, WritePCmpIStrI.ReadAfterFold]>;
6484*0b57cec5SDimitry Andric}
6485*0b57cec5SDimitry Andric
6486*0b57cec5SDimitry Andriclet Defs = [ECX, EFLAGS], hasSideEffects = 0 in {
6487*0b57cec5SDimitry Andric  let Predicates = [HasAVX] in
6488*0b57cec5SDimitry Andric  defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
6489*0b57cec5SDimitry Andric  defm PCMPISTRI  : SS42AI_pcmpistri<"pcmpistri">;
6490*0b57cec5SDimitry Andric}
6491*0b57cec5SDimitry Andric
6492*0b57cec5SDimitry Andricmulticlass SS42AI_pcmpestri<string asm> {
6493*0b57cec5SDimitry Andric  def rr : SS42AI<0x61, MRMSrcReg, (outs),
6494*0b57cec5SDimitry Andric    (ins VR128:$src1, VR128:$src3, u8imm:$src5),
6495*0b57cec5SDimitry Andric    !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
6496*0b57cec5SDimitry Andric    []>, Sched<[WritePCmpEStrI]>;
6497*0b57cec5SDimitry Andric  let mayLoad = 1 in
6498*0b57cec5SDimitry Andric  def rm : SS42AI<0x61, MRMSrcMem, (outs),
6499*0b57cec5SDimitry Andric    (ins VR128:$src1, i128mem:$src3, u8imm:$src5),
6500*0b57cec5SDimitry Andric    !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
6501*0b57cec5SDimitry Andric    []>, Sched<[WritePCmpEStrI.Folded, WritePCmpEStrI.ReadAfterFold]>;
6502*0b57cec5SDimitry Andric}
6503*0b57cec5SDimitry Andric
6504*0b57cec5SDimitry Andriclet Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
6505*0b57cec5SDimitry Andric  let Predicates = [HasAVX] in
6506*0b57cec5SDimitry Andric  defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
6507*0b57cec5SDimitry Andric  defm PCMPESTRI  : SS42AI_pcmpestri<"pcmpestri">;
6508*0b57cec5SDimitry Andric}
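// Note: the pcmpistr*/pcmpestr* definitions above carry no ISel patterns; they
// are selected from the SSE4.2 string intrinsics elsewhere, with results
// produced implicitly in ECX (index forms) or XMM0 (mask forms) plus EFLAGS,
// as the Defs/Uses lists indicate.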
6509*0b57cec5SDimitry Andric
6510*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6511*0b57cec5SDimitry Andric// SSE4.2 - CRC Instructions
6512*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6513*0b57cec5SDimitry Andric
6514*0b57cec5SDimitry Andric// No CRC instructions have AVX equivalents
6515*0b57cec5SDimitry Andric
6516*0b57cec5SDimitry Andric// CRC intrinsic instructions.
6517*0b57cec5SDimitry Andric// These instructions come only in r and m forms; the only difference is the
6518*0b57cec5SDimitry Andric// size of r and m.
6519*0b57cec5SDimitry Andricclass SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut,
6520*0b57cec5SDimitry Andric                   RegisterClass RCIn, SDPatternOperator Int> :
6521*0b57cec5SDimitry Andric  SS42FI<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
6522*0b57cec5SDimitry Andric         !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
6523*0b57cec5SDimitry Andric         [(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))]>,
6524*0b57cec5SDimitry Andric         Sched<[WriteCRC32]>;
6525*0b57cec5SDimitry Andric
6526*0b57cec5SDimitry Andricclass SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
6527*0b57cec5SDimitry Andric                   X86MemOperand x86memop, SDPatternOperator Int> :
6528*0b57cec5SDimitry Andric  SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
6529*0b57cec5SDimitry Andric         !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
6530*0b57cec5SDimitry Andric         [(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))]>,
6531*0b57cec5SDimitry Andric         Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>;
6532*0b57cec5SDimitry Andric
6533*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
6534*0b57cec5SDimitry Andric  def CRC32r32m8  : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem,
6535*0b57cec5SDimitry Andric                                 int_x86_sse42_crc32_32_8>;
6536*0b57cec5SDimitry Andric  def CRC32r32r8  : SS42I_crc32r<0xF0, "crc32{b}", GR32, GR8,
6537*0b57cec5SDimitry Andric                                 int_x86_sse42_crc32_32_8>;
6538*0b57cec5SDimitry Andric  def CRC32r32m16 : SS42I_crc32m<0xF1, "crc32{w}", GR32, i16mem,
6539*0b57cec5SDimitry Andric                                 int_x86_sse42_crc32_32_16>, OpSize16;
6540*0b57cec5SDimitry Andric  def CRC32r32r16 : SS42I_crc32r<0xF1, "crc32{w}", GR32, GR16,
6541*0b57cec5SDimitry Andric                                 int_x86_sse42_crc32_32_16>, OpSize16;
6542*0b57cec5SDimitry Andric  def CRC32r32m32 : SS42I_crc32m<0xF1, "crc32{l}", GR32, i32mem,
6543*0b57cec5SDimitry Andric                                 int_x86_sse42_crc32_32_32>, OpSize32;
6544*0b57cec5SDimitry Andric  def CRC32r32r32 : SS42I_crc32r<0xF1, "crc32{l}", GR32, GR32,
6545*0b57cec5SDimitry Andric                                 int_x86_sse42_crc32_32_32>, OpSize32;
6546*0b57cec5SDimitry Andric  def CRC32r64m64 : SS42I_crc32m<0xF1, "crc32{q}", GR64, i64mem,
6547*0b57cec5SDimitry Andric                                 int_x86_sse42_crc32_64_64>, REX_W;
6548*0b57cec5SDimitry Andric  def CRC32r64r64 : SS42I_crc32r<0xF1, "crc32{q}", GR64, GR64,
6549*0b57cec5SDimitry Andric                                 int_x86_sse42_crc32_64_64>, REX_W;
6550*0b57cec5SDimitry Andric  let hasSideEffects = 0 in {
6551*0b57cec5SDimitry Andric    let mayLoad = 1 in
6552*0b57cec5SDimitry Andric    def CRC32r64m8 : SS42I_crc32m<0xF0, "crc32{b}", GR64, i8mem,
6553*0b57cec5SDimitry Andric                                   null_frag>, REX_W;
6554*0b57cec5SDimitry Andric    def CRC32r64r8 : SS42I_crc32r<0xF0, "crc32{b}", GR64, GR8,
6555*0b57cec5SDimitry Andric                                   null_frag>, REX_W;
6556*0b57cec5SDimitry Andric  }
6557*0b57cec5SDimitry Andric}
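// Usage sketch (assuming the usual <nmmintrin.h> intrinsics): _mm_crc32_u8,
// _mm_crc32_u16 and _mm_crc32_u32 accumulate into a 32-bit CRC and map to the
// GR32 forms above, while _mm_crc32_u64 maps to the REX.W GR64 form.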
6558*0b57cec5SDimitry Andric
6559*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6560*0b57cec5SDimitry Andric// SHA-NI Instructions
6561*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6562*0b57cec5SDimitry Andric
6563*0b57cec5SDimitry Andric// FIXME: Is there a better scheduler class for SHA than WriteVecIMul?
6564*0b57cec5SDimitry Andricmulticlass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
6565*0b57cec5SDimitry Andric                      X86FoldableSchedWrite sched, bit UsesXMM0 = 0> {
6566*0b57cec5SDimitry Andric  def rr : I<Opc, MRMSrcReg, (outs VR128:$dst),
6567*0b57cec5SDimitry Andric             (ins VR128:$src1, VR128:$src2),
6568*0b57cec5SDimitry Andric             !if(UsesXMM0,
6569*0b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
6570*0b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
6571*0b57cec5SDimitry Andric             [!if(UsesXMM0,
6572*0b57cec5SDimitry Andric                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
6573*0b57cec5SDimitry Andric                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>,
6574*0b57cec5SDimitry Andric             T8, Sched<[sched]>;
6575*0b57cec5SDimitry Andric
6576*0b57cec5SDimitry Andric  def rm : I<Opc, MRMSrcMem, (outs VR128:$dst),
6577*0b57cec5SDimitry Andric             (ins VR128:$src1, i128mem:$src2),
6578*0b57cec5SDimitry Andric             !if(UsesXMM0,
6579*0b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
6580*0b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
6581*0b57cec5SDimitry Andric             [!if(UsesXMM0,
6582*0b57cec5SDimitry Andric                  (set VR128:$dst, (IntId VR128:$src1,
6583*0b57cec5SDimitry Andric                    (memop addr:$src2), XMM0)),
6584*0b57cec5SDimitry Andric                  (set VR128:$dst, (IntId VR128:$src1,
6585*0b57cec5SDimitry Andric                    (memop addr:$src2))))]>, T8,
6586*0b57cec5SDimitry Andric             Sched<[sched.Folded, sched.ReadAfterFold]>;
6587*0b57cec5SDimitry Andric}
6588*0b57cec5SDimitry Andric
6589*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
6590*0b57cec5SDimitry Andric  def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst),
6591*0b57cec5SDimitry Andric                         (ins VR128:$src1, VR128:$src2, u8imm:$src3),
6592*0b57cec5SDimitry Andric                         "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
6593*0b57cec5SDimitry Andric                         [(set VR128:$dst,
6594*0b57cec5SDimitry Andric                           (int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
6595*0b57cec5SDimitry Andric                            (i8 imm:$src3)))]>, TA,
6596*0b57cec5SDimitry Andric                         Sched<[SchedWriteVecIMul.XMM]>;
6597*0b57cec5SDimitry Andric  def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst),
6598*0b57cec5SDimitry Andric                         (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
6599*0b57cec5SDimitry Andric                         "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
6600*0b57cec5SDimitry Andric                         [(set VR128:$dst,
6601*0b57cec5SDimitry Andric                           (int_x86_sha1rnds4 VR128:$src1,
6602*0b57cec5SDimitry Andric                            (memop addr:$src2),
6603*0b57cec5SDimitry Andric                            (i8 imm:$src3)))]>, TA,
6604*0b57cec5SDimitry Andric                         Sched<[SchedWriteVecIMul.XMM.Folded,
6605*0b57cec5SDimitry Andric                                SchedWriteVecIMul.XMM.ReadAfterFold]>;
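  // The 2-bit sha1rnds4 immediate selects which of the four SHA-1
  // round-function/constant groups (rounds 0-19, 20-39, 40-59, 60-79) the
  // instruction applies.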
6606*0b57cec5SDimitry Andric
6607*0b57cec5SDimitry Andric  defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte,
6608*0b57cec5SDimitry Andric                              SchedWriteVecIMul.XMM>;
6609*0b57cec5SDimitry Andric  defm SHA1MSG1  : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1,
6610*0b57cec5SDimitry Andric                              SchedWriteVecIMul.XMM>;
6611*0b57cec5SDimitry Andric  defm SHA1MSG2  : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2,
6612*0b57cec5SDimitry Andric                              SchedWriteVecIMul.XMM>;
6613*0b57cec5SDimitry Andric
6614*0b57cec5SDimitry Andric  let Uses=[XMM0] in
6615*0b57cec5SDimitry Andric  defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2,
6616*0b57cec5SDimitry Andric                                SchedWriteVecIMul.XMM, 1>;
6617*0b57cec5SDimitry Andric
6618*0b57cec5SDimitry Andric  defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1,
6619*0b57cec5SDimitry Andric                               SchedWriteVecIMul.XMM>;
6620*0b57cec5SDimitry Andric  defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2,
6621*0b57cec5SDimitry Andric                               SchedWriteVecIMul.XMM>;
6622*0b57cec5SDimitry Andric}
6623*0b57cec5SDimitry Andric
6624*0b57cec5SDimitry Andric// Aliases with explicit %xmm0
6625*0b57cec5SDimitry Andricdef : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
6626*0b57cec5SDimitry Andric                (SHA256RNDS2rr VR128:$dst, VR128:$src2), 0>;
6627*0b57cec5SDimitry Andricdef : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
6628*0b57cec5SDimitry Andric                (SHA256RNDS2rm VR128:$dst, i128mem:$src2), 0>;
6629*0b57cec5SDimitry Andric
6630*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6631*0b57cec5SDimitry Andric// AES-NI Instructions
6632*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6633*0b57cec5SDimitry Andric
6634*0b57cec5SDimitry Andricmulticlass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
6635*0b57cec5SDimitry Andric                             Intrinsic IntId, PatFrag ld_frag,
6636*0b57cec5SDimitry Andric                             bit Is2Addr = 0, RegisterClass RC = VR128,
6637*0b57cec5SDimitry Andric                             X86MemOperand MemOp = i128mem> {
6638*0b57cec5SDimitry Andric  let AsmString = OpcodeStr##
6639*0b57cec5SDimitry Andric                  !if(Is2Addr, "\t{$src2, $dst|$dst, $src2}",
6640*0b57cec5SDimitry Andric                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}") in {
6641*0b57cec5SDimitry Andric    def rr : AES8I<opc, MRMSrcReg, (outs RC:$dst),
6642*0b57cec5SDimitry Andric                   (ins RC:$src1, RC:$src2), "",
6643*0b57cec5SDimitry Andric                   [(set RC:$dst, (IntId RC:$src1, RC:$src2))]>,
6644*0b57cec5SDimitry Andric                   Sched<[WriteAESDecEnc]>;
6645*0b57cec5SDimitry Andric    def rm : AES8I<opc, MRMSrcMem, (outs RC:$dst),
6646*0b57cec5SDimitry Andric                   (ins RC:$src1, MemOp:$src2), "",
6647*0b57cec5SDimitry Andric                   [(set RC:$dst, (IntId RC:$src1, (ld_frag addr:$src2)))]>,
6648*0b57cec5SDimitry Andric                   Sched<[WriteAESDecEnc.Folded, WriteAESDecEnc.ReadAfterFold]>;
6649*0b57cec5SDimitry Andric  }
6650*0b57cec5SDimitry Andric}
6651*0b57cec5SDimitry Andric
6652*0b57cec5SDimitry Andric// Perform One Round of an AES Encryption/Decryption Flow
6653*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoVAES, HasAES] in {
6654*0b57cec5SDimitry Andric  defm VAESENC          : AESI_binop_rm_int<0xDC, "vaesenc",
6655*0b57cec5SDimitry Andric                         int_x86_aesni_aesenc, load>, VEX_4V, VEX_WIG;
6656*0b57cec5SDimitry Andric  defm VAESENCLAST      : AESI_binop_rm_int<0xDD, "vaesenclast",
6657*0b57cec5SDimitry Andric                         int_x86_aesni_aesenclast, load>, VEX_4V, VEX_WIG;
6658*0b57cec5SDimitry Andric  defm VAESDEC          : AESI_binop_rm_int<0xDE, "vaesdec",
6659*0b57cec5SDimitry Andric                         int_x86_aesni_aesdec, load>, VEX_4V, VEX_WIG;
6660*0b57cec5SDimitry Andric  defm VAESDECLAST      : AESI_binop_rm_int<0xDF, "vaesdeclast",
6661*0b57cec5SDimitry Andric                         int_x86_aesni_aesdeclast, load>, VEX_4V, VEX_WIG;
6662*0b57cec5SDimitry Andric}
6663*0b57cec5SDimitry Andric
6664*0b57cec5SDimitry Andriclet Predicates = [NoVLX, HasVAES] in {
6665*0b57cec5SDimitry Andric  defm VAESENCY         : AESI_binop_rm_int<0xDC, "vaesenc",
6666*0b57cec5SDimitry Andric                         int_x86_aesni_aesenc_256, load, 0, VR256,
6667*0b57cec5SDimitry Andric                         i256mem>, VEX_4V, VEX_L, VEX_WIG;
6668*0b57cec5SDimitry Andric  defm VAESENCLASTY     : AESI_binop_rm_int<0xDD, "vaesenclast",
6669*0b57cec5SDimitry Andric                         int_x86_aesni_aesenclast_256, load, 0, VR256,
6670*0b57cec5SDimitry Andric                         i256mem>, VEX_4V, VEX_L, VEX_WIG;
6671*0b57cec5SDimitry Andric  defm VAESDECY         : AESI_binop_rm_int<0xDE, "vaesdec",
6672*0b57cec5SDimitry Andric                         int_x86_aesni_aesdec_256, load, 0, VR256,
6673*0b57cec5SDimitry Andric                         i256mem>, VEX_4V, VEX_L, VEX_WIG;
6674*0b57cec5SDimitry Andric  defm VAESDECLASTY     : AESI_binop_rm_int<0xDF, "vaesdeclast",
6675*0b57cec5SDimitry Andric                         int_x86_aesni_aesdeclast_256, load, 0, VR256,
6676*0b57cec5SDimitry Andric                         i256mem>, VEX_4V, VEX_L, VEX_WIG;
6677*0b57cec5SDimitry Andric}
6678*0b57cec5SDimitry Andric
6679*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
6680*0b57cec5SDimitry Andric  defm AESENC          : AESI_binop_rm_int<0xDC, "aesenc",
6681*0b57cec5SDimitry Andric                         int_x86_aesni_aesenc, memop, 1>;
6682*0b57cec5SDimitry Andric  defm AESENCLAST      : AESI_binop_rm_int<0xDD, "aesenclast",
6683*0b57cec5SDimitry Andric                         int_x86_aesni_aesenclast, memop, 1>;
6684*0b57cec5SDimitry Andric  defm AESDEC          : AESI_binop_rm_int<0xDE, "aesdec",
6685*0b57cec5SDimitry Andric                         int_x86_aesni_aesdec, memop, 1>;
6686*0b57cec5SDimitry Andric  defm AESDECLAST      : AESI_binop_rm_int<0xDF, "aesdeclast",
6687*0b57cec5SDimitry Andric                         int_x86_aesni_aesdeclast, memop, 1>;
6688*0b57cec5SDimitry Andric}
6689*0b57cec5SDimitry Andric
6690*0b57cec5SDimitry Andric// Perform the AES InvMixColumn Transformation
6691*0b57cec5SDimitry Andriclet Predicates = [HasAVX, HasAES] in {
6692*0b57cec5SDimitry Andric  def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
6693*0b57cec5SDimitry Andric      (ins VR128:$src1),
6694*0b57cec5SDimitry Andric      "vaesimc\t{$src1, $dst|$dst, $src1}",
6695*0b57cec5SDimitry Andric      [(set VR128:$dst,
6696*0b57cec5SDimitry Andric        (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>,
6697*0b57cec5SDimitry Andric      VEX, VEX_WIG;
6698*0b57cec5SDimitry Andric  def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
6699*0b57cec5SDimitry Andric      (ins i128mem:$src1),
6700*0b57cec5SDimitry Andric      "vaesimc\t{$src1, $dst|$dst, $src1}",
6701*0b57cec5SDimitry Andric      [(set VR128:$dst, (int_x86_aesni_aesimc (load addr:$src1)))]>,
6702*0b57cec5SDimitry Andric      Sched<[WriteAESIMC.Folded]>, VEX, VEX_WIG;
6703*0b57cec5SDimitry Andric}
6704*0b57cec5SDimitry Andricdef AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
6705*0b57cec5SDimitry Andric  (ins VR128:$src1),
6706*0b57cec5SDimitry Andric  "aesimc\t{$src1, $dst|$dst, $src1}",
6707*0b57cec5SDimitry Andric  [(set VR128:$dst,
6708*0b57cec5SDimitry Andric    (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>;
6709*0b57cec5SDimitry Andricdef AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
6710*0b57cec5SDimitry Andric  (ins i128mem:$src1),
6711*0b57cec5SDimitry Andric  "aesimc\t{$src1, $dst|$dst, $src1}",
6712*0b57cec5SDimitry Andric  [(set VR128:$dst, (int_x86_aesni_aesimc (memop addr:$src1)))]>,
6713*0b57cec5SDimitry Andric  Sched<[WriteAESIMC.Folded]>;
6714*0b57cec5SDimitry Andric
6715*0b57cec5SDimitry Andric// AES Round Key Generation Assist
6716*0b57cec5SDimitry Andriclet Predicates = [HasAVX, HasAES] in {
6717*0b57cec5SDimitry Andric  def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
6718*0b57cec5SDimitry Andric      (ins VR128:$src1, u8imm:$src2),
6719*0b57cec5SDimitry Andric      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6720*0b57cec5SDimitry Andric      [(set VR128:$dst,
6721*0b57cec5SDimitry Andric        (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
6722*0b57cec5SDimitry Andric      Sched<[WriteAESKeyGen]>, VEX, VEX_WIG;
6723*0b57cec5SDimitry Andric  def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
6724*0b57cec5SDimitry Andric      (ins i128mem:$src1, u8imm:$src2),
6725*0b57cec5SDimitry Andric      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6726*0b57cec5SDimitry Andric      [(set VR128:$dst,
6727*0b57cec5SDimitry Andric        (int_x86_aesni_aeskeygenassist (load addr:$src1), imm:$src2))]>,
6728*0b57cec5SDimitry Andric      Sched<[WriteAESKeyGen.Folded]>, VEX, VEX_WIG;
6729*0b57cec5SDimitry Andric}
6730*0b57cec5SDimitry Andricdef AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
6731*0b57cec5SDimitry Andric  (ins VR128:$src1, u8imm:$src2),
6732*0b57cec5SDimitry Andric  "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6733*0b57cec5SDimitry Andric  [(set VR128:$dst,
6734*0b57cec5SDimitry Andric    (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
6735*0b57cec5SDimitry Andric  Sched<[WriteAESKeyGen]>;
6736*0b57cec5SDimitry Andricdef AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
6737*0b57cec5SDimitry Andric  (ins i128mem:$src1, u8imm:$src2),
6738*0b57cec5SDimitry Andric  "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6739*0b57cec5SDimitry Andric  [(set VR128:$dst,
6740*0b57cec5SDimitry Andric    (int_x86_aesni_aeskeygenassist (memop addr:$src1), imm:$src2))]>,
6741*0b57cec5SDimitry Andric  Sched<[WriteAESKeyGen.Folded]>;
6742*0b57cec5SDimitry Andric
6743*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6744*0b57cec5SDimitry Andric// PCLMUL Instructions
6745*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6746*0b57cec5SDimitry Andric
6747*0b57cec5SDimitry Andric// Immediate transform to help with commuting.
6748*0b57cec5SDimitry Andricdef PCLMULCommuteImm : SDNodeXForm<imm, [{
6749*0b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue();
6750*0b57cec5SDimitry Andric  return getI8Imm((uint8_t)((Imm >> 4) | (Imm << 4)), SDLoc(N));
6751*0b57cec5SDimitry Andric}]>;
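// In the pclmulqdq immediate, bit 0 selects the quadword taken from the first
// source and bit 4 selects the quadword taken from the second source, so
// commuting the two sources corresponds to swapping the low and high nibbles
// of the immediate (e.g. 0x01 becomes 0x10), which is what the transform
// above computes.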
6752*0b57cec5SDimitry Andric
6753*0b57cec5SDimitry Andric// SSE carry-less Multiplication instructions
6754*0b57cec5SDimitry Andriclet Predicates = [NoAVX, HasPCLMUL] in {
6755*0b57cec5SDimitry Andric  let Constraints = "$src1 = $dst" in {
6756*0b57cec5SDimitry Andric    let isCommutable = 1 in
6757*0b57cec5SDimitry Andric    def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
6758*0b57cec5SDimitry Andric              (ins VR128:$src1, VR128:$src2, u8imm:$src3),
6759*0b57cec5SDimitry Andric              "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
6760*0b57cec5SDimitry Andric              [(set VR128:$dst,
6761*0b57cec5SDimitry Andric                (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>,
6762*0b57cec5SDimitry Andric                Sched<[WriteCLMul]>;
6763*0b57cec5SDimitry Andric
6764*0b57cec5SDimitry Andric    def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
6765*0b57cec5SDimitry Andric              (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
6766*0b57cec5SDimitry Andric              "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
6767*0b57cec5SDimitry Andric              [(set VR128:$dst,
6768*0b57cec5SDimitry Andric                 (int_x86_pclmulqdq VR128:$src1, (memop addr:$src2),
6769*0b57cec5SDimitry Andric                  imm:$src3))]>,
6770*0b57cec5SDimitry Andric              Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
6771*0b57cec5SDimitry Andric  } // Constraints = "$src1 = $dst"
6772*0b57cec5SDimitry Andric
6773*0b57cec5SDimitry Andric  def : Pat<(int_x86_pclmulqdq (memop addr:$src2), VR128:$src1,
6774*0b57cec5SDimitry Andric                                (i8 imm:$src3)),
6775*0b57cec5SDimitry Andric            (PCLMULQDQrm VR128:$src1, addr:$src2,
6776*0b57cec5SDimitry Andric                          (PCLMULCommuteImm imm:$src3))>;
6777*0b57cec5SDimitry Andric} // Predicates = [NoAVX, HasPCLMUL]
6778*0b57cec5SDimitry Andric
6779*0b57cec5SDimitry Andric// SSE aliases
6780*0b57cec5SDimitry Andricforeach HI = ["hq","lq"] in
6781*0b57cec5SDimitry Andricforeach LO = ["hq","lq"] in {
6782*0b57cec5SDimitry Andric  def : InstAlias<"pclmul" # HI # LO # "dq\t{$src, $dst|$dst, $src}",
6783*0b57cec5SDimitry Andric                  (PCLMULQDQrr VR128:$dst, VR128:$src,
6784*0b57cec5SDimitry Andric                   !add(!shl(!eq(LO,"hq"),4),!eq(HI,"hq"))), 0>;
6785*0b57cec5SDimitry Andric  def : InstAlias<"pclmul" # HI # LO # "dq\t{$src, $dst|$dst, $src}",
6786*0b57cec5SDimitry Andric                  (PCLMULQDQrm VR128:$dst, i128mem:$src,
6787*0b57cec5SDimitry Andric                   !add(!shl(!eq(LO,"hq"),4),!eq(HI,"hq"))), 0>;
6788*0b57cec5SDimitry Andric}
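// The alias immediate above maps the mnemonic suffixes onto the pclmulqdq
// immediate: the first suffix names the quadword of the first source (bit 0)
// and the second suffix names the quadword of the second source (bit 4),
// e.g. "pclmullqlqdq" -> 0x00, "pclmulhqlqdq" -> 0x01, "pclmullqhqdq" -> 0x10
// and "pclmulhqhqdq" -> 0x11.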
6789*0b57cec5SDimitry Andric
6790*0b57cec5SDimitry Andric// AVX carry-less Multiplication instructions
6791*0b57cec5SDimitry Andricmulticlass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
6792*0b57cec5SDimitry Andric                      PatFrag LdFrag, Intrinsic IntId> {
6793*0b57cec5SDimitry Andric  let isCommutable = 1 in
6794*0b57cec5SDimitry Andric  def rr : PCLMULIi8<0x44, MRMSrcReg, (outs RC:$dst),
6795*0b57cec5SDimitry Andric            (ins RC:$src1, RC:$src2, u8imm:$src3),
6796*0b57cec5SDimitry Andric            "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
6797*0b57cec5SDimitry Andric            [(set RC:$dst,
6798*0b57cec5SDimitry Andric              (IntId RC:$src1, RC:$src2, imm:$src3))]>,
6799*0b57cec5SDimitry Andric            Sched<[WriteCLMul]>;
6800*0b57cec5SDimitry Andric
6801*0b57cec5SDimitry Andric  def rm : PCLMULIi8<0x44, MRMSrcMem, (outs RC:$dst),
6802*0b57cec5SDimitry Andric            (ins RC:$src1, MemOp:$src2, u8imm:$src3),
6803*0b57cec5SDimitry Andric            "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
6804*0b57cec5SDimitry Andric            [(set RC:$dst,
6805*0b57cec5SDimitry Andric               (IntId RC:$src1, (LdFrag addr:$src2), imm:$src3))]>,
6806*0b57cec5SDimitry Andric            Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
6807*0b57cec5SDimitry Andric
6808*0b57cec5SDimitry Andric  // We can commute a load in the first operand by swapping the sources and
6809*0b57cec5SDimitry Andric  // rotating the immediate.
6810*0b57cec5SDimitry Andric  def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 imm:$src3)),
6811*0b57cec5SDimitry Andric            (!cast<Instruction>(NAME#"rm") RC:$src1, addr:$src2,
6812*0b57cec5SDimitry Andric                                           (PCLMULCommuteImm imm:$src3))>;
6813*0b57cec5SDimitry Andric}
6814*0b57cec5SDimitry Andric
6815*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in
6816*0b57cec5SDimitry Andricdefm VPCLMULQDQ : vpclmulqdq<VR128, i128mem, load,
6817*0b57cec5SDimitry Andric                             int_x86_pclmulqdq>, VEX_4V, VEX_WIG;
6818*0b57cec5SDimitry Andric
6819*0b57cec5SDimitry Andriclet Predicates = [NoVLX, HasVPCLMULQDQ] in
6820*0b57cec5SDimitry Andricdefm VPCLMULQDQY : vpclmulqdq<VR256, i256mem, load,
6821*0b57cec5SDimitry Andric                              int_x86_pclmulqdq_256>, VEX_4V, VEX_L, VEX_WIG;
6822*0b57cec5SDimitry Andric
6823*0b57cec5SDimitry Andricmulticlass vpclmulqdq_aliases_impl<string InstStr, RegisterClass RC,
6824*0b57cec5SDimitry Andric                                   X86MemOperand MemOp, string Hi, string Lo> {
6825*0b57cec5SDimitry Andric  def : InstAlias<"vpclmul"##Hi##Lo##"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6826*0b57cec5SDimitry Andric                  (!cast<Instruction>(InstStr # "rr") RC:$dst, RC:$src1, RC:$src2,
6827*0b57cec5SDimitry Andric                        !add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))), 0>;
6828*0b57cec5SDimitry Andric  def : InstAlias<"vpclmul"##Hi##Lo##"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6829*0b57cec5SDimitry Andric                  (!cast<Instruction>(InstStr # "rm") RC:$dst, RC:$src1, MemOp:$src2,
6830*0b57cec5SDimitry Andric                        !add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))), 0>;
6831*0b57cec5SDimitry Andric}
6832*0b57cec5SDimitry Andric
6833*0b57cec5SDimitry Andricmulticlass vpclmulqdq_aliases<string InstStr, RegisterClass RC,
6834*0b57cec5SDimitry Andric                              X86MemOperand MemOp> {
6835*0b57cec5SDimitry Andric  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "hq", "hq">;
6836*0b57cec5SDimitry Andric  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "hq", "lq">;
6837*0b57cec5SDimitry Andric  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "lq", "hq">;
6838*0b57cec5SDimitry Andric  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "lq", "lq">;
6839*0b57cec5SDimitry Andric}
6840*0b57cec5SDimitry Andric
6841*0b57cec5SDimitry Andric// AVX aliases
6842*0b57cec5SDimitry Andricdefm : vpclmulqdq_aliases<"VPCLMULQDQ", VR128, i128mem>;
6843*0b57cec5SDimitry Andricdefm : vpclmulqdq_aliases<"VPCLMULQDQY", VR256, i256mem>;
6844*0b57cec5SDimitry Andric
6845*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6846*0b57cec5SDimitry Andric// SSE4A Instructions
6847*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6848*0b57cec5SDimitry Andric
6849*0b57cec5SDimitry Andriclet Predicates = [HasSSE4A] in {
6850*0b57cec5SDimitry Andric
6851*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in {
6852*0b57cec5SDimitry Andriclet Constraints = "$src = $dst" in {
6853*0b57cec5SDimitry Andricdef EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst),
6854*0b57cec5SDimitry Andric                 (ins VR128:$src, u8imm:$len, u8imm:$idx),
6855*0b57cec5SDimitry Andric                 "extrq\t{$idx, $len, $src|$src, $len, $idx}",
6856*0b57cec5SDimitry Andric                 [(set VR128:$dst, (X86extrqi VR128:$src, imm:$len,
6857*0b57cec5SDimitry Andric                                    imm:$idx))]>,
6858*0b57cec5SDimitry Andric                 PD, Sched<[SchedWriteVecALU.XMM]>;
6859*0b57cec5SDimitry Andricdef EXTRQ  : I<0x79, MRMSrcReg, (outs VR128:$dst),
6860*0b57cec5SDimitry Andric              (ins VR128:$src, VR128:$mask),
6861*0b57cec5SDimitry Andric              "extrq\t{$mask, $src|$src, $mask}",
6862*0b57cec5SDimitry Andric              [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src,
6863*0b57cec5SDimitry Andric                                 VR128:$mask))]>,
6864*0b57cec5SDimitry Andric              PD, Sched<[SchedWriteVecALU.XMM]>;
6865*0b57cec5SDimitry Andric
6866*0b57cec5SDimitry Andricdef INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
6867*0b57cec5SDimitry Andric                   (ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx),
6868*0b57cec5SDimitry Andric                   "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
6869*0b57cec5SDimitry Andric                   [(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2,
6870*0b57cec5SDimitry Andric                                      imm:$len, imm:$idx))]>,
6871*0b57cec5SDimitry Andric                   XD, Sched<[SchedWriteVecALU.XMM]>;
6872*0b57cec5SDimitry Andricdef INSERTQ  : I<0x79, MRMSrcReg, (outs VR128:$dst),
6873*0b57cec5SDimitry Andric                 (ins VR128:$src, VR128:$mask),
6874*0b57cec5SDimitry Andric                 "insertq\t{$mask, $src|$src, $mask}",
6875*0b57cec5SDimitry Andric                 [(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src,
6876*0b57cec5SDimitry Andric                                    VR128:$mask))]>,
6877*0b57cec5SDimitry Andric                 XD, Sched<[SchedWriteVecALU.XMM]>;
6878*0b57cec5SDimitry Andric}
6879*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
6880*0b57cec5SDimitry Andric
6881*0b57cec5SDimitry Andric// Non-temporal (unaligned) scalar stores.
6882*0b57cec5SDimitry Andriclet AddedComplexity = 400 in { // Prefer non-temporal versions
6883*0b57cec5SDimitry Andriclet hasSideEffects = 0, mayStore = 1, SchedRW = [SchedWriteFMoveLSNT.Scl.MR] in {
6884*0b57cec5SDimitry Andricdef MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
6885*0b57cec5SDimitry Andric                "movntss\t{$src, $dst|$dst, $src}", []>, XS;
6886*0b57cec5SDimitry Andric
6887*0b57cec5SDimitry Andricdef MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
6888*0b57cec5SDimitry Andric                "movntsd\t{$src, $dst|$dst, $src}", []>, XD;
6889*0b57cec5SDimitry Andric} // SchedRW
6890*0b57cec5SDimitry Andric
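// movntss/movntsd store the low scalar element of an XMM register, so the
// patterns below first place the FR32/FR64 value into an XMM register via
// COPY_TO_REGCLASS before emitting the non-temporal scalar store.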
6891*0b57cec5SDimitry Andricdef : Pat<(nontemporalstore FR32:$src, addr:$dst),
6892*0b57cec5SDimitry Andric          (MOVNTSS addr:$dst, (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;
6893*0b57cec5SDimitry Andric
6894*0b57cec5SDimitry Andricdef : Pat<(nontemporalstore FR64:$src, addr:$dst),
6895*0b57cec5SDimitry Andric          (MOVNTSD addr:$dst, (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;
6896*0b57cec5SDimitry Andric
6897*0b57cec5SDimitry Andric} // AddedComplexity
6898*0b57cec5SDimitry Andric} // HasSSE4A
6899*0b57cec5SDimitry Andric
6900*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6901*0b57cec5SDimitry Andric// AVX Instructions
6902*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6903*0b57cec5SDimitry Andric
6904*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6905*0b57cec5SDimitry Andric// VBROADCAST - Load from memory and broadcast to all elements of the
6906*0b57cec5SDimitry Andric//              destination operand
6907*0b57cec5SDimitry Andric//
6908*0b57cec5SDimitry Andricclass avx_broadcast_rm<bits<8> opc, string OpcodeStr, RegisterClass RC,
6909*0b57cec5SDimitry Andric                           X86MemOperand x86memop, ValueType VT,
6910*0b57cec5SDimitry Andric                           PatFrag ld_frag, SchedWrite Sched> :
6911*0b57cec5SDimitry Andric  AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
6912*0b57cec5SDimitry Andric        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
6913*0b57cec5SDimitry Andric        [(set RC:$dst, (VT (X86VBroadcast (ld_frag addr:$src))))]>,
6914*0b57cec5SDimitry Andric        Sched<[Sched]>, VEX;
6915*0b57cec5SDimitry Andric
6916*0b57cec5SDimitry Andric// AVX2 adds register forms
6917*0b57cec5SDimitry Andricclass avx2_broadcast_rr<bits<8> opc, string OpcodeStr, RegisterClass RC,
6918*0b57cec5SDimitry Andric                        ValueType ResVT, ValueType OpVT, SchedWrite Sched> :
6919*0b57cec5SDimitry Andric  AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
6920*0b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
6921*0b57cec5SDimitry Andric         [(set RC:$dst, (ResVT (X86VBroadcast (OpVT VR128:$src))))]>,
6922*0b57cec5SDimitry Andric         Sched<[Sched]>, VEX;
6923*0b57cec5SDimitry Andric
6924*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle, Predicates = [HasAVX, NoVLX] in {
6925*0b57cec5SDimitry Andric  def VBROADCASTSSrm  : avx_broadcast_rm<0x18, "vbroadcastss", VR128,
6926*0b57cec5SDimitry Andric                                         f32mem, v4f32, loadf32,
6927*0b57cec5SDimitry Andric                                         SchedWriteFShuffle.XMM.Folded>;
6928*0b57cec5SDimitry Andric  def VBROADCASTSSYrm : avx_broadcast_rm<0x18, "vbroadcastss", VR256,
6929*0b57cec5SDimitry Andric                                         f32mem, v8f32, loadf32,
6930*0b57cec5SDimitry Andric                                         SchedWriteFShuffle.XMM.Folded>, VEX_L;
6931*0b57cec5SDimitry Andric}
6932*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble, Predicates = [HasAVX, NoVLX] in
6933*0b57cec5SDimitry Andricdef VBROADCASTSDYrm  : avx_broadcast_rm<0x19, "vbroadcastsd", VR256, f64mem,
6934*0b57cec5SDimitry Andric                                        v4f64, loadf64,
6935*0b57cec5SDimitry Andric                                        SchedWriteFShuffle.XMM.Folded>, VEX_L;
6936*0b57cec5SDimitry Andric
6937*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle, Predicates = [HasAVX2, NoVLX] in {
6938*0b57cec5SDimitry Andric  def VBROADCASTSSrr  : avx2_broadcast_rr<0x18, "vbroadcastss", VR128,
6939*0b57cec5SDimitry Andric                                          v4f32, v4f32, SchedWriteFShuffle.XMM>;
6940*0b57cec5SDimitry Andric  def VBROADCASTSSYrr : avx2_broadcast_rr<0x18, "vbroadcastss", VR256,
6941*0b57cec5SDimitry Andric                                          v8f32, v4f32, WriteFShuffle256>, VEX_L;
6942*0b57cec5SDimitry Andric}
6943*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble, Predicates = [HasAVX2, NoVLX] in
6944*0b57cec5SDimitry Andricdef VBROADCASTSDYrr  : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256,
6945*0b57cec5SDimitry Andric                                         v4f64, v2f64, WriteFShuffle256>, VEX_L;
6946*0b57cec5SDimitry Andric
6947*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
6948*0b57cec5SDimitry Andric  def : Pat<(v4f32 (X86VBroadcast (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
6949*0b57cec5SDimitry Andric            (VBROADCASTSSrm addr:$src)>;
6950*0b57cec5SDimitry Andric  def : Pat<(v8f32 (X86VBroadcast (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
6951*0b57cec5SDimitry Andric            (VBROADCASTSSYrm addr:$src)>;
6952*0b57cec5SDimitry Andric  def : Pat<(v4f64 (X86VBroadcast (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
6953*0b57cec5SDimitry Andric            (VBROADCASTSDYrm addr:$src)>;
6954*0b57cec5SDimitry Andric}
6955*0b57cec5SDimitry Andric
6956*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6957*0b57cec5SDimitry Andric// VBROADCAST*128 - Load from memory and broadcast 128-bit vector to both
6958*0b57cec5SDimitry Andric//                  halves of a 256-bit vector.
6959*0b57cec5SDimitry Andric//
6960*0b57cec5SDimitry Andriclet mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX2] in
6961*0b57cec5SDimitry Andricdef VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
6962*0b57cec5SDimitry Andric                           (ins i128mem:$src),
6963*0b57cec5SDimitry Andric                           "vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
6964*0b57cec5SDimitry Andric                           Sched<[WriteShuffleLd]>, VEX, VEX_L;
6965*0b57cec5SDimitry Andric
6966*0b57cec5SDimitry Andriclet mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX],
6967*0b57cec5SDimitry Andric    ExeDomain = SSEPackedSingle in
6968*0b57cec5SDimitry Andricdef VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst),
6969*0b57cec5SDimitry Andric                           (ins f128mem:$src),
6970*0b57cec5SDimitry Andric                           "vbroadcastf128\t{$src, $dst|$dst, $src}", []>,
6971*0b57cec5SDimitry Andric                           Sched<[SchedWriteFShuffle.XMM.Folded]>, VEX, VEX_L;
6972*0b57cec5SDimitry Andric
6973*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
6974*0b57cec5SDimitry Andricdef : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
6975*0b57cec5SDimitry Andric          (VBROADCASTF128 addr:$src)>;
6976*0b57cec5SDimitry Andricdef : Pat<(v8f32 (X86SubVBroadcast (loadv4f32 addr:$src))),
6977*0b57cec5SDimitry Andric          (VBROADCASTF128 addr:$src)>;
6978*0b57cec5SDimitry Andric}
6979*0b57cec5SDimitry Andric
6980*0b57cec5SDimitry Andric// NOTE: We're using FP instructions here, but execution domain fixing can
6981*0b57cec5SDimitry Andric// convert to integer when profitable.
6982*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
6983*0b57cec5SDimitry Andricdef : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
6984*0b57cec5SDimitry Andric          (VBROADCASTF128 addr:$src)>;
6985*0b57cec5SDimitry Andricdef : Pat<(v8i32 (X86SubVBroadcast (loadv4i32 addr:$src))),
6986*0b57cec5SDimitry Andric          (VBROADCASTF128 addr:$src)>;
6987*0b57cec5SDimitry Andricdef : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
6988*0b57cec5SDimitry Andric          (VBROADCASTF128 addr:$src)>;
6989*0b57cec5SDimitry Andricdef : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
6990*0b57cec5SDimitry Andric          (VBROADCASTF128 addr:$src)>;
6991*0b57cec5SDimitry Andric}
6992*0b57cec5SDimitry Andric
6993*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6994*0b57cec5SDimitry Andric// VINSERTF128 - Insert packed floating-point values
6995*0b57cec5SDimitry Andric//
6996*0b57cec5SDimitry Andriclet hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
6997*0b57cec5SDimitry Andricdef VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
6998*0b57cec5SDimitry Andric          (ins VR256:$src1, VR128:$src2, u8imm:$src3),
6999*0b57cec5SDimitry Andric          "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
7000*0b57cec5SDimitry Andric          []>, Sched<[WriteFShuffle256]>, VEX_4V, VEX_L;
7001*0b57cec5SDimitry Andriclet mayLoad = 1 in
7002*0b57cec5SDimitry Andricdef VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
7003*0b57cec5SDimitry Andric          (ins VR256:$src1, f128mem:$src2, u8imm:$src3),
7004*0b57cec5SDimitry Andric          "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
7005*0b57cec5SDimitry Andric          []>, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
7006*0b57cec5SDimitry Andric}
7007*0b57cec5SDimitry Andric
7008*0b57cec5SDimitry Andric// To create a 256-bit all ones value, we should produce VCMPTRUEPS
7009*0b57cec5SDimitry Andric// with a YMM register containing zero.
7010*0b57cec5SDimitry Andric// FIXME: Avoid producing vxorps to clear the fake inputs.
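// Predicate 0xf is the always-true comparison, so vcmpps of two (zeroed)
// registers sets every bit of every element, producing the v8i32 all-ones
// vector below without loading a constant.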
7011*0b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in {
7012*0b57cec5SDimitry Andricdef : Pat<(v8i32 immAllOnesV), (VCMPPSYrri (AVX_SET0), (AVX_SET0), 0xf)>;
7013*0b57cec5SDimitry Andric}
7014*0b57cec5SDimitry Andric
7015*0b57cec5SDimitry Andricmulticlass vinsert_lowering<string InstrStr, ValueType From, ValueType To,
7016*0b57cec5SDimitry Andric                            PatFrag memop_frag> {
7017*0b57cec5SDimitry Andric  def : Pat<(vinsert128_insert:$ins (To VR256:$src1), (From VR128:$src2),
7018*0b57cec5SDimitry Andric                                   (iPTR imm)),
7019*0b57cec5SDimitry Andric            (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR128:$src2,
7020*0b57cec5SDimitry Andric                                       (INSERT_get_vinsert128_imm VR256:$ins))>;
7021*0b57cec5SDimitry Andric  def : Pat<(vinsert128_insert:$ins (To VR256:$src1),
7022*0b57cec5SDimitry Andric                                    (From (memop_frag addr:$src2)),
7023*0b57cec5SDimitry Andric                                    (iPTR imm)),
7024*0b57cec5SDimitry Andric            (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
7025*0b57cec5SDimitry Andric                                       (INSERT_get_vinsert128_imm VR256:$ins))>;
7026*0b57cec5SDimitry Andric}
7027*0b57cec5SDimitry Andric
7028*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
7029*0b57cec5SDimitry Andric  defm : vinsert_lowering<"VINSERTF128", v4f32, v8f32, loadv4f32>;
7030*0b57cec5SDimitry Andric  defm : vinsert_lowering<"VINSERTF128", v2f64, v4f64, loadv2f64>;
7031*0b57cec5SDimitry Andric}
7032*0b57cec5SDimitry Andric
7033*0b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in {
7034*0b57cec5SDimitry Andric  defm : vinsert_lowering<"VINSERTF128", v2i64, v4i64,  loadv2i64>;
7035*0b57cec5SDimitry Andric  defm : vinsert_lowering<"VINSERTF128", v4i32, v8i32,  loadv4i32>;
7036*0b57cec5SDimitry Andric  defm : vinsert_lowering<"VINSERTF128", v8i16, v16i16, loadv8i16>;
7037*0b57cec5SDimitry Andric  defm : vinsert_lowering<"VINSERTF128", v16i8, v32i8,  loadv16i8>;
7038*0b57cec5SDimitry Andric}
7039*0b57cec5SDimitry Andric
7040*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7041*0b57cec5SDimitry Andric// VEXTRACTF128 - Extract packed floating-point values
7042*0b57cec5SDimitry Andric//
7043*0b57cec5SDimitry Andriclet hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
7044*0b57cec5SDimitry Andricdef VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
7045*0b57cec5SDimitry Andric          (ins VR256:$src1, u8imm:$src2),
7046*0b57cec5SDimitry Andric          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7047*0b57cec5SDimitry Andric          []>, Sched<[WriteFShuffle256]>, VEX, VEX_L;
7048*0b57cec5SDimitry Andriclet mayStore = 1 in
7049*0b57cec5SDimitry Andricdef VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
7050*0b57cec5SDimitry Andric          (ins f128mem:$dst, VR256:$src1, u8imm:$src2),
7051*0b57cec5SDimitry Andric          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7052*0b57cec5SDimitry Andric          []>, Sched<[WriteFStoreX]>, VEX, VEX_L;
7053*0b57cec5SDimitry Andric}
7054*0b57cec5SDimitry Andric
7055*0b57cec5SDimitry Andricmulticlass vextract_lowering<string InstrStr, ValueType From, ValueType To> {
7056*0b57cec5SDimitry Andric  def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
7057*0b57cec5SDimitry Andric            (To (!cast<Instruction>(InstrStr#rr)
7058*0b57cec5SDimitry Andric                                    (From VR256:$src1),
7059*0b57cec5SDimitry Andric                                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;
7060*0b57cec5SDimitry Andric  def : Pat<(store (To (vextract128_extract:$ext (From VR256:$src1),
7061*0b57cec5SDimitry Andric                                                 (iPTR imm))), addr:$dst),
7062*0b57cec5SDimitry Andric            (!cast<Instruction>(InstrStr#mr) addr:$dst, VR256:$src1,
7063*0b57cec5SDimitry Andric             (EXTRACT_get_vextract128_imm VR128:$ext))>;
7064*0b57cec5SDimitry Andric}
7065*0b57cec5SDimitry Andric
7066*0b57cec5SDimitry Andric// AVX1 patterns
7067*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
7068*0b57cec5SDimitry Andric  defm : vextract_lowering<"VEXTRACTF128", v8f32, v4f32>;
7069*0b57cec5SDimitry Andric  defm : vextract_lowering<"VEXTRACTF128", v4f64, v2f64>;
7070*0b57cec5SDimitry Andric}
7071*0b57cec5SDimitry Andric
7072*0b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in {
7073*0b57cec5SDimitry Andric  defm : vextract_lowering<"VEXTRACTF128", v4i64,  v2i64>;
7074*0b57cec5SDimitry Andric  defm : vextract_lowering<"VEXTRACTF128", v8i32,  v4i32>;
7075*0b57cec5SDimitry Andric  defm : vextract_lowering<"VEXTRACTF128", v16i16, v8i16>;
7076*0b57cec5SDimitry Andric  defm : vextract_lowering<"VEXTRACTF128", v32i8,  v16i8>;
7077*0b57cec5SDimitry Andric}
7078*0b57cec5SDimitry Andric
7079*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7080*0b57cec5SDimitry Andric// VMASKMOV - Conditional SIMD Packed Loads and Stores
7081*0b57cec5SDimitry Andric//
7082*0b57cec5SDimitry Andricmulticlass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
7083*0b57cec5SDimitry Andric                          Intrinsic IntLd, Intrinsic IntLd256,
7084*0b57cec5SDimitry Andric                          Intrinsic IntSt, Intrinsic IntSt256> {
7085*0b57cec5SDimitry Andric  def rm  : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
7086*0b57cec5SDimitry Andric             (ins VR128:$src1, f128mem:$src2),
7087*0b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7088*0b57cec5SDimitry Andric             [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
7089*0b57cec5SDimitry Andric             VEX_4V, Sched<[WriteFMaskedLoad]>;
7090*0b57cec5SDimitry Andric  def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
7091*0b57cec5SDimitry Andric             (ins VR256:$src1, f256mem:$src2),
7092*0b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7093*0b57cec5SDimitry Andric             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
7094*0b57cec5SDimitry Andric             VEX_4V, VEX_L, Sched<[WriteFMaskedLoadY]>;
7095*0b57cec5SDimitry Andric  def mr  : AVX8I<opc_mr, MRMDestMem, (outs),
7096*0b57cec5SDimitry Andric             (ins f128mem:$dst, VR128:$src1, VR128:$src2),
7097*0b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7098*0b57cec5SDimitry Andric             [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>,
7099*0b57cec5SDimitry Andric             VEX_4V, Sched<[WriteFMaskedStore]>;
7100*0b57cec5SDimitry Andric  def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
7101*0b57cec5SDimitry Andric             (ins f256mem:$dst, VR256:$src1, VR256:$src2),
7102*0b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7103*0b57cec5SDimitry Andric             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
7104*0b57cec5SDimitry Andric             VEX_4V, VEX_L, Sched<[WriteFMaskedStoreY]>;
7105*0b57cec5SDimitry Andric}
7106*0b57cec5SDimitry Andric
7107*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in
7108*0b57cec5SDimitry Andricdefm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
7109*0b57cec5SDimitry Andric                                 int_x86_avx_maskload_ps,
7110*0b57cec5SDimitry Andric                                 int_x86_avx_maskload_ps_256,
7111*0b57cec5SDimitry Andric                                 int_x86_avx_maskstore_ps,
7112*0b57cec5SDimitry Andric                                 int_x86_avx_maskstore_ps_256>;
7113*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in
7114*0b57cec5SDimitry Andricdefm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
7115*0b57cec5SDimitry Andric                                 int_x86_avx_maskload_pd,
7116*0b57cec5SDimitry Andric                                 int_x86_avx_maskload_pd_256,
7117*0b57cec5SDimitry Andric                                 int_x86_avx_maskstore_pd,
7118*0b57cec5SDimitry Andric                                 int_x86_avx_maskstore_pd_256>;
7119*0b57cec5SDimitry Andric
7120*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7121*0b57cec5SDimitry Andric// VPERMIL - Permute Single and Double Floating-Point Values
7122*0b57cec5SDimitry Andric//
7123*0b57cec5SDimitry Andric
7124*0b57cec5SDimitry Andricmulticlass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
7125*0b57cec5SDimitry Andric                      RegisterClass RC, X86MemOperand x86memop_f,
7126*0b57cec5SDimitry Andric                      X86MemOperand x86memop_i,
7127*0b57cec5SDimitry Andric                      ValueType f_vt, ValueType i_vt,
7128*0b57cec5SDimitry Andric                      X86FoldableSchedWrite sched,
7129*0b57cec5SDimitry Andric                      X86FoldableSchedWrite varsched> {
7130*0b57cec5SDimitry Andric  let Predicates = [HasAVX, NoVLX] in {
7131*0b57cec5SDimitry Andric    def rr  : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
7132*0b57cec5SDimitry Andric               (ins RC:$src1, RC:$src2),
7133*0b57cec5SDimitry Andric               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7134*0b57cec5SDimitry Andric               [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt RC:$src2))))]>, VEX_4V,
7135*0b57cec5SDimitry Andric               Sched<[varsched]>;
7136*0b57cec5SDimitry Andric    def rm  : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
7137*0b57cec5SDimitry Andric               (ins RC:$src1, x86memop_i:$src2),
7138*0b57cec5SDimitry Andric               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7139*0b57cec5SDimitry Andric               [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1,
7140*0b57cec5SDimitry Andric                              (i_vt (load addr:$src2)))))]>, VEX_4V,
7141*0b57cec5SDimitry Andric               Sched<[varsched.Folded, sched.ReadAfterFold]>;
7142*0b57cec5SDimitry Andric
7143*0b57cec5SDimitry Andric    def ri  : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
7144*0b57cec5SDimitry Andric             (ins RC:$src1, u8imm:$src2),
7145*0b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7146*0b57cec5SDimitry Andric             [(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 imm:$src2))))]>, VEX,
7147*0b57cec5SDimitry Andric             Sched<[sched]>;
7148*0b57cec5SDimitry Andric    def mi  : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
7149*0b57cec5SDimitry Andric             (ins x86memop_f:$src1, u8imm:$src2),
7150*0b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7151*0b57cec5SDimitry Andric             [(set RC:$dst,
7152*0b57cec5SDimitry Andric               (f_vt (X86VPermilpi (load addr:$src1), (i8 imm:$src2))))]>, VEX,
7153*0b57cec5SDimitry Andric             Sched<[sched.Folded]>;
7154*0b57cec5SDimitry Andric  } // Predicates = [HasAVX, NoVLX]
7155*0b57cec5SDimitry Andric}
7156*0b57cec5SDimitry Andric
7157*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in {
7158*0b57cec5SDimitry Andric  defm VPERMILPS  : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
7159*0b57cec5SDimitry Andric                               v4f32, v4i32, SchedWriteFShuffle.XMM,
7160*0b57cec5SDimitry Andric                               SchedWriteFVarShuffle.XMM>;
7161*0b57cec5SDimitry Andric  defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
7162*0b57cec5SDimitry Andric                               v8f32, v8i32, SchedWriteFShuffle.YMM,
7163*0b57cec5SDimitry Andric                               SchedWriteFVarShuffle.YMM>, VEX_L;
7164*0b57cec5SDimitry Andric}
7165*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in {
7166*0b57cec5SDimitry Andric  defm VPERMILPD  : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
7167*0b57cec5SDimitry Andric                               v2f64, v2i64, SchedWriteFShuffle.XMM,
7168*0b57cec5SDimitry Andric                               SchedWriteFVarShuffle.XMM>;
7169*0b57cec5SDimitry Andric  defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
7170*0b57cec5SDimitry Andric                               v4f64, v4i64, SchedWriteFShuffle.YMM,
7171*0b57cec5SDimitry Andric                               SchedWriteFVarShuffle.YMM>, VEX_L;
7172*0b57cec5SDimitry Andric}
7173*0b57cec5SDimitry Andric
7174*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7175*0b57cec5SDimitry Andric// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
7176*0b57cec5SDimitry Andric//
7177*0b57cec5SDimitry Andric
7178*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in {
7179*0b57cec5SDimitry Andriclet isCommutable = 1 in
7180*0b57cec5SDimitry Andricdef VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
7181*0b57cec5SDimitry Andric          (ins VR256:$src1, VR256:$src2, u8imm:$src3),
7182*0b57cec5SDimitry Andric          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
7183*0b57cec5SDimitry Andric          [(set VR256:$dst, (v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
7184*0b57cec5SDimitry Andric                              (i8 imm:$src3))))]>, VEX_4V, VEX_L,
7185*0b57cec5SDimitry Andric          Sched<[WriteFShuffle256]>;
7186*0b57cec5SDimitry Andricdef VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
7187*0b57cec5SDimitry Andric          (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
7188*0b57cec5SDimitry Andric          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
7189*0b57cec5SDimitry Andric          [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4f64 addr:$src2),
7190*0b57cec5SDimitry Andric                             (i8 imm:$src3)))]>, VEX_4V, VEX_L,
7191*0b57cec5SDimitry Andric          Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
7192*0b57cec5SDimitry Andric}
7193*0b57cec5SDimitry Andric
7194*0b57cec5SDimitry Andric// Immediate transform to help with commuting.
7195*0b57cec5SDimitry Andricdef Perm2XCommuteImm : SDNodeXForm<imm, [{
7196*0b57cec5SDimitry Andric  return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N));
7197*0b57cec5SDimitry Andric}]>;
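// In the vperm2f128 immediate, bits 1:0 select the 128-bit lane written to
// the low half of the destination and bits 5:4 select the lane written to the
// high half; selector values 0-1 come from $src1 and 2-3 from $src2. Swapping
// the two sources therefore flips bit 1 and bit 5, i.e. XORs the immediate
// with 0x22 (e.g. 0x31 with ($src1, $src2) is equivalent to 0x13 with the
// sources swapped).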
7198*0b57cec5SDimitry Andric
7199*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
7200*0b57cec5SDimitry Andric// Pattern with load in other operand.
7201*0b57cec5SDimitry Andricdef : Pat<(v4f64 (X86VPerm2x128 (loadv4f64 addr:$src2),
7202*0b57cec5SDimitry Andric                                VR256:$src1, (i8 imm:$imm))),
7203*0b57cec5SDimitry Andric          (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
7204*0b57cec5SDimitry Andric}
7205*0b57cec5SDimitry Andric
7206*0b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in {
7207*0b57cec5SDimitry Andricdef : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
7208*0b57cec5SDimitry Andric          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
7209*0b57cec5SDimitry Andricdef : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
7210*0b57cec5SDimitry Andric                  (loadv4i64 addr:$src2), (i8 imm:$imm))),
7211*0b57cec5SDimitry Andric          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
7212*0b57cec5SDimitry Andric// Pattern with load in other operand.
7213*0b57cec5SDimitry Andricdef : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
7214*0b57cec5SDimitry Andric                                VR256:$src1, (i8 imm:$imm))),
7215*0b57cec5SDimitry Andric          (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
7216*0b57cec5SDimitry Andric}
7217*0b57cec5SDimitry Andric
7218*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7219*0b57cec5SDimitry Andric// VZERO - Zero YMM registers
7220*0b57cec5SDimitry Andric// Note: These instructions do not affect YMM16-YMM31.
7221*0b57cec5SDimitry Andric//
7222*0b57cec5SDimitry Andric
7223*0b57cec5SDimitry Andriclet SchedRW = [WriteSystem] in {
7224*0b57cec5SDimitry Andriclet Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
7225*0b57cec5SDimitry Andric            YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
7226*0b57cec5SDimitry Andric  // Zero All YMM registers
7227*0b57cec5SDimitry Andric  def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
7228*0b57cec5SDimitry Andric                  [(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L,
7229*0b57cec5SDimitry Andric                  Requires<[HasAVX]>, VEX_WIG;
7230*0b57cec5SDimitry Andric
7231*0b57cec5SDimitry Andric  // Zero Upper bits of YMM registers
7232*0b57cec5SDimitry Andric  def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
7233*0b57cec5SDimitry Andric                     [(int_x86_avx_vzeroupper)]>, PS, VEX,
7234*0b57cec5SDimitry Andric                     Requires<[HasAVX]>, VEX_WIG;
7235*0b57cec5SDimitry Andric} // Defs
7236*0b57cec5SDimitry Andric} // SchedRW
7237*0b57cec5SDimitry Andric
7238*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7239*0b57cec5SDimitry Andric// Half precision conversion instructions
7240*0b57cec5SDimitry Andric//
7241*0b57cec5SDimitry Andric
7242*0b57cec5SDimitry Andricmulticlass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop,
7243*0b57cec5SDimitry Andric                      X86FoldableSchedWrite sched> {
7244*0b57cec5SDimitry Andric  def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
7245*0b57cec5SDimitry Andric             "vcvtph2ps\t{$src, $dst|$dst, $src}",
7246*0b57cec5SDimitry Andric             [(set RC:$dst, (X86cvtph2ps VR128:$src))]>,
7247*0b57cec5SDimitry Andric             T8PD, VEX, Sched<[sched]>;
7248*0b57cec5SDimitry Andric  let hasSideEffects = 0, mayLoad = 1 in
7249*0b57cec5SDimitry Andric  def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
7250*0b57cec5SDimitry Andric             "vcvtph2ps\t{$src, $dst|$dst, $src}",
7251*0b57cec5SDimitry Andric             [(set RC:$dst, (X86cvtph2ps (loadv8i16 addr:$src)))]>,
7252*0b57cec5SDimitry Andric             T8PD, VEX, Sched<[sched.Folded]>;
7253*0b57cec5SDimitry Andric}
7254*0b57cec5SDimitry Andric
7255*0b57cec5SDimitry Andricmulticlass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
7256*0b57cec5SDimitry Andric                      SchedWrite RR, SchedWrite MR> {
7257*0b57cec5SDimitry Andric  def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
7258*0b57cec5SDimitry Andric               (ins RC:$src1, i32u8imm:$src2),
7259*0b57cec5SDimitry Andric               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7260*0b57cec5SDimitry Andric               [(set VR128:$dst, (X86cvtps2ph RC:$src1, imm:$src2))]>,
7261*0b57cec5SDimitry Andric               TAPD, VEX, Sched<[RR]>;
7262*0b57cec5SDimitry Andric  let hasSideEffects = 0, mayStore = 1 in
7263*0b57cec5SDimitry Andric  def mr : Ii8<0x1D, MRMDestMem, (outs),
7264*0b57cec5SDimitry Andric               (ins x86memop:$dst, RC:$src1, i32u8imm:$src2),
7265*0b57cec5SDimitry Andric               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7266*0b57cec5SDimitry Andric               TAPD, VEX, Sched<[MR]>;
7267*0b57cec5SDimitry Andric}
7268*0b57cec5SDimitry Andric
7269*0b57cec5SDimitry Andriclet Predicates = [HasF16C, NoVLX] in {
7270*0b57cec5SDimitry Andric  defm VCVTPH2PS  : f16c_ph2ps<VR128, f64mem, WriteCvtPH2PS>;
7271*0b57cec5SDimitry Andric  defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, WriteCvtPH2PSY>, VEX_L;
7272*0b57cec5SDimitry Andric  defm VCVTPS2PH  : f16c_ps2ph<VR128, f64mem, WriteCvtPS2PH,
7273*0b57cec5SDimitry Andric                               WriteCvtPS2PHSt>;
7274*0b57cec5SDimitry Andric  defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, WriteCvtPS2PHY,
7275*0b57cec5SDimitry Andric                               WriteCvtPS2PHYSt>, VEX_L;
7276*0b57cec5SDimitry Andric
7277*0b57cec5SDimitry Andric  // Pattern match vcvtph2ps of a scalar i64 load.
7278*0b57cec5SDimitry Andric  def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
7279*0b57cec5SDimitry Andric            (VCVTPH2PSrm addr:$src)>;
7280*0b57cec5SDimitry Andric  def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16
7281*0b57cec5SDimitry Andric              (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
7282*0b57cec5SDimitry Andric            (VCVTPH2PSrm addr:$src)>;
7283*0b57cec5SDimitry Andric
7284*0b57cec5SDimitry Andric  def : Pat<(store (f64 (extractelt
7285*0b57cec5SDimitry Andric                         (bc_v2f64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))),
7286*0b57cec5SDimitry Andric                         (iPTR 0))), addr:$dst),
7287*0b57cec5SDimitry Andric            (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
7288*0b57cec5SDimitry Andric  def : Pat<(store (i64 (extractelt
7289*0b57cec5SDimitry Andric                         (bc_v2i64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))),
7290*0b57cec5SDimitry Andric                         (iPTR 0))), addr:$dst),
7291*0b57cec5SDimitry Andric            (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
7292*0b57cec5SDimitry Andric  def : Pat<(store (v8i16 (X86cvtps2ph VR256:$src1, i32:$src2)), addr:$dst),
7293*0b57cec5SDimitry Andric            (VCVTPS2PHYmr addr:$dst, VR256:$src1, imm:$src2)>;
7294*0b57cec5SDimitry Andric}
7295*0b57cec5SDimitry Andric
7296*0b57cec5SDimitry Andric// Patterns for matching conversions from float to half-float and vice versa.
7297*0b57cec5SDimitry Andriclet Predicates = [HasF16C, NoVLX] in {
7298*0b57cec5SDimitry Andric  // Use MXCSR.RC for rounding instead of explicitly specifying the default
7299*0b57cec5SDimitry Andric  // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
7300*0b57cec5SDimitry Andric  // configurations we support (the default). However, falling back to MXCSR is
7301*0b57cec5SDimitry Andric  // more consistent with other instructions, which are always controlled by it.
7302*0b57cec5SDimitry Andric  // It's encoded as 0b100.
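  // In the vcvtps2ph immediate, a set bit 2 means "round according to
  // MXCSR.RC" and bits 1:0 are then ignored, which is why the patterns below
  // pass the value 4.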
7303*0b57cec5SDimitry Andric  def : Pat<(fp_to_f16 FR32:$src),
7304*0b57cec5SDimitry Andric            (i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (v8i16 (VCVTPS2PHrr
7305*0b57cec5SDimitry Andric              (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 4))), sub_16bit))>;
7306*0b57cec5SDimitry Andric
7307*0b57cec5SDimitry Andric  def : Pat<(f16_to_fp GR16:$src),
7308*0b57cec5SDimitry Andric            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSrr
7309*0b57cec5SDimitry Andric              (v4i32 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128)))), FR32)) >;
7310*0b57cec5SDimitry Andric
7311*0b57cec5SDimitry Andric  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32:$src))),
7312*0b57cec5SDimitry Andric            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSrr
7313*0b57cec5SDimitry Andric             (v8i16 (VCVTPS2PHrr (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 4)))), FR32)) >;
7314*0b57cec5SDimitry Andric}
7315*0b57cec5SDimitry Andric
7316*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7317*0b57cec5SDimitry Andric// AVX2 Instructions
7318*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7319*0b57cec5SDimitry Andric
7320*0b57cec5SDimitry Andric/// AVX2_blend_rmi - AVX2 blend with 8-bit immediate
7321*0b57cec5SDimitry Andricmulticlass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
7322*0b57cec5SDimitry Andric                          ValueType OpVT, X86FoldableSchedWrite sched,
7323*0b57cec5SDimitry Andric                          RegisterClass RC,
7324*0b57cec5SDimitry Andric                          X86MemOperand x86memop, SDNodeXForm commuteXForm> {
7325*0b57cec5SDimitry Andric  let isCommutable = 1 in
7326*0b57cec5SDimitry Andric  def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
7327*0b57cec5SDimitry Andric        (ins RC:$src1, RC:$src2, u8imm:$src3),
7328*0b57cec5SDimitry Andric        !strconcat(OpcodeStr,
7329*0b57cec5SDimitry Andric            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
7330*0b57cec5SDimitry Andric        [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
7331*0b57cec5SDimitry Andric        Sched<[sched]>, VEX_4V;
7332*0b57cec5SDimitry Andric  def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
7333*0b57cec5SDimitry Andric        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
7334*0b57cec5SDimitry Andric        !strconcat(OpcodeStr,
7335*0b57cec5SDimitry Andric            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
7336*0b57cec5SDimitry Andric        [(set RC:$dst,
7337*0b57cec5SDimitry Andric          (OpVT (OpNode RC:$src1, (load addr:$src2), imm:$src3)))]>,
7338*0b57cec5SDimitry Andric        Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V;
7339*0b57cec5SDimitry Andric
7340*0b57cec5SDimitry Andric  // Pattern to commute if load is in first source.
7341*0b57cec5SDimitry Andric  def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, imm:$src3)),
7342*0b57cec5SDimitry Andric            (!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
7343*0b57cec5SDimitry Andric                                            (commuteXForm imm:$src3))>;
7344*0b57cec5SDimitry Andric}
7345*0b57cec5SDimitry Andric
7346*0b57cec5SDimitry Andriclet Predicates = [HasAVX2] in {
7347*0b57cec5SDimitry Andricdefm VPBLENDD : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v4i32,
7348*0b57cec5SDimitry Andric                               SchedWriteBlend.XMM, VR128, i128mem,
7349*0b57cec5SDimitry Andric                               BlendCommuteImm4>;
7350*0b57cec5SDimitry Andricdefm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32,
7351*0b57cec5SDimitry Andric                                SchedWriteBlend.YMM, VR256, i256mem,
7352*0b57cec5SDimitry Andric                                BlendCommuteImm8>, VEX_L;
7353*0b57cec5SDimitry Andric
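// vpblendd selects 32-bit elements, so the v2i64/v4i64 blend patterns below
// widen the per-qword mask to a per-dword mask via the BlendScale* transforms
// (defined earlier in this file); e.g. a v4i64 mask of 0b0101 would become
// the dword mask 0b00110011, and the commuting variants additionally invert
// the mask.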
7354*0b57cec5SDimitry Andricdef : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3),
7355*0b57cec5SDimitry Andric          (VPBLENDDYrri VR256:$src1, VR256:$src2, (BlendScaleImm4 imm:$src3))>;
7356*0b57cec5SDimitry Andricdef : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3),
7357*0b57cec5SDimitry Andric          (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
7358*0b57cec5SDimitry Andricdef : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3),
7359*0b57cec5SDimitry Andric          (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
7360*0b57cec5SDimitry Andric
7361*0b57cec5SDimitry Andricdef : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
7362*0b57cec5SDimitry Andric          (VPBLENDDrri VR128:$src1, VR128:$src2, (BlendScaleImm2to4 imm:$src3))>;
7363*0b57cec5SDimitry Andricdef : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3),
7364*0b57cec5SDimitry Andric          (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleImm2to4 imm:$src3))>;
7365*0b57cec5SDimitry Andricdef : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3),
7366*0b57cec5SDimitry Andric          (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2to4 imm:$src3))>;
7367*0b57cec5SDimitry Andric}
7368*0b57cec5SDimitry Andric
7369*0b57cec5SDimitry Andric// For insertion into the zero index (low half) of a 256-bit vector, it is
7370*0b57cec5SDimitry Andric// more efficient to generate a blend with immediate instead of an insert*128.
7371*0b57cec5SDimitry Andric// NOTE: We're using FP instructions here, but execution domain fixing should
7372*0b57cec5SDimitry Andric// take care of using integer instructions when profitable.
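// With vblendps, a set mask bit selects the element from the second source,
// so mask 0xf (register forms below) takes the low 128 bits from the inserted
// value and keeps the high 128 bits of $src1, while mask 0xf0 (folded-load
// forms) keeps the inserted value in the low half and takes the high half
// from memory.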
7373*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
7374*0b57cec5SDimitry Andricdef : Pat<(insert_subvector (v8i32 VR256:$src1), (v4i32 VR128:$src2), (iPTR 0)),
7375*0b57cec5SDimitry Andric          (VBLENDPSYrri VR256:$src1,
7376*0b57cec5SDimitry Andric                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7377*0b57cec5SDimitry Andric                                       VR128:$src2, sub_xmm), 0xf)>;
7378*0b57cec5SDimitry Andricdef : Pat<(insert_subvector (v4i64 VR256:$src1), (v2i64 VR128:$src2), (iPTR 0)),
7379*0b57cec5SDimitry Andric          (VBLENDPSYrri VR256:$src1,
7380*0b57cec5SDimitry Andric                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7381*0b57cec5SDimitry Andric                                       VR128:$src2, sub_xmm), 0xf)>;
7382*0b57cec5SDimitry Andricdef : Pat<(insert_subvector (v16i16 VR256:$src1), (v8i16 VR128:$src2), (iPTR 0)),
7383*0b57cec5SDimitry Andric          (VBLENDPSYrri VR256:$src1,
7384*0b57cec5SDimitry Andric                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7385*0b57cec5SDimitry Andric                                       VR128:$src2, sub_xmm), 0xf)>;
7386*0b57cec5SDimitry Andricdef : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)),
7387*0b57cec5SDimitry Andric          (VBLENDPSYrri VR256:$src1,
7388*0b57cec5SDimitry Andric                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7389*0b57cec5SDimitry Andric                                       VR128:$src2, sub_xmm), 0xf)>;
7390*0b57cec5SDimitry Andric
7391*0b57cec5SDimitry Andricdef : Pat<(insert_subvector (loadv8i32 addr:$src2), (v4i32 VR128:$src1), (iPTR 0)),
7392*0b57cec5SDimitry Andric          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7393*0b57cec5SDimitry Andric                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
7394*0b57cec5SDimitry Andricdef : Pat<(insert_subvector (loadv4i64 addr:$src2), (v2i64 VR128:$src1), (iPTR 0)),
7395*0b57cec5SDimitry Andric          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7396*0b57cec5SDimitry Andric                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
7397*0b57cec5SDimitry Andricdef : Pat<(insert_subvector (loadv16i16 addr:$src2), (v8i16 VR128:$src1), (iPTR 0)),
7398*0b57cec5SDimitry Andric          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7399*0b57cec5SDimitry Andric                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
7400*0b57cec5SDimitry Andricdef : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1), (iPTR 0)),
7401*0b57cec5SDimitry Andric          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7402*0b57cec5SDimitry Andric                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
7403*0b57cec5SDimitry Andric}
7404*0b57cec5SDimitry Andric
7405*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7406*0b57cec5SDimitry Andric// VPBROADCAST - Load from memory and broadcast to all elements of the
7407*0b57cec5SDimitry Andric//               destination operand
7408*0b57cec5SDimitry Andric//
7409*0b57cec5SDimitry Andricmulticlass avx2_broadcast<bits<8> opc, string OpcodeStr,
7410*0b57cec5SDimitry Andric                          X86MemOperand x86memop, PatFrag ld_frag,
7411*0b57cec5SDimitry Andric                          ValueType OpVT128, ValueType OpVT256, Predicate prd> {
7412*0b57cec5SDimitry Andric  let Predicates = [HasAVX2, prd] in {
7413*0b57cec5SDimitry Andric    def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
7414*0b57cec5SDimitry Andric                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
7415*0b57cec5SDimitry Andric                  [(set VR128:$dst,
7416*0b57cec5SDimitry Andric                   (OpVT128 (X86VBroadcast (OpVT128 VR128:$src))))]>,
7417*0b57cec5SDimitry Andric                  Sched<[SchedWriteShuffle.XMM]>, VEX;
7418*0b57cec5SDimitry Andric    def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
7419*0b57cec5SDimitry Andric                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
7420*0b57cec5SDimitry Andric                  [(set VR128:$dst,
7421*0b57cec5SDimitry Andric                   (OpVT128 (X86VBroadcast (ld_frag addr:$src))))]>,
7422*0b57cec5SDimitry Andric                  Sched<[SchedWriteShuffle.XMM.Folded]>, VEX;
7423*0b57cec5SDimitry Andric    def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
7424*0b57cec5SDimitry Andric                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
7425*0b57cec5SDimitry Andric                   [(set VR256:$dst,
7426*0b57cec5SDimitry Andric                    (OpVT256 (X86VBroadcast (OpVT128 VR128:$src))))]>,
7427*0b57cec5SDimitry Andric                   Sched<[WriteShuffle256]>, VEX, VEX_L;
7428*0b57cec5SDimitry Andric    def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
7429*0b57cec5SDimitry Andric                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
7430*0b57cec5SDimitry Andric                   [(set VR256:$dst,
7431*0b57cec5SDimitry Andric                    (OpVT256 (X86VBroadcast (ld_frag addr:$src))))]>,
7432*0b57cec5SDimitry Andric                   Sched<[SchedWriteShuffle.XMM.Folded]>, VEX, VEX_L;
7433*0b57cec5SDimitry Andric
7434*0b57cec5SDimitry Andric    // Provide aliases for broadcast from the same register class that
7435*0b57cec5SDimitry Andric    // automatically does the extract.
7436*0b57cec5SDimitry Andric    def : Pat<(OpVT256 (X86VBroadcast (OpVT256 VR256:$src))),
7437*0b57cec5SDimitry Andric              (!cast<Instruction>(NAME#"Yrr")
7438*0b57cec5SDimitry Andric                  (OpVT128 (EXTRACT_SUBREG (OpVT256 VR256:$src),sub_xmm)))>;
7439*0b57cec5SDimitry Andric  }
7440*0b57cec5SDimitry Andric}
7441*0b57cec5SDimitry Andric
7442*0b57cec5SDimitry Andricdefm VPBROADCASTB  : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8,
7443*0b57cec5SDimitry Andric                                    v16i8, v32i8, NoVLX_Or_NoBWI>;
7444*0b57cec5SDimitry Andricdefm VPBROADCASTW  : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16,
7445*0b57cec5SDimitry Andric                                    v8i16, v16i16, NoVLX_Or_NoBWI>;
7446*0b57cec5SDimitry Andricdefm VPBROADCASTD  : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32,
7447*0b57cec5SDimitry Andric                                    v4i32, v8i32, NoVLX>;
7448*0b57cec5SDimitry Andricdefm VPBROADCASTQ  : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
7449*0b57cec5SDimitry Andric                                    v2i64, v4i64, NoVLX>;
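// Each defm above expands to four instructions: 128-bit and 256-bit register
// forms plus the corresponding load-folding forms, e.g. VPBROADCASTD yields
// VPBROADCASTDrr, VPBROADCASTDrm, VPBROADCASTDYrr and VPBROADCASTDYrm.
// Illustration only (AT&T syntax):
//   vpbroadcastd (%rdi), %ymm0   # splat the dword at (%rdi) into all 8 lanes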
7450*0b57cec5SDimitry Andric
7451*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
7452*0b57cec5SDimitry Andric  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
7453*0b57cec5SDimitry Andric  // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
7454*0b57cec5SDimitry Andric            (VPBROADCASTQrm addr:$src)>;
7455*0b57cec5SDimitry Andric  def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
7456*0b57cec5SDimitry Andric            (VPBROADCASTQYrm addr:$src)>;
7457*0b57cec5SDimitry Andric
7458*0b57cec5SDimitry Andric  def : Pat<(v4i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
7459*0b57cec5SDimitry Andric            (VPBROADCASTDrm addr:$src)>;
7460*0b57cec5SDimitry Andric  def : Pat<(v8i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
7461*0b57cec5SDimitry Andric            (VPBROADCASTDYrm addr:$src)>;
7462*0b57cec5SDimitry Andric  def : Pat<(v2i64 (X86VBroadcast (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
7463*0b57cec5SDimitry Andric            (VPBROADCASTQrm addr:$src)>;
7464*0b57cec5SDimitry Andric  def : Pat<(v4i64 (X86VBroadcast (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
7465*0b57cec5SDimitry Andric            (VPBROADCASTQYrm addr:$src)>;
7466*0b57cec5SDimitry Andric}
7467*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
7468*0b57cec5SDimitry Andric  // loadi16 is tricky to fold because i16 is (justifiably) not a desirable
7469*0b57cec5SDimitry Andric  // type for operations (!isTypeDesirableForOp); the splatted value therefore shows up as a truncated i32 load. Match that form here.
7470*0b57cec5SDimitry Andric  def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
7471*0b57cec5SDimitry Andric            (VPBROADCASTWrm addr:$src)>;
7472*0b57cec5SDimitry Andric  def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
7473*0b57cec5SDimitry Andric            (VPBROADCASTWYrm addr:$src)>;
7474*0b57cec5SDimitry Andric  def : Pat<(v8i16 (X86VBroadcast
7475*0b57cec5SDimitry Andric              (i16 (trunc (i32 (extloadi16 addr:$src)))))),
7476*0b57cec5SDimitry Andric            (VPBROADCASTWrm addr:$src)>;
7477*0b57cec5SDimitry Andric  def : Pat<(v8i16 (X86VBroadcast
7478*0b57cec5SDimitry Andric              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
7479*0b57cec5SDimitry Andric            (VPBROADCASTWrm addr:$src)>;
7480*0b57cec5SDimitry Andric  def : Pat<(v16i16 (X86VBroadcast
7481*0b57cec5SDimitry Andric              (i16 (trunc (i32 (extloadi16 addr:$src)))))),
7482*0b57cec5SDimitry Andric            (VPBROADCASTWYrm addr:$src)>;
7483*0b57cec5SDimitry Andric  def : Pat<(v16i16 (X86VBroadcast
7484*0b57cec5SDimitry Andric              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
7485*0b57cec5SDimitry Andric            (VPBROADCASTWYrm addr:$src)>;
7486*0b57cec5SDimitry Andric}
7487*0b57cec5SDimitry Andric
7488*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
7489*0b57cec5SDimitry Andric  // Provide aliases for broadcast from the same register class that
7490*0b57cec5SDimitry Andric  // automatically does the extract.
7491*0b57cec5SDimitry Andric  def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))),
7492*0b57cec5SDimitry Andric            (VBROADCASTSSYrr (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src),
7493*0b57cec5SDimitry Andric                                                    sub_xmm)))>;
7494*0b57cec5SDimitry Andric  def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256:$src))),
7495*0b57cec5SDimitry Andric            (VBROADCASTSDYrr (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src),
7496*0b57cec5SDimitry Andric                                                    sub_xmm)))>;
7497*0b57cec5SDimitry Andric}
7498*0b57cec5SDimitry Andric
7499*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
7500*0b57cec5SDimitry Andric  // Provide fallback in case the load node that is used in the patterns above
7501*0b57cec5SDimitry Andric  // is used by additional users, which prevents the pattern selection.
7502*0b57cec5SDimitry Andric  def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
7503*0b57cec5SDimitry Andric            (VBROADCASTSSrr (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;
7504*0b57cec5SDimitry Andric  def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
7505*0b57cec5SDimitry Andric            (VBROADCASTSSYrr (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;
7506*0b57cec5SDimitry Andric  def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
7507*0b57cec5SDimitry Andric            (VBROADCASTSDYrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;
7508*0b57cec5SDimitry Andric}
7509*0b57cec5SDimitry Andric
7510*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
7511*0b57cec5SDimitry Andric  def : Pat<(v16i8 (X86VBroadcast GR8:$src)),
7512*0b57cec5SDimitry Andric        (VPBROADCASTBrr (v16i8 (COPY_TO_REGCLASS
7513*0b57cec5SDimitry Andric                         (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
7514*0b57cec5SDimitry Andric                                             GR8:$src, sub_8bit)),
7515*0b57cec5SDimitry Andric                         VR128)))>;
7516*0b57cec5SDimitry Andric  def : Pat<(v32i8 (X86VBroadcast GR8:$src)),
7517*0b57cec5SDimitry Andric        (VPBROADCASTBYrr (v16i8 (COPY_TO_REGCLASS
7518*0b57cec5SDimitry Andric                          (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
7519*0b57cec5SDimitry Andric                                              GR8:$src, sub_8bit)),
7520*0b57cec5SDimitry Andric                          VR128)))>;
7521*0b57cec5SDimitry Andric
7522*0b57cec5SDimitry Andric  def : Pat<(v8i16 (X86VBroadcast GR16:$src)),
7523*0b57cec5SDimitry Andric        (VPBROADCASTWrr (v8i16 (COPY_TO_REGCLASS
7524*0b57cec5SDimitry Andric                         (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
7525*0b57cec5SDimitry Andric                                             GR16:$src, sub_16bit)),
7526*0b57cec5SDimitry Andric                         VR128)))>;
7527*0b57cec5SDimitry Andric  def : Pat<(v16i16 (X86VBroadcast GR16:$src)),
7528*0b57cec5SDimitry Andric        (VPBROADCASTWYrr (v8i16 (COPY_TO_REGCLASS
7529*0b57cec5SDimitry Andric                          (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
7530*0b57cec5SDimitry Andric                                              GR16:$src, sub_16bit)),
7531*0b57cec5SDimitry Andric                          VR128)))>;
7532*0b57cec5SDimitry Andric}
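// There is no direct GR8/GR16-to-XMM move, so the patterns above first widen
// the scalar into a GR32 (INSERT_SUBREG into an undef i32), transfer that GR32
// to a vector register, and broadcast from there; vpbroadcastb/vpbroadcastw
// only read the low byte/word, so the undefined upper bits are harmless.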
7533*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
7534*0b57cec5SDimitry Andric  def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
7535*0b57cec5SDimitry Andric            (VPBROADCASTDrr (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)))>;
7536*0b57cec5SDimitry Andric  def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
7537*0b57cec5SDimitry Andric            (VPBROADCASTDYrr (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)))>;
7538*0b57cec5SDimitry Andric  def : Pat<(v2i64 (X86VBroadcast GR64:$src)),
7539*0b57cec5SDimitry Andric            (VPBROADCASTQrr (v2i64 (COPY_TO_REGCLASS GR64:$src, VR128)))>;
7540*0b57cec5SDimitry Andric  def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
7541*0b57cec5SDimitry Andric            (VPBROADCASTQYrr (v2i64 (COPY_TO_REGCLASS GR64:$src, VR128)))>;
7542*0b57cec5SDimitry Andric}
7543*0b57cec5SDimitry Andric
7544*0b57cec5SDimitry Andric// AVX1 broadcast patterns
7545*0b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in {
7546*0b57cec5SDimitry Andricdef : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
7547*0b57cec5SDimitry Andric          (VBROADCASTSSYrm addr:$src)>;
7548*0b57cec5SDimitry Andricdef : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
7549*0b57cec5SDimitry Andric          (VBROADCASTSDYrm addr:$src)>;
7550*0b57cec5SDimitry Andricdef : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
7551*0b57cec5SDimitry Andric          (VBROADCASTSSrm addr:$src)>;
7552*0b57cec5SDimitry Andric}
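// vpbroadcastd/vpbroadcastq require AVX2, so AVX1-only targets select integer
// broadcasts from memory as vbroadcastss/vbroadcastsd instead; the loaded bit
// pattern is identical, only the execution domain differs.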
7553*0b57cec5SDimitry Andric
7554*0b57cec5SDimitry Andric// Provide fallback in case the load node that is used in the patterns above
7555*0b57cec5SDimitry Andric// is used by additional users, which prevents the pattern selection.
7556*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
7557*0b57cec5SDimitry Andric  // 128-bit broadcasts:
7558*0b57cec5SDimitry Andric  def : Pat<(v2f64 (X86VBroadcast f64:$src)),
7559*0b57cec5SDimitry Andric            (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;
7560*0b57cec5SDimitry Andric  def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
7561*0b57cec5SDimitry Andric            (VMOVDDUPrm addr:$src)>;
7562*0b57cec5SDimitry Andric
7563*0b57cec5SDimitry Andric  def : Pat<(v2f64 (X86VBroadcast v2f64:$src)),
7564*0b57cec5SDimitry Andric            (VMOVDDUPrr VR128:$src)>;
7565*0b57cec5SDimitry Andric  def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
7566*0b57cec5SDimitry Andric            (VMOVDDUPrm addr:$src)>;
7567*0b57cec5SDimitry Andric  def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
7568*0b57cec5SDimitry Andric            (VMOVDDUPrm addr:$src)>;
7569*0b57cec5SDimitry Andric}
7570*0b57cec5SDimitry Andric
7571*0b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in {
7572*0b57cec5SDimitry Andric  def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
7573*0b57cec5SDimitry Andric            (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)>;
7574*0b57cec5SDimitry Andric  def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
7575*0b57cec5SDimitry Andric            (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
7576*0b57cec5SDimitry Andric              (v4f32 (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)), sub_xmm),
7577*0b57cec5SDimitry Andric              (v4f32 (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)), 1)>;
7578*0b57cec5SDimitry Andric  def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
7579*0b57cec5SDimitry Andric            (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
7580*0b57cec5SDimitry Andric              (v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), sub_xmm),
7581*0b57cec5SDimitry Andric              (v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), 1)>;
7582*0b57cec5SDimitry Andric
7583*0b57cec5SDimitry Andric  def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
7584*0b57cec5SDimitry Andric            (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)), 0)>;
7585*0b57cec5SDimitry Andric  def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
7586*0b57cec5SDimitry Andric            (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7587*0b57cec5SDimitry Andric              (v4i32 (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)), 0)), sub_xmm),
7588*0b57cec5SDimitry Andric              (v4i32 (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)), 0)), 1)>;
7589*0b57cec5SDimitry Andric  def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
7590*0b57cec5SDimitry Andric            (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
7591*0b57cec5SDimitry Andric              (v4i32 (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR64:$src, VR128)), 0x44)), sub_xmm),
7592*0b57cec5SDimitry Andric              (v4i32 (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR64:$src, VR128)), 0x44)), 1)>;
7593*0b57cec5SDimitry Andric
7594*0b57cec5SDimitry Andric  def : Pat<(v2i64 (X86VBroadcast i64:$src)),
7595*0b57cec5SDimitry Andric            (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR64:$src, VR128)), 0x44)>;
7596*0b57cec5SDimitry Andric  def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
7597*0b57cec5SDimitry Andric            (VMOVDDUPrm addr:$src)>;
7598*0b57cec5SDimitry Andric}
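// Without AVX2 there is no broadcast-from-register instruction, so the patterns
// above splat within 128 bits first (vpermilps/vpshufd with an all-zeros
// selector, or vmovddup / vpshufd $0x44 for 64-bit elements) and, for the
// 256-bit cases, mirror the result into the upper half with vinsertf128.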
7599*0b57cec5SDimitry Andric
7600*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7601*0b57cec5SDimitry Andric// VPERM - Permute instructions
7602*0b57cec5SDimitry Andric//
7603*0b57cec5SDimitry Andric
7604*0b57cec5SDimitry Andricmulticlass avx2_perm<bits<8> opc, string OpcodeStr,
7605*0b57cec5SDimitry Andric                     ValueType OpVT, X86FoldableSchedWrite Sched,
7606*0b57cec5SDimitry Andric                     X86MemOperand memOp> {
7607*0b57cec5SDimitry Andric  let Predicates = [HasAVX2, NoVLX] in {
7608*0b57cec5SDimitry Andric    def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
7609*0b57cec5SDimitry Andric                     (ins VR256:$src1, VR256:$src2),
7610*0b57cec5SDimitry Andric                     !strconcat(OpcodeStr,
7611*0b57cec5SDimitry Andric                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7612*0b57cec5SDimitry Andric                     [(set VR256:$dst,
7613*0b57cec5SDimitry Andric                       (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>,
7614*0b57cec5SDimitry Andric                     Sched<[Sched]>, VEX_4V, VEX_L;
7615*0b57cec5SDimitry Andric    def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
7616*0b57cec5SDimitry Andric                     (ins VR256:$src1, memOp:$src2),
7617*0b57cec5SDimitry Andric                     !strconcat(OpcodeStr,
7618*0b57cec5SDimitry Andric                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7619*0b57cec5SDimitry Andric                     [(set VR256:$dst,
7620*0b57cec5SDimitry Andric                       (OpVT (X86VPermv VR256:$src1,
7621*0b57cec5SDimitry Andric                              (load addr:$src2))))]>,
7622*0b57cec5SDimitry Andric                     Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX_4V, VEX_L;
7623*0b57cec5SDimitry Andric  }
7624*0b57cec5SDimitry Andric}
7625*0b57cec5SDimitry Andric
7626*0b57cec5SDimitry Andricdefm VPERMD : avx2_perm<0x36, "vpermd", v8i32, WriteVarShuffle256, i256mem>;
7627*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in
7628*0b57cec5SDimitry Andricdefm VPERMPS : avx2_perm<0x16, "vpermps", v8f32, WriteFVarShuffle256, f256mem>;
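// vpermd/vpermps perform a full cross-lane permute of 32-bit elements: $src1
// supplies the per-element indices (only the low three bits of each are used)
// and $src2 (register or memory) supplies the data. This is unlike
// vpshufd/vshufps, which can only rearrange elements within 128-bit lanes.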
7629*0b57cec5SDimitry Andric
7630*0b57cec5SDimitry Andricmulticlass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
7631*0b57cec5SDimitry Andric                         ValueType OpVT, X86FoldableSchedWrite Sched,
7632*0b57cec5SDimitry Andric                         X86MemOperand memOp> {
7633*0b57cec5SDimitry Andric  let Predicates = [HasAVX2, NoVLX] in {
7634*0b57cec5SDimitry Andric    def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
7635*0b57cec5SDimitry Andric                       (ins VR256:$src1, u8imm:$src2),
7636*0b57cec5SDimitry Andric                       !strconcat(OpcodeStr,
7637*0b57cec5SDimitry Andric                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7638*0b57cec5SDimitry Andric                       [(set VR256:$dst,
7639*0b57cec5SDimitry Andric                         (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>,
7640*0b57cec5SDimitry Andric                       Sched<[Sched]>, VEX, VEX_L;
7641*0b57cec5SDimitry Andric    def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
7642*0b57cec5SDimitry Andric                       (ins memOp:$src1, u8imm:$src2),
7643*0b57cec5SDimitry Andric                       !strconcat(OpcodeStr,
7644*0b57cec5SDimitry Andric                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7645*0b57cec5SDimitry Andric                       [(set VR256:$dst,
7646*0b57cec5SDimitry Andric                         (OpVT (X86VPermi (mem_frag addr:$src1),
7647*0b57cec5SDimitry Andric                                (i8 imm:$src2))))]>,
7648*0b57cec5SDimitry Andric                       Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VEX_L;
7649*0b57cec5SDimitry Andric  }
7650*0b57cec5SDimitry Andric}
7651*0b57cec5SDimitry Andric
7652*0b57cec5SDimitry Andricdefm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64,
7653*0b57cec5SDimitry Andric                            WriteShuffle256, i256mem>, VEX_W;
7654*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in
7655*0b57cec5SDimitry Andricdefm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64,
7656*0b57cec5SDimitry Andric                             WriteFShuffle256, f256mem>, VEX_W;
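// vpermq/vpermpd pick the source qword for each destination element with a
// 2-bit field of the immediate, allowing arbitrary cross-lane 64-bit shuffles
// in a single instruction. Illustration only (AT&T syntax):
//   vpermq $0x4e, %ymm1, %ymm0   # 0x4e swaps the two 128-bit halves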
7657*0b57cec5SDimitry Andric
7658*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7659*0b57cec5SDimitry Andric// VPERM2I128 - Permute Integer Values in 128-bit chunks
7660*0b57cec5SDimitry Andric//
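// The immediate picks a 128-bit source lane for each half of the result: bits
// [1:0] select the low half and bits [5:4] the high half (0/1 = low/high lane
// of $src1, 2/3 = low/high lane of $src2), while bits 3 and 7 zero the
// corresponding half instead.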
7661*0b57cec5SDimitry Andriclet isCommutable = 1 in
7662*0b57cec5SDimitry Andricdef VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
7663*0b57cec5SDimitry Andric          (ins VR256:$src1, VR256:$src2, u8imm:$src3),
7664*0b57cec5SDimitry Andric          "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
7665*0b57cec5SDimitry Andric          [(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
7666*0b57cec5SDimitry Andric                            (i8 imm:$src3))))]>, Sched<[WriteShuffle256]>,
7667*0b57cec5SDimitry Andric          VEX_4V, VEX_L;
7668*0b57cec5SDimitry Andricdef VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
7669*0b57cec5SDimitry Andric          (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
7670*0b57cec5SDimitry Andric          "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
7671*0b57cec5SDimitry Andric          [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2),
7672*0b57cec5SDimitry Andric                             (i8 imm:$src3)))]>,
7673*0b57cec5SDimitry Andric          Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
7674*0b57cec5SDimitry Andric
7675*0b57cec5SDimitry Andriclet Predicates = [HasAVX2] in
7676*0b57cec5SDimitry Andricdef : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
7677*0b57cec5SDimitry Andric                                VR256:$src1, (i8 imm:$imm))),
7678*0b57cec5SDimitry Andric          (VPERM2I128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
7679*0b57cec5SDimitry Andric
7680*0b57cec5SDimitry Andric
7681*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7682*0b57cec5SDimitry Andric// VINSERTI128 - Insert packed integer values
7683*0b57cec5SDimitry Andric//
7684*0b57cec5SDimitry Andriclet hasSideEffects = 0 in {
7685*0b57cec5SDimitry Andricdef VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
7686*0b57cec5SDimitry Andric          (ins VR256:$src1, VR128:$src2, u8imm:$src3),
7687*0b57cec5SDimitry Andric          "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
7688*0b57cec5SDimitry Andric          []>, Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
7689*0b57cec5SDimitry Andriclet mayLoad = 1 in
7690*0b57cec5SDimitry Andricdef VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
7691*0b57cec5SDimitry Andric          (ins VR256:$src1, i128mem:$src2, u8imm:$src3),
7692*0b57cec5SDimitry Andric          "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
7693*0b57cec5SDimitry Andric          []>, Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
7694*0b57cec5SDimitry Andric}
7695*0b57cec5SDimitry Andric
7696*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
7697*0b57cec5SDimitry Andric  defm : vinsert_lowering<"VINSERTI128", v2i64, v4i64,  loadv2i64>;
7698*0b57cec5SDimitry Andric  defm : vinsert_lowering<"VINSERTI128", v4i32, v8i32,  loadv4i32>;
7699*0b57cec5SDimitry Andric  defm : vinsert_lowering<"VINSERTI128", v8i16, v16i16, loadv8i16>;
7700*0b57cec5SDimitry Andric  defm : vinsert_lowering<"VINSERTI128", v16i8, v32i8,  loadv16i8>;
7701*0b57cec5SDimitry Andric}
7702*0b57cec5SDimitry Andric
7703*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7704*0b57cec5SDimitry Andric// VEXTRACTI128 - Extract packed integer values
7705*0b57cec5SDimitry Andric//
7706*0b57cec5SDimitry Andricdef VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
7707*0b57cec5SDimitry Andric          (ins VR256:$src1, u8imm:$src2),
7708*0b57cec5SDimitry Andric          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7709*0b57cec5SDimitry Andric          Sched<[WriteShuffle256]>, VEX, VEX_L;
7710*0b57cec5SDimitry Andriclet hasSideEffects = 0, mayStore = 1 in
7711*0b57cec5SDimitry Andricdef VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
7712*0b57cec5SDimitry Andric          (ins i128mem:$dst, VR256:$src1, u8imm:$src2),
7713*0b57cec5SDimitry Andric          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7714*0b57cec5SDimitry Andric          Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_L;
7715*0b57cec5SDimitry Andric
7716*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
7717*0b57cec5SDimitry Andric  defm : vextract_lowering<"VEXTRACTI128", v4i64,  v2i64>;
7718*0b57cec5SDimitry Andric  defm : vextract_lowering<"VEXTRACTI128", v8i32,  v4i32>;
7719*0b57cec5SDimitry Andric  defm : vextract_lowering<"VEXTRACTI128", v16i16, v8i16>;
7720*0b57cec5SDimitry Andric  defm : vextract_lowering<"VEXTRACTI128", v32i8,  v16i8>;
7721*0b57cec5SDimitry Andric}
7722*0b57cec5SDimitry Andric
7723*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7724*0b57cec5SDimitry Andric// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
7725*0b57cec5SDimitry Andric//
7726*0b57cec5SDimitry Andricmulticlass avx2_pmovmask<string OpcodeStr,
7727*0b57cec5SDimitry Andric                         Intrinsic IntLd128, Intrinsic IntLd256,
7728*0b57cec5SDimitry Andric                         Intrinsic IntSt128, Intrinsic IntSt256> {
7729*0b57cec5SDimitry Andric  def rm  : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
7730*0b57cec5SDimitry Andric             (ins VR128:$src1, i128mem:$src2),
7731*0b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7732*0b57cec5SDimitry Andric             [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>,
7733*0b57cec5SDimitry Andric             VEX_4V, Sched<[WriteVecMaskedLoad]>;
7734*0b57cec5SDimitry Andric  def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
7735*0b57cec5SDimitry Andric             (ins VR256:$src1, i256mem:$src2),
7736*0b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7737*0b57cec5SDimitry Andric             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
7738*0b57cec5SDimitry Andric             VEX_4V, VEX_L, Sched<[WriteVecMaskedLoadY]>;
7739*0b57cec5SDimitry Andric  def mr  : AVX28I<0x8e, MRMDestMem, (outs),
7740*0b57cec5SDimitry Andric             (ins i128mem:$dst, VR128:$src1, VR128:$src2),
7741*0b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7742*0b57cec5SDimitry Andric             [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>,
7743*0b57cec5SDimitry Andric             VEX_4V, Sched<[WriteVecMaskedStore]>;
7744*0b57cec5SDimitry Andric  def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
7745*0b57cec5SDimitry Andric             (ins i256mem:$dst, VR256:$src1, VR256:$src2),
7746*0b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7747*0b57cec5SDimitry Andric             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
7748*0b57cec5SDimitry Andric             VEX_4V, VEX_L, Sched<[WriteVecMaskedStoreY]>;
7749*0b57cec5SDimitry Andric}
7750*0b57cec5SDimitry Andric
7751*0b57cec5SDimitry Andricdefm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
7752*0b57cec5SDimitry Andric                                int_x86_avx2_maskload_d,
7753*0b57cec5SDimitry Andric                                int_x86_avx2_maskload_d_256,
7754*0b57cec5SDimitry Andric                                int_x86_avx2_maskstore_d,
7755*0b57cec5SDimitry Andric                                int_x86_avx2_maskstore_d_256>;
7756*0b57cec5SDimitry Andricdefm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
7757*0b57cec5SDimitry Andric                                int_x86_avx2_maskload_q,
7758*0b57cec5SDimitry Andric                                int_x86_avx2_maskload_q_256,
7759*0b57cec5SDimitry Andric                                int_x86_avx2_maskstore_q,
7760*0b57cec5SDimitry Andric                                int_x86_avx2_maskstore_q_256>, VEX_W;
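// vpmaskmovd/vpmaskmovq load or store an element only if the most significant
// bit of the corresponding mask element is set; masked-off load elements are
// zeroed, masked-off store elements leave memory untouched, and faults on
// masked-off elements are suppressed.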
7761*0b57cec5SDimitry Andric
7762*0b57cec5SDimitry Andricmulticlass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT,
7763*0b57cec5SDimitry Andric                          ValueType MaskVT, string BlendStr, ValueType ZeroVT> {
7764*0b57cec5SDimitry Andric    // masked store
7765*0b57cec5SDimitry Andric    def: Pat<(masked_store (VT RC:$src), addr:$ptr, (MaskVT RC:$mask)),
7766*0b57cec5SDimitry Andric             (!cast<Instruction>(InstrStr#"mr") addr:$ptr, RC:$mask, RC:$src)>;
7767*0b57cec5SDimitry Andric    // masked load
7768*0b57cec5SDimitry Andric    def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), undef)),
7769*0b57cec5SDimitry Andric             (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
7770*0b57cec5SDimitry Andric    def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask),
7771*0b57cec5SDimitry Andric                              (VT immAllZerosV))),
7772*0b57cec5SDimitry Andric             (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
7773*0b57cec5SDimitry Andric}
7774*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
7775*0b57cec5SDimitry Andric  defm : maskmov_lowering<"VMASKMOVPS", VR128, v4f32, v4i32, "VBLENDVPS", v4i32>;
7776*0b57cec5SDimitry Andric  defm : maskmov_lowering<"VMASKMOVPD", VR128, v2f64, v2i64, "VBLENDVPD", v4i32>;
7777*0b57cec5SDimitry Andric  defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8f32, v8i32, "VBLENDVPSY", v8i32>;
7778*0b57cec5SDimitry Andric  defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4f64, v4i64, "VBLENDVPDY", v8i32>;
7779*0b57cec5SDimitry Andric}
7780*0b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in {
7781*0b57cec5SDimitry Andric  // AVX1 has no integer (i32/i64) masked load/store, so use the ps/pd versions.
7782*0b57cec5SDimitry Andric  defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8i32, v8i32, "VBLENDVPSY", v8i32>;
7783*0b57cec5SDimitry Andric  defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4i64, v4i64, "VBLENDVPDY", v8i32>;
7784*0b57cec5SDimitry Andric  defm : maskmov_lowering<"VMASKMOVPS", VR128, v4i32, v4i32, "VBLENDVPS", v4i32>;
7785*0b57cec5SDimitry Andric  defm : maskmov_lowering<"VMASKMOVPD", VR128, v2i64, v2i64, "VBLENDVPD", v4i32>;
7786*0b57cec5SDimitry Andric}
7787*0b57cec5SDimitry Andriclet Predicates = [HasAVX2] in {
7788*0b57cec5SDimitry Andric  defm : maskmov_lowering<"VPMASKMOVDY", VR256, v8i32, v8i32, "VBLENDVPSY", v8i32>;
7789*0b57cec5SDimitry Andric  defm : maskmov_lowering<"VPMASKMOVQY", VR256, v4i64, v4i64, "VBLENDVPDY", v8i32>;
7790*0b57cec5SDimitry Andric  defm : maskmov_lowering<"VPMASKMOVD", VR128, v4i32, v4i32, "VBLENDVPS", v4i32>;
7791*0b57cec5SDimitry Andric  defm : maskmov_lowering<"VPMASKMOVQ", VR128, v2i64, v2i64, "VBLENDVPD", v4i32>;
7792*0b57cec5SDimitry Andric}
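// The maskmov_lowering patterns above map generic masked_load/masked_store
// nodes onto these instructions. Only masked loads whose pass-through value is
// undef or all-zeros can be matched directly, because the hardware always
// zeroes the masked-off elements; other pass-through values need a separate
// blend during lowering.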
7793*0b57cec5SDimitry Andric
7794*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7795*0b57cec5SDimitry Andric// SubVector Broadcasts
7796*0b57cec5SDimitry Andric// Provide fallback in case the load node that is used in the patterns above
7797*0b57cec5SDimitry Andric// is used by additional users, which prevents the pattern selection.
7798*0b57cec5SDimitry Andric
7799*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
7800*0b57cec5SDimitry Andricdef : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128:$src))),
7801*0b57cec5SDimitry Andric          (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
7802*0b57cec5SDimitry Andric                         (v2f64 VR128:$src), 1)>;
7803*0b57cec5SDimitry Andricdef : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128:$src))),
7804*0b57cec5SDimitry Andric          (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
7805*0b57cec5SDimitry Andric                         (v4f32 VR128:$src), 1)>;
7806*0b57cec5SDimitry Andric}
7807*0b57cec5SDimitry Andric
7808*0b57cec5SDimitry Andric// NOTE: We're using FP instructions here, but execution domain fixing can
7809*0b57cec5SDimitry Andric// convert to integer when profitable.
7810*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
7811*0b57cec5SDimitry Andricdef : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128:$src))),
7812*0b57cec5SDimitry Andric          (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
7813*0b57cec5SDimitry Andric                         (v2i64 VR128:$src), 1)>;
7814*0b57cec5SDimitry Andricdef : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128:$src))),
7815*0b57cec5SDimitry Andric          (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
7816*0b57cec5SDimitry Andric                         (v4i32 VR128:$src), 1)>;
7817*0b57cec5SDimitry Andricdef : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128:$src))),
7818*0b57cec5SDimitry Andric          (VINSERTF128rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
7819*0b57cec5SDimitry Andric                         (v8i16 VR128:$src), 1)>;
7820*0b57cec5SDimitry Andricdef : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128:$src))),
7821*0b57cec5SDimitry Andric          (VINSERTF128rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
7822*0b57cec5SDimitry Andric                         (v16i8 VR128:$src), 1)>;
7823*0b57cec5SDimitry Andric}
7824*0b57cec5SDimitry Andric
7825*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7826*0b57cec5SDimitry Andric// Variable Bit Shifts
7827*0b57cec5SDimitry Andric//
7828*0b57cec5SDimitry Andricmulticlass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
7829*0b57cec5SDimitry Andric                          ValueType vt128, ValueType vt256> {
7830*0b57cec5SDimitry Andric  def rr  : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
7831*0b57cec5SDimitry Andric             (ins VR128:$src1, VR128:$src2),
7832*0b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7833*0b57cec5SDimitry Andric             [(set VR128:$dst,
7834*0b57cec5SDimitry Andric               (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>,
7835*0b57cec5SDimitry Andric             VEX_4V, Sched<[SchedWriteVarVecShift.XMM]>;
7836*0b57cec5SDimitry Andric  def rm  : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
7837*0b57cec5SDimitry Andric             (ins VR128:$src1, i128mem:$src2),
7838*0b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7839*0b57cec5SDimitry Andric             [(set VR128:$dst,
7840*0b57cec5SDimitry Andric               (vt128 (OpNode VR128:$src1,
7841*0b57cec5SDimitry Andric                       (vt128 (load addr:$src2)))))]>,
7842*0b57cec5SDimitry Andric             VEX_4V, Sched<[SchedWriteVarVecShift.XMM.Folded,
7843*0b57cec5SDimitry Andric                            SchedWriteVarVecShift.XMM.ReadAfterFold]>;
7844*0b57cec5SDimitry Andric  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
7845*0b57cec5SDimitry Andric             (ins VR256:$src1, VR256:$src2),
7846*0b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7847*0b57cec5SDimitry Andric             [(set VR256:$dst,
7848*0b57cec5SDimitry Andric               (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>,
7849*0b57cec5SDimitry Andric             VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM]>;
7850*0b57cec5SDimitry Andric  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
7851*0b57cec5SDimitry Andric             (ins VR256:$src1, i256mem:$src2),
7852*0b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7853*0b57cec5SDimitry Andric             [(set VR256:$dst,
7854*0b57cec5SDimitry Andric               (vt256 (OpNode VR256:$src1,
7855*0b57cec5SDimitry Andric                       (vt256 (load addr:$src2)))))]>,
7856*0b57cec5SDimitry Andric             VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded,
7857*0b57cec5SDimitry Andric                                   SchedWriteVarVecShift.YMM.ReadAfterFold]>;
7858*0b57cec5SDimitry Andric}
7859*0b57cec5SDimitry Andric
7860*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
7861*0b57cec5SDimitry Andric  defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", X86vshlv, v4i32, v8i32>;
7862*0b57cec5SDimitry Andric  defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", X86vshlv, v2i64, v4i64>, VEX_W;
7863*0b57cec5SDimitry Andric  defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", X86vsrlv, v4i32, v8i32>;
7864*0b57cec5SDimitry Andric  defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", X86vsrlv, v2i64, v4i64>, VEX_W;
7865*0b57cec5SDimitry Andric  defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", X86vsrav, v4i32, v8i32>;
7866*0b57cec5SDimitry Andric}
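// Each element is shifted by the count held in the corresponding element of
// the second operand; counts of at least the element width yield zero for the
// logical shifts, while vpsravd fills with the sign bit. AVX2 provides no
// vpsravq, which is why only the 32-bit arithmetic variant is defined here
// (the 64-bit form only arrives with AVX-512).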
7867*0b57cec5SDimitry Andric
7868*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7869*0b57cec5SDimitry Andric// VGATHER - GATHER Operations
7870*0b57cec5SDimitry Andric
7871*0b57cec5SDimitry Andric// FIXME: Improve scheduling of gather instructions.
7872*0b57cec5SDimitry Andricmulticlass avx2_gather<bits<8> opc, string OpcodeStr, ValueType VTx,
7873*0b57cec5SDimitry Andric                       ValueType VTy, PatFrag GatherNode128,
7874*0b57cec5SDimitry Andric                       PatFrag GatherNode256, RegisterClass RC256,
7875*0b57cec5SDimitry Andric                       X86MemOperand memop128, X86MemOperand memop256,
7876*0b57cec5SDimitry Andric                       ValueType MTx = VTx, ValueType MTy = VTy> {
7877*0b57cec5SDimitry Andric  def rm  : AVX28I<opc, MRMSrcMem4VOp3, (outs VR128:$dst, VR128:$mask_wb),
7878*0b57cec5SDimitry Andric            (ins VR128:$src1, memop128:$src2, VR128:$mask),
7879*0b57cec5SDimitry Andric            !strconcat(OpcodeStr,
7880*0b57cec5SDimitry Andric              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
7881*0b57cec5SDimitry Andric            [(set (VTx VR128:$dst), (MTx VR128:$mask_wb),
7882*0b57cec5SDimitry Andric                  (GatherNode128 VR128:$src1, VR128:$mask,
7883*0b57cec5SDimitry Andric                                vectoraddr:$src2))]>,
7884*0b57cec5SDimitry Andric            VEX, Sched<[WriteLoad]>;
7885*0b57cec5SDimitry Andric  def Yrm : AVX28I<opc, MRMSrcMem4VOp3, (outs RC256:$dst, RC256:$mask_wb),
7886*0b57cec5SDimitry Andric            (ins RC256:$src1, memop256:$src2, RC256:$mask),
7887*0b57cec5SDimitry Andric            !strconcat(OpcodeStr,
7888*0b57cec5SDimitry Andric              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
7889*0b57cec5SDimitry Andric            [(set (VTy RC256:$dst), (MTy RC256:$mask_wb),
7890*0b57cec5SDimitry Andric                  (GatherNode256 RC256:$src1, RC256:$mask,
7891*0b57cec5SDimitry Andric                                vectoraddr:$src2))]>,
7892*0b57cec5SDimitry Andric            VEX, VEX_L, Sched<[WriteLoad]>;
7893*0b57cec5SDimitry Andric}
7894*0b57cec5SDimitry Andric
7895*0b57cec5SDimitry Andriclet Predicates = [HasAVX2] in {
7896*0b57cec5SDimitry Andric  let mayLoad = 1, hasSideEffects = 0, Constraints
7897*0b57cec5SDimitry Andric    = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
7898*0b57cec5SDimitry Andric    in {
7899*0b57cec5SDimitry Andric    defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", v2i64, v4i64, mgatherv4i32,
7900*0b57cec5SDimitry Andric                        mgatherv4i32, VR256, vx128mem, vx256mem>, VEX_W;
7901*0b57cec5SDimitry Andric    defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", v2i64, v4i64, mgatherv2i64,
7902*0b57cec5SDimitry Andric                        mgatherv4i64, VR256, vx128mem, vy256mem>, VEX_W;
7903*0b57cec5SDimitry Andric    defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", v4i32, v8i32, mgatherv4i32,
7904*0b57cec5SDimitry Andric                        mgatherv8i32, VR256, vx128mem, vy256mem>;
7905*0b57cec5SDimitry Andric    defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", v4i32, v4i32, mgatherv2i64,
7906*0b57cec5SDimitry Andric                        mgatherv4i64, VR128, vx64mem, vy128mem>;
7907*0b57cec5SDimitry Andric
7908*0b57cec5SDimitry Andric    let ExeDomain = SSEPackedDouble in {
7909*0b57cec5SDimitry Andric      defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", v2f64, v4f64, mgatherv4i32,
7910*0b57cec5SDimitry Andric                          mgatherv4i32, VR256, vx128mem, vx256mem,
7911*0b57cec5SDimitry Andric                          v2i64, v4i64>, VEX_W;
7912*0b57cec5SDimitry Andric      defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", v2f64, v4f64, mgatherv2i64,
7913*0b57cec5SDimitry Andric                          mgatherv4i64, VR256, vx128mem, vy256mem,
7914*0b57cec5SDimitry Andric                          v2i64, v4i64>, VEX_W;
7915*0b57cec5SDimitry Andric    }
7916*0b57cec5SDimitry Andric
7917*0b57cec5SDimitry Andric    let ExeDomain = SSEPackedSingle in {
7918*0b57cec5SDimitry Andric      defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", v4f32, v8f32, mgatherv4i32,
7919*0b57cec5SDimitry Andric                          mgatherv8i32, VR256, vx128mem, vy256mem,
7920*0b57cec5SDimitry Andric                          v4i32, v8i32>;
7921*0b57cec5SDimitry Andric      defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", v4f32, v4f32, mgatherv2i64,
7922*0b57cec5SDimitry Andric                          mgatherv4i64, VR128, vx64mem, vy128mem,
7923*0b57cec5SDimitry Andric                          v4i32, v4i32>;
7924*0b57cec5SDimitry Andric    }
7925*0b57cec5SDimitry Andric  }
7926*0b57cec5SDimitry Andric}
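// A gather loads only the elements whose mask MSB is set (the others keep the
// value from $src1, hence the $src1 = $dst tie) and clears each mask element as
// it is loaded, so the mask register is modelled as a second output ($mask_wb)
// with the earlyclobber constraints above.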
7927*0b57cec5SDimitry Andric
7928*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7929*0b57cec5SDimitry Andric// GFNI instructions
7930*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
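// gf2p8mulb multiplies the corresponding bytes of its sources as elements of
// GF(2^8), reduced modulo x^8 + x^4 + x^3 + x + 1 (0x11B). gf2p8affineqb
// applies an 8x8 bit matrix (taken from each qword of the second source) to
// every byte of the first source and XORs in the immediate byte;
// gf2p8affineinvqb first replaces each byte with its GF(2^8) inverse.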
7931*0b57cec5SDimitry Andric
7932*0b57cec5SDimitry Andricmulticlass GF2P8MULB_rm<string OpcodeStr, ValueType OpVT,
7933*0b57cec5SDimitry Andric                        RegisterClass RC, PatFrag MemOpFrag,
7934*0b57cec5SDimitry Andric                        X86MemOperand X86MemOp, bit Is2Addr = 0> {
7935*0b57cec5SDimitry Andric  let ExeDomain = SSEPackedInt,
7936*0b57cec5SDimitry Andric      AsmString = !if(Is2Addr,
7937*0b57cec5SDimitry Andric        OpcodeStr##"\t{$src2, $dst|$dst, $src2}",
7938*0b57cec5SDimitry Andric        OpcodeStr##"\t{$src2, $src1, $dst|$dst, $src1, $src2}") in {
7939*0b57cec5SDimitry Andric    let isCommutable = 1 in
7940*0b57cec5SDimitry Andric    def rr : PDI<0xCF, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), "",
7941*0b57cec5SDimitry Andric                 [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, RC:$src2)))]>,
7942*0b57cec5SDimitry Andric             Sched<[SchedWriteVecALU.XMM]>, T8PD;
7943*0b57cec5SDimitry Andric
7944*0b57cec5SDimitry Andric    def rm : PDI<0xCF, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, X86MemOp:$src2), "",
7945*0b57cec5SDimitry Andric                 [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1,
7946*0b57cec5SDimitry Andric                                 (MemOpFrag addr:$src2))))]>,
7947*0b57cec5SDimitry Andric             Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>, T8PD;
7948*0b57cec5SDimitry Andric  }
7949*0b57cec5SDimitry Andric}
7950*0b57cec5SDimitry Andric
7951*0b57cec5SDimitry Andricmulticlass GF2P8AFFINE_rmi<bits<8> Op, string OpStr, ValueType OpVT,
7952*0b57cec5SDimitry Andric                           SDNode OpNode, RegisterClass RC, PatFrag MemOpFrag,
7953*0b57cec5SDimitry Andric                           X86MemOperand X86MemOp, bit Is2Addr = 0> {
7954*0b57cec5SDimitry Andric  let AsmString = !if(Is2Addr,
7955*0b57cec5SDimitry Andric      OpStr##"\t{$src3, $src2, $dst|$dst, $src2, $src3}",
7956*0b57cec5SDimitry Andric      OpStr##"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in {
7957*0b57cec5SDimitry Andric  def rri : Ii8<Op, MRMSrcReg, (outs RC:$dst),
7958*0b57cec5SDimitry Andric              (ins RC:$src1, RC:$src2, u8imm:$src3), "",
7959*0b57cec5SDimitry Andric              [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))],
7960*0b57cec5SDimitry Andric              SSEPackedInt>, Sched<[SchedWriteVecALU.XMM]>;
7961*0b57cec5SDimitry Andric  def rmi : Ii8<Op, MRMSrcMem, (outs RC:$dst),
7962*0b57cec5SDimitry Andric              (ins RC:$src1, X86MemOp:$src2, u8imm:$src3), "",
7963*0b57cec5SDimitry Andric              [(set RC:$dst, (OpVT (OpNode RC:$src1,
7964*0b57cec5SDimitry Andric                                    (MemOpFrag addr:$src2),
7965*0b57cec5SDimitry Andric                              imm:$src3)))], SSEPackedInt>,
7966*0b57cec5SDimitry Andric              Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>;
7967*0b57cec5SDimitry Andric  }
7968*0b57cec5SDimitry Andric}
7969*0b57cec5SDimitry Andric
7970*0b57cec5SDimitry Andricmulticlass GF2P8AFFINE_common<bits<8> Op, string OpStr, SDNode OpNode> {
7971*0b57cec5SDimitry Andric  let Constraints = "$src1 = $dst",
7972*0b57cec5SDimitry Andric      Predicates  = [HasGFNI, UseSSE2] in
7973*0b57cec5SDimitry Andric  defm NAME         : GF2P8AFFINE_rmi<Op, OpStr, v16i8, OpNode,
7974*0b57cec5SDimitry Andric                                      VR128, load, i128mem, 1>;
7975*0b57cec5SDimitry Andric  let Predicates  = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
7976*0b57cec5SDimitry Andric    defm V##NAME    : GF2P8AFFINE_rmi<Op, "v"##OpStr, v16i8, OpNode, VR128,
7977*0b57cec5SDimitry Andric                                      load, i128mem>, VEX_4V, VEX_W;
7978*0b57cec5SDimitry Andric    defm V##NAME##Y : GF2P8AFFINE_rmi<Op, "v"##OpStr, v32i8, OpNode, VR256,
7979*0b57cec5SDimitry Andric                                      load, i256mem>, VEX_4V, VEX_L, VEX_W;
7980*0b57cec5SDimitry Andric  }
7981*0b57cec5SDimitry Andric}
7982*0b57cec5SDimitry Andric
7983*0b57cec5SDimitry Andric// GF2P8MULB
7984*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst",
7985*0b57cec5SDimitry Andric    Predicates  = [HasGFNI, UseSSE2] in
7986*0b57cec5SDimitry Andricdefm GF2P8MULB      : GF2P8MULB_rm<"gf2p8mulb", v16i8, VR128, memop,
7987*0b57cec5SDimitry Andric                                    i128mem, 1>;
7988*0b57cec5SDimitry Andriclet Predicates  = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
7989*0b57cec5SDimitry Andric  defm VGF2P8MULB   : GF2P8MULB_rm<"vgf2p8mulb", v16i8, VR128, load,
7990*0b57cec5SDimitry Andric                                   i128mem>, VEX_4V;
7991*0b57cec5SDimitry Andric  defm VGF2P8MULBY  : GF2P8MULB_rm<"vgf2p8mulb", v32i8, VR256, load,
7992*0b57cec5SDimitry Andric                                   i256mem>, VEX_4V, VEX_L;
7993*0b57cec5SDimitry Andric}
7994*0b57cec5SDimitry Andric// GF2P8AFFINEINVQB, GF2P8AFFINEQB
7995*0b57cec5SDimitry Andriclet isCommutable = 0 in {
7996*0b57cec5SDimitry Andric  defm GF2P8AFFINEINVQB : GF2P8AFFINE_common<0xCF, "gf2p8affineinvqb",
7997*0b57cec5SDimitry Andric                                             X86GF2P8affineinvqb>, TAPD;
7998*0b57cec5SDimitry Andric  defm GF2P8AFFINEQB    : GF2P8AFFINE_common<0xCE, "gf2p8affineqb",
7999*0b57cec5SDimitry Andric                                             X86GF2P8affineqb>, TAPD;
8000*0b57cec5SDimitry Andric}
8001*0b57cec5SDimitry Andric
8002