// xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td (revision 8a4dda33d67586ca2624f2a38417baa03a533a7f)
//===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 SSE instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//

/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class.
///
/// Emits two isCodeGenOnly records for a scalar FP operation:
///   rr - register/register form (MRMSrcReg); marked isCommutable.
///   rm - register/memory form (MRMSrcMem); $src2 is matched as a plain
///        `load` from addr:$src2.
///
/// Parameters:
///   opc       - 8-bit opcode handed to SI.
///   OpcodeStr - mnemonic; the operand string is concatenated onto it.
///   OpNode    - pattern operator used in both selection patterns.
///   RC        - register class of $dst, $src1 and (rr only) $src2.
///   x86memop  - operand type of $src2 in the rm form.
///   d         - execution domain handed to SI.
///   sched     - scheduling info; rr uses `sched`, rm uses `sched.Folded`
///               together with `sched.ReadAfterFold`.
///   Is2Addr   - 1 (default) selects the two-operand asm string, 0 the
///               three-operand one.
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           RegisterClass RC, X86MemOperand x86memop,
                           Domain d, X86FoldableSchedWrite sched,
                           bit Is2Addr = 1> {
let isCodeGenOnly = 1 in {
  let isCommutable = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], d>,
       Sched<[sched]>;
  }
  // Memory form: not commutable, folds the load of the second operand.
  def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], d>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class.
///
/// Emits the `_Int` variants of a scalar FP operation, both with
/// hasSideEffects = 0:
///   rr_Int - register/register form (MRMSrcReg).
///   rm_Int - register/memory form (MRMSrcMem), additionally mayLoad = 1;
///            $src2 is matched through `mem_frags`.
/// The result of OpNode is typed as VT in both patterns.
///
/// Parameters:
///   opc       - 8-bit opcode handed to SI_Int.
///   OpNode    - pattern operator used in both selection patterns.
///   RC        - register class of $dst, $src1 and (rr_Int only) $src2.
///   VT        - value type applied to the OpNode result.
///   asm       - mnemonic; the operand string is concatenated onto it.
///   memopr    - operand type of $src2 in the rm_Int form.
///   mem_frags - pattern fragments matching the memory operand.
///   d         - execution domain handed to SI_Int.
///   sched     - scheduling info; rr_Int uses `sched`, rm_Int uses
///               `sched.Folded` together with `sched.ReadAfterFold`.
///   Is2Addr   - 1 (default) selects the two-operand asm string, 0 the
///               three-operand one.
multiclass sse12_fp_scalar_int<bits<8> opc,
                               SDPatternOperator OpNode, RegisterClass RC,
                               ValueType VT, string asm, Operand memopr,
                               PatFrags mem_frags, Domain d,
                               X86FoldableSchedWrite sched, bit Is2Addr = 1> {
let hasSideEffects = 0 in {
  def rr_Int : SI_Int<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (VT (OpNode RC:$src1, RC:$src2)))], d>,
       Sched<[sched]>;
  let mayLoad = 1 in
  def rm_Int : SI_Int<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (VT (OpNode RC:$src1, (mem_frags addr:$src2))))], d>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

/// sse12_fp_packed - SSE 1 & 2 packed instructions class.
///
/// Emits two records for a packed FP operation:
///   rr - register/register form (MRMSrcReg); marked isCommutable. The
///        OpNode result is typed as `vt`.
///   rm - register/memory form (MRMSrcMem); mayLoad = 1. $src2 is matched
///        through `mem_frag`; this pattern carries no explicit `vt`.
///
/// Parameters:
///   opc       - 8-bit opcode handed to PI.
///   OpcodeStr - mnemonic; the operand string is concatenated onto it.
///   OpNode    - pattern operator used in both selection patterns.
///   RC        - register class of $dst, $src1 and (rr only) $src2.
///   vt        - value type applied to the rr result.
///   x86memop  - operand type of $src2 in the rm form.
///   mem_frag  - pattern fragment matching the memory operand.
///   d         - execution domain handed to PI.
///   sched     - scheduling info; rr uses `sched`, rm uses `sched.Folded`
///               together with `sched.ReadAfterFold`.
///   Is2Addr   - 1 (default) selects the two-operand asm string, 0 the
///               three-operand one.
multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           RegisterClass RC, ValueType vt,
                           X86MemOperand x86memop, PatFrag mem_frag,
                           Domain d, X86FoldableSchedWrite sched,
                           bit Is2Addr = 1> {
  let isCommutable = 1 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>,
       Sched<[sched]>;
  let mayLoad = 1 in
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
          d>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}

/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class.
///
/// Same rr/rm pair as the other packed classes, except that the selection
/// patterns are supplied by the caller instead of being built from an
/// operator:
///   rr - register/register form (MRMSrcReg); isCommutable = 1,
///        hasSideEffects = 0; uses `pat_rr`.
///   rm - register/memory form (MRMSrcMem); hasSideEffects = 0,
///        mayLoad = 1; uses `pat_rm`.
///
/// Parameters:
///   opc       - 8-bit opcode handed to PI.
///   RC        - register class of $dst, $src1 and (rr only) $src2.
///   d         - execution domain handed to PI.
///   OpcodeStr - mnemonic; the operand string is concatenated onto it.
///   x86memop  - operand type of $src2 in the rm form.
///   sched     - scheduling info; rr uses `sched`, rm uses `sched.Folded`
///               together with `sched.ReadAfterFold`.
///   pat_rr    - pattern list for the rr record.
///   pat_rm    - pattern list for the rm record.
///   Is2Addr   - 1 (default) selects the two-operand asm string, 0 the
///               three-operand one.
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
                                      string OpcodeStr, X86MemOperand x86memop,
                                      X86FoldableSchedWrite sched,
                                      list<dag> pat_rr, list<dag> pat_rm,
                                      bit Is2Addr = 1> {
  let isCommutable = 1, hasSideEffects = 0 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rr, d>,
       Sched<[sched]>;
  let hasSideEffects = 0, mayLoad = 1 in
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rm, d>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}


// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
//
// One operand-less pseudo per scalar width; each one matches the FP zero
// immediate of its register class:
//   FsFLD0SH   - FR16,  fp16imm0,  requires HasSSE2 and NoAVX512.
//   FsFLD0SS   - FR32,  fp32imm0,  requires HasSSE1 and NoAVX512.
//   FsFLD0SD   - FR64,  fp64imm0,  requires HasSSE2 and NoAVX512.
//   FsFLD0F128 - VR128, fp128imm0, requires HasSSE1 and NoAVX512.
// All of them are rematerializable, as cheap as a move, foldable as a load,
// and scheduled as WriteZero.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
  def FsFLD0SH : I<0, Pseudo, (outs FR16:$dst), (ins), "",
                   [(set FR16:$dst, fp16imm0)]>, Requires<[HasSSE2, NoAVX512]>;
  def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
                   [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoAVX512]>;
  def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
                   [(set FR64:$dst, fp64imm0)]>, Requires<[HasSSE2, NoAVX512]>;
  def FsFLD0F128 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                     [(set VR128:$dst, fp128imm0)]>, Requires<[HasSSE1, NoAVX512]>;
}

//===----------------------------------------------------------------------===//
// AVX & SSE - Zero/One Vectors
//===----------------------------------------------------------------------===//

// Alias instruction that maps zero vector to pxor / xorp* for sse.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
//
// The pseudo itself only carries the v4f32 pattern; it is guarded by
// NoAVX512 and scheduled as WriteZero.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
               [(set VR128:$dst, (v4f32 immAllZerosV))]>;
}

// Select V_SET0 for an all-zeros vector of every other 128-bit element type
// (v4f32 is handled by the pattern on the V_SET0 definition itself).
let Predicates = [NoAVX512] in {
def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
def : Pat<(v8f16 immAllZerosV), (V_SET0)>;
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
}


// The same as done above but for AVX.  The 256-bit AVX1 ISA doesn't support PI,
// and doesn't need it because on sandy bridge the register is set to zero
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty.
//
// The pseudo itself only carries the v8i32 pattern; it is guarded by
// NoAVX512 and scheduled as WriteZero.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                 [(set VR256:$dst, (v8i32 immAllZerosV))]>;
}

// Select AVX_SET0 for an all-zeros vector of every other 256-bit element type
// (v8i32 is handled by the pattern on the AVX_SET0 definition itself).
let Predicates = [NoAVX512] in {
def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
def : Pat<(v16f16 immAllZerosV), (AVX_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;
}

// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
//
// All-ones pseudos:
//   V_SETALLONES    - VR128, v4i32; no predicate attached here.
//   AVX1_SETALLONES - VR256, v8i32; only with HasAVX1Only and OptForMinSize.
//   AVX2_SETALLONES - VR256, v8i32; only with HasAVX2.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
  def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                       [(set VR128:$dst, (v4i32 immAllOnesV))]>;
  let Predicates = [HasAVX1Only, OptForMinSize] in {
  def AVX1_SETALLONES: I<0, Pseudo, (outs VR256:$dst), (ins), "",
                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
  }
  let Predicates = [HasAVX2] in
  def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move FP Scalar Instructions
//
// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
// register copies because it's a partial register update. Register-to-register
// movss/movsd is not modeled as an INSERT_SUBREG because INSERT_SUBREG requires
// that the insert be implementable in terms of a copy, and, as just mentioned,
// we don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//

/// sse12_move_rr - register/register scalar move pair.
///
/// Emits:
///   rr     - opcode 0x10, MRMSrcReg, isCommutable = 1; selected through
///            (vt (OpNode VR128:$src1, VR128:$src2)).
///   rr_REV - opcode 0x11, MRMDestReg, with the same operands and asm string
///            but no pattern; isCodeGenOnly with ForceDisassemble set.
/// Both are scheduled as SchedWriteFShuffle.XMM.
///
/// Parameters:
///   OpNode   - node matched by the rr pattern.
///   vt       - value type of the rr result.
///   base_opc - mnemonic.
///   asm_opr  - operand string concatenated onto the mnemonic.
///   d        - execution domain handed to the rr record.
multiclass sse12_move_rr<SDNode OpNode, ValueType vt, string base_opc,
                         string asm_opr, Domain d> {
  let isCommutable = 1 in
  def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
              (ins VR128:$src1, VR128:$src2),
              !strconcat(base_opc, asm_opr),
              [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], d>,
              Sched<[SchedWriteFShuffle.XMM]>;

  // For the disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                  (ins VR128:$src1, VR128:$src2),
                  !strconcat(base_opc, asm_opr), []>,
                  Sched<[SchedWriteFShuffle.XMM]>;
}

/// sse12_move - scalar move: register/register forms, stores and ".s" aliases.
///
/// For a given NAME this emits:
///   V#NAME rr/rr_REV - three-operand VEX forms via sse12_move_rr, under
///                      [UseAVX, OptForSize].
///   V#NAME#mr        - VEX store of RC:$src to addr:$dst (opcode 0x11).
///   NAME rr/rr_REV   - two-operand forms via sse12_move_rr with $src1 tied
///                      to $dst, under [pred, NoSSE41_Or_OptForSize].
///   NAME#mr          - non-VEX store of RC:$src to addr:$dst (opcode 0x11).
///   two InstAliases  - "<mnemonic>.s" spellings that map to the rr_REV
///                      records (VEX and non-VEX).
///
/// Parameters:
///   RC        - register class of the stored value.
///   OpNode    - node forwarded to sse12_move_rr.
///   vt        - value type forwarded to sse12_move_rr.
///   x86memop  - operand type of the store destination.
///   OpcodeStr - mnemonic without the "v" prefix.
///   d         - execution domain.
///   pred      - predicate for the non-VEX register/register forms.
multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
                      X86MemOperand x86memop, string OpcodeStr,
                      Domain d, Predicate pred> {
  // AVX
  let Predicates = [UseAVX, OptForSize] in
  defm V#NAME : sse12_move_rr<OpNode, vt, OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d>,
                              VEX_4V, VEX_LIG, WIG;

  def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(store RC:$src, addr:$dst)], d>,
                     VEX, VEX_LIG, Sched<[WriteFStore]>, WIG;
  // SSE1 & 2
  let Constraints = "$src1 = $dst" in {
    let Predicates = [pred, NoSSE41_Or_OptForSize] in
    defm NAME : sse12_move_rr<OpNode, vt, OpcodeStr,
                              "\t{$src2, $dst|$dst, $src2}", d>;
  }

  def NAME#mr   : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(store RC:$src, addr:$dst)], d>,
                     Sched<[WriteFStore]>;

  // ".s" mnemonic suffix maps to the rr_REV records.
  def : InstAlias<"v"#OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>("V"#NAME#"rr_REV")
                   VR128:$dst, VR128:$src1, VR128:$src2), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src2, $dst|$dst, $src2}",
                  (!cast<Instruction>(NAME#"rr_REV")
                   VR128:$dst, VR128:$src2), 0>;
}

// Loading from memory automatically zeroing upper bits.
//
// sse12_move_rm emits four opcode-0x10 MRMSrcMem load records, all scheduled
// as WriteFLoad:
//   V#NAME#rm     - VEX form, VR128 result, matched via (vt (vzloadfrag ...)).
//   NAME#rm       - non-VEX form, same pattern.
//   V#NAME#rm_alt - VEX form, RC result, matched via (mem_pat ...);
//                   isCodeGenOnly.
//   NAME#rm_alt   - non-VEX form of the above; isCodeGenOnly.
//
// Parameters:
//   RC         - register class of the _alt results.
//   vt         - vector value type of the VR128 results.
//   x86memop   - operand type of $src.
//   mem_pat    - load fragment used by the _alt patterns.
//   vzloadfrag - load fragment used by the VR128 patterns.
//   OpcodeStr  - mnemonic.
//   d          - execution domain.
multiclass sse12_move_rm<RegisterClass RC, ValueType vt, X86MemOperand x86memop,
                         PatFrag mem_pat, PatFrag vzloadfrag, string OpcodeStr,
                         Domain d> {
  def V#NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
                     VEX, VEX_LIG, Sched<[WriteFLoad]>, WIG;
  def NAME#rm   : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
                     Sched<[WriteFLoad]>;

  // _alt version uses FR32/FR64 register class.
  let isCodeGenOnly = 1 in {
  def V#NAME#rm_alt : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                         [(set RC:$dst, (mem_pat addr:$src))], d>,
                         VEX, VEX_LIG, Sched<[WriteFLoad]>, WIG;
  def NAME#rm_alt   : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                         [(set RC:$dst, (mem_pat addr:$src))], d>,
                         Sched<[WriteFLoad]>;
  }
}

// Instantiate the scalar moves.
// Register/register forms, stores and aliases: MOVSS uses FR32/v4f32/f32mem
// with the XS prefix and UseSSE1; MOVSD uses FR64/v2f64/f64mem with the XD
// prefix and UseSSE2.
defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
                        SSEPackedSingle, UseSSE1>, XS;
defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
                        SSEPackedDouble, UseSSE2>, XD;

// Load forms; marked foldable-as-load and rematerializable.
let canFoldAsLoad = 1, isReMaterializable = 1 in {
  defm MOVSS : sse12_move_rm<FR32, v4f32, f32mem, loadf32, X86vzload32, "movss",
                             SSEPackedSingle>, XS;
  defm MOVSD : sse12_move_rm<FR64, v2f64, f64mem, loadf64, X86vzload64, "movsd",
                             SSEPackedDouble>, XD;
}

// Patterns
// With UseAVX: a scalar_to_vector of a scalar load selects the VEX scalar
// load directly.
let Predicates = [UseAVX] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types: the 128-bit load result is placed in sub_xmm through
  // SUBREG_TO_REG.
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
}

// X86vzmovl lowering with UseAVX when optimizing for size: VMOVSSrr with a
// V_SET0 zero vector as the first source.
let Predicates = [UseAVX, OptForSize] in {
  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
  // MOVSS to the lower bits.
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (VMOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (VMOVSSrr (v4i32 (V_SET0)), VR128:$src)>;

  // Move low f32 and clear high bits.
  // 256-bit sources: extract sub_xmm, apply the 128-bit sequence, then put
  // the result back in sub_xmm with SUBREG_TO_REG.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSrr (v4f32 (V_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSrr (v4i32 (V_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)))), sub_xmm)>;
}

// Non-VEX counterpart of the X86vzmovl lowering, under
// [UseSSE1, NoSSE41_Or_OptForSize].
let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVSS to the lower bits.
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
          (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
          (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
}

// Non-VEX counterparts of the scalar_to_vector load patterns: a
// scalar_to_vector of a scalar load selects MOVSDrm (UseSSE2) or MOVSSrm
// (UseSSE1).
let Predicates = [UseSSE2] in
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
          (MOVSDrm addr:$src)>;

let Predicates = [UseSSE1] in
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
          (MOVSSrm addr:$src)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
//===----------------------------------------------------------------------===//

/// sse12_mov_packed - packed FP move: register copy and load.
///
/// Emits:
///   rr - register/register move (MRMSrcReg) with no selection pattern;
///        hasSideEffects = 0, isMoveReg = 1; scheduled as sched.RR.
///   rm - load (MRMSrcMem) matched via (ld_frag addr:$src);
///        canFoldAsLoad = 1, isReMaterializable = 1; scheduled as sched.RM.
///
/// Parameters:
///   opc      - 8-bit opcode handed to PI.
///   RC       - register class of source and destination registers.
///   x86memop - operand type of the load source.
///   ld_frag  - load fragment used by the rm pattern.
///   asm      - mnemonic.
///   d        - execution domain handed to PI.
///   sched    - move/load/store scheduling bundle.
multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
                            X86MemOperand x86memop, PatFrag ld_frag,
                            string asm, Domain d,
                            X86SchedWriteMoveLS sched> {
let hasSideEffects = 0, isMoveReg = 1 in
  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>,
           Sched<[sched.RR]>;
let canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                   [(set RC:$dst, (ld_frag addr:$src))], d>,
           Sched<[sched.RM]>;
}

// VEX-encoded packed moves/loads, under [HasAVX, NoVLX].
// Aligned forms use opcode 0x28 with aligned-load fragments; unaligned forms
// use opcode 0x10 with plain load fragments. PS/PD select the single/double
// variants.
let Predicates = [HasAVX, NoVLX] in {
// 128-bit (VR128 / f128mem).
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
                                SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                                PS, VEX, WIG;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
                                SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                                PD, VEX, WIG;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
                                SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                                PS, VEX, WIG;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
                                SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                                PD, VEX, WIG;

// 256-bit (VR256 / f256mem), additionally VEX_L.
defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, "movaps",
                                 SSEPackedSingle, SchedWriteFMoveLS.YMM>,
                                 PS, VEX, VEX_L, WIG;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd",
                                 SSEPackedDouble, SchedWriteFMoveLS.YMM>,
                                 PD, VEX, VEX_L, WIG;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups",
                                 SSEPackedSingle, SchedWriteFMoveLS.YMM>,
                                 PS, VEX, VEX_L, WIG;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd",
                                 SSEPackedDouble, SchedWriteFMoveLS.YMM>,
                                 PD, VEX, VEX_L, WIG;
}

// Non-VEX packed moves/loads on VR128 / f128mem.
// Single-precision forms are under UseSSE1, double-precision forms under
// UseSSE2. Aligned forms use opcode 0x28, unaligned forms opcode 0x10.
let Predicates = [UseSSE1] in {
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
                               SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                               PS;
defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
                               SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                               PS;
}
let Predicates = [UseSSE2] in {
defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
                               SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                               PD;
defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
                               SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                               PD;
}

// VEX-encoded packed stores, under [HasAVX, NoVLX].
// Aligned stores use opcode 0x29 and match `alignedstore`; unaligned stores
// use opcode 0x11 and match `store`.
let Predicates = [HasAVX, NoVLX]  in {
// 128-bit stores (f128mem destination, VR128 source).
let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>,
                   VEX, WIG;
def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>,
                   VEX, WIG;
def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)]>,
                   VEX, WIG;
def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)]>,
                   VEX, WIG;
} // SchedRW

// 256-bit stores (f256mem destination, VR256 source), additionally VEX_L.
let SchedRW = [SchedWriteFMoveLS.YMM.MR] in {
def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v8f32 VR256:$src), addr:$dst)]>,
                   VEX, VEX_L, WIG;
def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f64 VR256:$src), addr:$dst)]>,
                   VEX, VEX_L, WIG;
def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v8f32 VR256:$src), addr:$dst)]>,
                   VEX, VEX_L, WIG;
def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v4f64 VR256:$src), addr:$dst)]>,
                   VEX, VEX_L, WIG;
} // SchedRW
} // Predicate

4370b57cec5SDimitry Andric// For disassembler
// Register-to-register moves encoded in MRMDestReg form with opcodes 0x29 and
// 0x11. They carry no selection patterns ([]) and are flagged isCodeGenOnly and
// ForceDisassemble. The first group covers VR128, the second VR256 (VEX_L).
4380b57cec5SDimitry Andriclet isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
4390b57cec5SDimitry Andric    isMoveReg = 1 in {
4400b57cec5SDimitry Andriclet SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
4410b57cec5SDimitry Andric  def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
4420b57cec5SDimitry Andric                          (ins VR128:$src),
4430b57cec5SDimitry Andric                          "movaps\t{$src, $dst|$dst, $src}", []>,
44406c3fb27SDimitry Andric                          VEX, WIG;
4450b57cec5SDimitry Andric  def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
4460b57cec5SDimitry Andric                           (ins VR128:$src),
4470b57cec5SDimitry Andric                           "movapd\t{$src, $dst|$dst, $src}", []>,
44806c3fb27SDimitry Andric                           VEX, WIG;
4490b57cec5SDimitry Andric  def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
4500b57cec5SDimitry Andric                           (ins VR128:$src),
4510b57cec5SDimitry Andric                           "movups\t{$src, $dst|$dst, $src}", []>,
45206c3fb27SDimitry Andric                           VEX, WIG;
4530b57cec5SDimitry Andric  def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
4540b57cec5SDimitry Andric                           (ins VR128:$src),
4550b57cec5SDimitry Andric                           "movupd\t{$src, $dst|$dst, $src}", []>,
45606c3fb27SDimitry Andric                           VEX, WIG;
4570b57cec5SDimitry Andric} // SchedRW
4580b57cec5SDimitry Andric
4590b57cec5SDimitry Andriclet SchedRW = [SchedWriteFMoveLS.YMM.RR] in {
4600b57cec5SDimitry Andric  def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
4610b57cec5SDimitry Andric                            (ins VR256:$src),
4620b57cec5SDimitry Andric                            "movaps\t{$src, $dst|$dst, $src}", []>,
46306c3fb27SDimitry Andric                            VEX, VEX_L, WIG;
4640b57cec5SDimitry Andric  def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
4650b57cec5SDimitry Andric                            (ins VR256:$src),
4660b57cec5SDimitry Andric                            "movapd\t{$src, $dst|$dst, $src}", []>,
46706c3fb27SDimitry Andric                            VEX, VEX_L, WIG;
4680b57cec5SDimitry Andric  def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
4690b57cec5SDimitry Andric                            (ins VR256:$src),
4700b57cec5SDimitry Andric                            "movups\t{$src, $dst|$dst, $src}", []>,
47106c3fb27SDimitry Andric                            VEX, VEX_L, WIG;
4720b57cec5SDimitry Andric  def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
4730b57cec5SDimitry Andric                            (ins VR256:$src),
4740b57cec5SDimitry Andric                            "movupd\t{$src, $dst|$dst, $src}", []>,
47506c3fb27SDimitry Andric                            VEX, VEX_L, WIG;
4760b57cec5SDimitry Andric} // SchedRW
4770b57cec5SDimitry Andric} // isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, isMoveReg = 1
4780b57cec5SDimitry Andric
4790b57cec5SDimitry Andric// Reversed version with ".s" suffix for GAS compatibility.
// Each alias maps a ".s" mnemonic onto the corresponding *_REV definition. The
// VR128 and VR256 aliases share the same mnemonic strings and differ only in
// the register class of their operands.
// NOTE(review): the trailing 0 is presumably the InstAlias emit/print flag -
// confirm against the InstAlias class definition.
4800b57cec5SDimitry Andricdef : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
4810b57cec5SDimitry Andric                (VMOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
4820b57cec5SDimitry Andricdef : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}",
4830b57cec5SDimitry Andric                (VMOVAPDrr_REV VR128:$dst, VR128:$src), 0>;
4840b57cec5SDimitry Andricdef : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}",
4850b57cec5SDimitry Andric                (VMOVUPSrr_REV VR128:$dst, VR128:$src), 0>;
4860b57cec5SDimitry Andricdef : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}",
4870b57cec5SDimitry Andric                (VMOVUPDrr_REV VR128:$dst, VR128:$src), 0>;
4880b57cec5SDimitry Andricdef : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
4890b57cec5SDimitry Andric                (VMOVAPSYrr_REV VR256:$dst, VR256:$src), 0>;
4900b57cec5SDimitry Andricdef : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}",
4910b57cec5SDimitry Andric                (VMOVAPDYrr_REV VR256:$dst, VR256:$src), 0>;
4920b57cec5SDimitry Andricdef : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}",
4930b57cec5SDimitry Andric                (VMOVUPSYrr_REV VR256:$dst, VR256:$src), 0>;
4940b57cec5SDimitry Andricdef : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}",
4950b57cec5SDimitry Andric                (VMOVUPDYrr_REV VR256:$dst, VR256:$src), 0>;
4960b57cec5SDimitry Andric
4960b57cec5SDimitry Andric
// 128-bit store forms built on PSI/PDI (no VEX modifier). Opcode 0x29
// (movaps/movapd) matches `alignedstore`; opcode 0x11 (movups/movupd) matches
// plain `store`. PS variants store v4f32, PD variants store v2f64.
4970b57cec5SDimitry Andriclet SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
4980b57cec5SDimitry Andricdef MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
4990b57cec5SDimitry Andric                   "movaps\t{$src, $dst|$dst, $src}",
5000b57cec5SDimitry Andric                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
5010b57cec5SDimitry Andricdef MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
5020b57cec5SDimitry Andric                   "movapd\t{$src, $dst|$dst, $src}",
5030b57cec5SDimitry Andric                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
5040b57cec5SDimitry Andricdef MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
5050b57cec5SDimitry Andric                   "movups\t{$src, $dst|$dst, $src}",
5060b57cec5SDimitry Andric                   [(store (v4f32 VR128:$src), addr:$dst)]>;
5070b57cec5SDimitry Andricdef MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
5080b57cec5SDimitry Andric                   "movupd\t{$src, $dst|$dst, $src}",
5090b57cec5SDimitry Andric                   [(store (v2f64 VR128:$src), addr:$dst)]>;
5100b57cec5SDimitry Andric} // SchedRW
5110b57cec5SDimitry Andric
5120b57cec5SDimitry Andric// For disassembler
// Register-to-register VR128 moves in MRMDestReg form using opcodes 0x29 and
// 0x11. No selection patterns ([]); flagged isCodeGenOnly and ForceDisassemble.
5130b57cec5SDimitry Andriclet isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
5140b57cec5SDimitry Andric    isMoveReg = 1, SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
5150b57cec5SDimitry Andric  def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
51606c3fb27SDimitry Andric                         "movaps\t{$src, $dst|$dst, $src}", []>;
5170b57cec5SDimitry Andric  def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
51806c3fb27SDimitry Andric                         "movapd\t{$src, $dst|$dst, $src}", []>;
5190b57cec5SDimitry Andric  def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
52006c3fb27SDimitry Andric                         "movups\t{$src, $dst|$dst, $src}", []>;
5210b57cec5SDimitry Andric  def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
52206c3fb27SDimitry Andric                         "movupd\t{$src, $dst|$dst, $src}", []>;
5230b57cec5SDimitry Andric}
5240b57cec5SDimitry Andric
5250b57cec5SDimitry Andric// Reversed version with ".s" suffix for GAS compatibility.
// Each alias maps a ".s" mnemonic onto the corresponding MOV*rr_REV definition.
// NOTE(review): the trailing 0 is presumably the InstAlias emit/print flag -
// confirm against the InstAlias class definition.
5260b57cec5SDimitry Andricdef : InstAlias<"movaps.s\t{$src, $dst|$dst, $src}",
5270b57cec5SDimitry Andric                (MOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
5280b57cec5SDimitry Andricdef : InstAlias<"movapd.s\t{$src, $dst|$dst, $src}",
5290b57cec5SDimitry Andric                (MOVAPDrr_REV VR128:$dst, VR128:$src), 0>;
5300b57cec5SDimitry Andricdef : InstAlias<"movups.s\t{$src, $dst|$dst, $src}",
5310b57cec5SDimitry Andric                (MOVUPSrr_REV VR128:$dst, VR128:$src), 0>;
5320b57cec5SDimitry Andricdef : InstAlias<"movupd.s\t{$src, $dst|$dst, $src}",
5330b57cec5SDimitry Andric                (MOVUPDrr_REV VR128:$dst, VR128:$src), 0>;
5340b57cec5SDimitry Andric
5350b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
5360b57cec5SDimitry Andric  // 256-bit load/store need to use floating point load/store in case we don't
5370b57cec5SDimitry Andric  // have AVX2. Execution domain fixing will convert to integer if AVX2 is
5380b57cec5SDimitry Andric  // available and changing the domain is beneficial.
  // Integer loads: aligned fragments select VMOVAPSYrm, unaligned fragments
  // select VMOVUPSYrm.
5390b57cec5SDimitry Andric  def : Pat<(alignedloadv4i64 addr:$src),
5400b57cec5SDimitry Andric            (VMOVAPSYrm addr:$src)>;
5410b57cec5SDimitry Andric  def : Pat<(alignedloadv8i32 addr:$src),
5420b57cec5SDimitry Andric            (VMOVAPSYrm addr:$src)>;
5430b57cec5SDimitry Andric  def : Pat<(alignedloadv16i16 addr:$src),
5440b57cec5SDimitry Andric            (VMOVAPSYrm addr:$src)>;
5450b57cec5SDimitry Andric  def : Pat<(alignedloadv32i8 addr:$src),
5460b57cec5SDimitry Andric            (VMOVAPSYrm addr:$src)>;
5470b57cec5SDimitry Andric  def : Pat<(loadv4i64 addr:$src),
5480b57cec5SDimitry Andric            (VMOVUPSYrm addr:$src)>;
5490b57cec5SDimitry Andric  def : Pat<(loadv8i32 addr:$src),
5500b57cec5SDimitry Andric            (VMOVUPSYrm addr:$src)>;
5510b57cec5SDimitry Andric  def : Pat<(loadv16i16 addr:$src),
5520b57cec5SDimitry Andric            (VMOVUPSYrm addr:$src)>;
5530b57cec5SDimitry Andric  def : Pat<(loadv32i8 addr:$src),
5540b57cec5SDimitry Andric            (VMOVUPSYrm addr:$src)>;
5550b57cec5SDimitry Andric
  // Integer stores: alignedstore selects VMOVAPSYmr, plain store selects
  // VMOVUPSYmr.
5560b57cec5SDimitry Andric  def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst),
5570b57cec5SDimitry Andric            (VMOVAPSYmr addr:$dst, VR256:$src)>;
5580b57cec5SDimitry Andric  def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst),
5590b57cec5SDimitry Andric            (VMOVAPSYmr addr:$dst, VR256:$src)>;
5600b57cec5SDimitry Andric  def : Pat<(alignedstore (v16i16 VR256:$src), addr:$dst),
5610b57cec5SDimitry Andric            (VMOVAPSYmr addr:$dst, VR256:$src)>;
5620b57cec5SDimitry Andric  def : Pat<(alignedstore (v32i8 VR256:$src), addr:$dst),
5630b57cec5SDimitry Andric            (VMOVAPSYmr addr:$dst, VR256:$src)>;
5640b57cec5SDimitry Andric  def : Pat<(store (v4i64 VR256:$src), addr:$dst),
5650b57cec5SDimitry Andric            (VMOVUPSYmr addr:$dst, VR256:$src)>;
5660b57cec5SDimitry Andric  def : Pat<(store (v8i32 VR256:$src), addr:$dst),
5670b57cec5SDimitry Andric            (VMOVUPSYmr addr:$dst, VR256:$src)>;
5680b57cec5SDimitry Andric  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
5690b57cec5SDimitry Andric            (VMOVUPSYmr addr:$dst, VR256:$src)>;
5700b57cec5SDimitry Andric  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
5710b57cec5SDimitry Andric            (VMOVUPSYmr addr:$dst, VR256:$src)>;
572fcaf7f86SDimitry Andric
  // 128-bit v8f16 / v8bf16 loads and stores select the VR128
  // VMOVAPS/VMOVUPS instructions.
573fcaf7f86SDimitry Andric  def : Pat<(alignedloadv8f16 addr:$src),
574fcaf7f86SDimitry Andric            (VMOVAPSrm addr:$src)>;
5751ac55f4cSDimitry Andric  def : Pat<(alignedloadv8bf16 addr:$src),
5761ac55f4cSDimitry Andric            (VMOVAPSrm addr:$src)>;
577fcaf7f86SDimitry Andric  def : Pat<(loadv8f16 addr:$src),
578fcaf7f86SDimitry Andric            (VMOVUPSrm addr:$src)>;
5791ac55f4cSDimitry Andric  def : Pat<(loadv8bf16 addr:$src),
5801ac55f4cSDimitry Andric            (VMOVUPSrm addr:$src)>;
581fcaf7f86SDimitry Andric  def : Pat<(alignedstore (v8f16 VR128:$src), addr:$dst),
582fcaf7f86SDimitry Andric            (VMOVAPSmr addr:$dst, VR128:$src)>;
5831ac55f4cSDimitry Andric  def : Pat<(alignedstore (v8bf16 VR128:$src), addr:$dst),
5841ac55f4cSDimitry Andric            (VMOVAPSmr addr:$dst, VR128:$src)>;
585fcaf7f86SDimitry Andric  def : Pat<(store (v8f16 VR128:$src), addr:$dst),
586fcaf7f86SDimitry Andric            (VMOVUPSmr addr:$dst, VR128:$src)>;
5871ac55f4cSDimitry Andric  def : Pat<(store (v8bf16 VR128:$src), addr:$dst),
5881ac55f4cSDimitry Andric            (VMOVUPSmr addr:$dst, VR128:$src)>;
5891ac55f4cSDimitry Andric
  // 256-bit v16f16 / v16bf16 loads and stores select the VR256
  // VMOVAPSY/VMOVUPSY instructions.
590fcaf7f86SDimitry Andric  def : Pat<(alignedloadv16f16 addr:$src),
591fcaf7f86SDimitry Andric            (VMOVAPSYrm addr:$src)>;
5921ac55f4cSDimitry Andric  def : Pat<(alignedloadv16bf16 addr:$src),
5931ac55f4cSDimitry Andric            (VMOVAPSYrm addr:$src)>;
594fcaf7f86SDimitry Andric  def : Pat<(loadv16f16 addr:$src),
595fcaf7f86SDimitry Andric            (VMOVUPSYrm addr:$src)>;
5961ac55f4cSDimitry Andric  def : Pat<(loadv16bf16 addr:$src),
5971ac55f4cSDimitry Andric            (VMOVUPSYrm addr:$src)>;
598fcaf7f86SDimitry Andric  def : Pat<(alignedstore (v16f16 VR256:$src), addr:$dst),
599fcaf7f86SDimitry Andric            (VMOVAPSYmr addr:$dst, VR256:$src)>;
6001ac55f4cSDimitry Andric  def : Pat<(alignedstore (v16bf16 VR256:$src), addr:$dst),
6011ac55f4cSDimitry Andric            (VMOVAPSYmr addr:$dst, VR256:$src)>;
602fcaf7f86SDimitry Andric  def : Pat<(store (v16f16 VR256:$src), addr:$dst),
603fcaf7f86SDimitry Andric            (VMOVUPSYmr addr:$dst, VR256:$src)>;
6041ac55f4cSDimitry Andric  def : Pat<(store (v16bf16 VR256:$src), addr:$dst),
6051ac55f4cSDimitry Andric            (VMOVUPSYmr addr:$dst, VR256:$src)>;
6060b57cec5SDimitry Andric}
6070b57cec5SDimitry Andric
6080b57cec5SDimitry Andric// Use movaps / movups for SSE integer load / store (one byte shorter).
6090b57cec5SDimitry Andric// The instructions selected below are then converted to MOVDQA/MOVDQU
6100b57cec5SDimitry Andric// during the SSE domain pass.
6110b57cec5SDimitry Andriclet Predicates = [UseSSE1] in {
  // Loads: aligned fragments select MOVAPSrm, unaligned fragments MOVUPSrm.
6120b57cec5SDimitry Andric  def : Pat<(alignedloadv2i64 addr:$src),
6130b57cec5SDimitry Andric            (MOVAPSrm addr:$src)>;
6140b57cec5SDimitry Andric  def : Pat<(alignedloadv4i32 addr:$src),
6150b57cec5SDimitry Andric            (MOVAPSrm addr:$src)>;
6160b57cec5SDimitry Andric  def : Pat<(alignedloadv8i16 addr:$src),
6170b57cec5SDimitry Andric            (MOVAPSrm addr:$src)>;
6180b57cec5SDimitry Andric  def : Pat<(alignedloadv16i8 addr:$src),
6190b57cec5SDimitry Andric            (MOVAPSrm addr:$src)>;
6200b57cec5SDimitry Andric  def : Pat<(loadv2i64 addr:$src),
6210b57cec5SDimitry Andric            (MOVUPSrm addr:$src)>;
6220b57cec5SDimitry Andric  def : Pat<(loadv4i32 addr:$src),
6230b57cec5SDimitry Andric            (MOVUPSrm addr:$src)>;
6240b57cec5SDimitry Andric  def : Pat<(loadv8i16 addr:$src),
6250b57cec5SDimitry Andric            (MOVUPSrm addr:$src)>;
6260b57cec5SDimitry Andric  def : Pat<(loadv16i8 addr:$src),
6270b57cec5SDimitry Andric            (MOVUPSrm addr:$src)>;
6280b57cec5SDimitry Andric
  // Stores: alignedstore selects MOVAPSmr, plain store selects MOVUPSmr.
6290b57cec5SDimitry Andric  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
6300b57cec5SDimitry Andric            (MOVAPSmr addr:$dst, VR128:$src)>;
6310b57cec5SDimitry Andric  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
6320b57cec5SDimitry Andric            (MOVAPSmr addr:$dst, VR128:$src)>;
6330b57cec5SDimitry Andric  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
6340b57cec5SDimitry Andric            (MOVAPSmr addr:$dst, VR128:$src)>;
6350b57cec5SDimitry Andric  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
6360b57cec5SDimitry Andric            (MOVAPSmr addr:$dst, VR128:$src)>;
6370b57cec5SDimitry Andric  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
6380b57cec5SDimitry Andric            (MOVUPSmr addr:$dst, VR128:$src)>;
6390b57cec5SDimitry Andric  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
6400b57cec5SDimitry Andric            (MOVUPSmr addr:$dst, VR128:$src)>;
6410b57cec5SDimitry Andric  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
6420b57cec5SDimitry Andric            (MOVUPSmr addr:$dst, VR128:$src)>;
6430b57cec5SDimitry Andric  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
6440b57cec5SDimitry Andric            (MOVUPSmr addr:$dst, VR128:$src)>;
6450b57cec5SDimitry Andric}
6460b57cec5SDimitry Andric
// v8f16 loads and stores select the same MOVAPS/MOVUPS instructions as the
// integer patterns, but are gated on UseSSE2 rather than UseSSE1.
647fcaf7f86SDimitry Andriclet Predicates = [UseSSE2] in {
648fcaf7f86SDimitry Andric  def : Pat<(alignedloadv8f16 addr:$src),
649fcaf7f86SDimitry Andric            (MOVAPSrm addr:$src)>;
650fcaf7f86SDimitry Andric  def : Pat<(loadv8f16 addr:$src),
651fcaf7f86SDimitry Andric            (MOVUPSrm addr:$src)>;
652fcaf7f86SDimitry Andric  def : Pat<(alignedstore (v8f16 VR128:$src), addr:$dst),
653fcaf7f86SDimitry Andric            (MOVAPSmr addr:$dst, VR128:$src)>;
654fcaf7f86SDimitry Andric  def : Pat<(store (v8f16 VR128:$src), addr:$dst),
655fcaf7f86SDimitry Andric            (MOVUPSmr addr:$dst, VR128:$src)>;
656fcaf7f86SDimitry Andric}
657fcaf7f86SDimitry Andric
6580b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6590b57cec5SDimitry Andric// SSE 1 & 2 - Move Low packed FP Instructions
6600b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6610b57cec5SDimitry Andric
/// sse12_mov_hilo_packed_base - Memory-source (MRMSrcMem) forms of a packed
/// half-register move. Defines PSrm and PDrm, each taking VR128:$src1 and an
/// f64mem:$src2 operand and producing VR128:$dst.
///   opc      - opcode byte shared by both defs.
///   pdnode   - node matched by the PDrm selection pattern.
///   base_opc - mnemonic stem; "s" (PSrm) or "d" (PDrm) is appended.
///   asm_opr  - operand part of the assembly string, appended after the suffix.
662fe6060f1SDimitry Andricmulticlass sse12_mov_hilo_packed_base<bits<8>opc, SDPatternOperator pdnode,
6630b57cec5SDimitry Andric                                      string base_opc, string asm_opr> {
6640b57cec5SDimitry Andric  // No pattern as they need to be special cased between high and low.
6650b57cec5SDimitry Andric  let hasSideEffects = 0, mayLoad = 1 in
6660b57cec5SDimitry Andric  def PSrm : PI<opc, MRMSrcMem,
6670b57cec5SDimitry Andric                (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
6680b57cec5SDimitry Andric                !strconcat(base_opc, "s", asm_opr),
6690b57cec5SDimitry Andric                [], SSEPackedSingle>, PS,
6700b57cec5SDimitry Andric                Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
6710b57cec5SDimitry Andric
  // The double-precision form matches pdnode applied to $src1 and a 64-bit
  // load (loadf64) wrapped in scalar_to_vector.
6720b57cec5SDimitry Andric  def PDrm : PI<opc, MRMSrcMem,
6730b57cec5SDimitry Andric         (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
6740b57cec5SDimitry Andric         !strconcat(base_opc, "d", asm_opr),
6750b57cec5SDimitry Andric     [(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
6760b57cec5SDimitry Andric                              (scalar_to_vector (loadf64 addr:$src2)))))],
6770b57cec5SDimitry Andric              SSEPackedDouble>, PD,
6780b57cec5SDimitry Andric     Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
6790b57cec5SDimitry Andric}
6800b57cec5SDimitry Andric
/// sse12_mov_hilo_packed - Instantiates sse12_mov_hilo_packed_base twice:
/// a "V"-prefixed three-operand form under UseAVX (VEX_4V, WIG), and an
/// unprefixed two-operand form in which $src1 is tied to $dst.
6810b57cec5SDimitry Andricmulticlass sse12_mov_hilo_packed<bits<8>opc, SDPatternOperator pdnode,
6820b57cec5SDimitry Andric                                 string base_opc> {
6830b57cec5SDimitry Andric  let Predicates = [UseAVX] in
6840b57cec5SDimitry Andric    defm V#NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc,
6850b57cec5SDimitry Andric                                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
68606c3fb27SDimitry Andric                                    VEX_4V, WIG;
6870b57cec5SDimitry Andric
6880b57cec5SDimitry Andric  let Constraints = "$src1 = $dst" in
6890b57cec5SDimitry Andric    defm NAME : sse12_mov_hilo_packed_base<opc,  pdnode, base_opc,
6900b57cec5SDimitry Andric                                    "\t{$src2, $dst|$dst, $src2}">;
6910b57cec5SDimitry Andric}
6920b57cec5SDimitry Andric
// Load forms: opcode 0x12, mnemonic stem "movlp", PD pattern node X86Movsd.
6930b57cec5SDimitry Andricdefm MOVL : sse12_mov_hilo_packed<0x12, X86Movsd, "movlp">;
6940b57cec5SDimitry Andric
// Store forms: opcode 0x13, MRMDestMem, f64mem destination. The PS defs have
// empty pattern lists; the PD defs match a store of element 0 of a v2f64.
6950b57cec5SDimitry Andriclet SchedRW = [WriteFStore] in {
6960b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
// Brace-less let: scopes only the def that immediately follows (VMOVLPSmr).
6970b57cec5SDimitry Andriclet mayStore = 1, hasSideEffects = 0 in
6980b57cec5SDimitry Andricdef VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
6990b57cec5SDimitry Andric                     "movlps\t{$src, $dst|$dst, $src}",
7000b57cec5SDimitry Andric                     []>,
70106c3fb27SDimitry Andric                     VEX, WIG;
7020b57cec5SDimitry Andricdef VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
7030b57cec5SDimitry Andric                     "movlpd\t{$src, $dst|$dst, $src}",
7040b57cec5SDimitry Andric                     [(store (f64 (extractelt (v2f64 VR128:$src),
7050b57cec5SDimitry Andric                                   (iPTR 0))), addr:$dst)]>,
70606c3fb27SDimitry Andric                     VEX, WIG;
7070b57cec5SDimitry Andric}// UseAVX
// Brace-less let: scopes only the def that immediately follows (MOVLPSmr).
7080b57cec5SDimitry Andriclet mayStore = 1, hasSideEffects = 0 in
7090b57cec5SDimitry Andricdef MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
7100b57cec5SDimitry Andric                   "movlps\t{$src, $dst|$dst, $src}",
7110b57cec5SDimitry Andric                   []>;
7120b57cec5SDimitry Andricdef MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
7130b57cec5SDimitry Andric                   "movlpd\t{$src, $dst|$dst, $src}",
7140b57cec5SDimitry Andric                   [(store (f64 (extractelt (v2f64 VR128:$src),
7150b57cec5SDimitry Andric                                 (iPTR 0))), addr:$dst)]>;
7160b57cec5SDimitry Andric} // SchedRW
7170b57cec5SDimitry Andric
7180b57cec5SDimitry Andriclet Predicates = [UseSSE1] in {
7190b57cec5SDimitry Andric  // This pattern helps select MOVLPS on SSE1 only targets. With SSE2 we'll
7200b57cec5SDimitry Andric  // end up with a movsd or blend instead of shufp.
7210b57cec5SDimitry Andric  // No need for aligned load, we're only loading 64-bits.
  // Both patterns match X86Shufp with immediate -28 where the first operand is
  // the loaded value (full v4f32 simple_load or X86vzload64) and the second is
  // the register; note the operand order is swapped in the MOVLPSrm result.
7228bcb0991SDimitry Andric  def : Pat<(X86Shufp (v4f32 (simple_load addr:$src2)), VR128:$src1,
7230b57cec5SDimitry Andric                      (i8 -28)),
7240b57cec5SDimitry Andric            (MOVLPSrm VR128:$src1, addr:$src2)>;
7250b57cec5SDimitry Andric  def : Pat<(X86Shufp (v4f32 (X86vzload64 addr:$src2)), VR128:$src1, (i8 -28)),
7260b57cec5SDimitry Andric            (MOVLPSrm VR128:$src1, addr:$src2)>;
7270b57cec5SDimitry Andric
  // A v4f32 X86vzload64 is selected as MOVLPSrm into a V_SET0 register; a
  // v4f32 X86vextractstore64 is selected as MOVLPSmr.
7280b57cec5SDimitry Andric  def : Pat<(v4f32 (X86vzload64 addr:$src)),
7290b57cec5SDimitry Andric            (MOVLPSrm (v4f32 (V_SET0)), addr:$src)>;
7300b57cec5SDimitry Andric  def : Pat<(X86vextractstore64 (v4f32 VR128:$src), addr:$dst),
7310b57cec5SDimitry Andric            (MOVLPSmr addr:$dst, VR128:$src)>;
7320b57cec5SDimitry Andric}
7330b57cec5SDimitry Andric
7340b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7350b57cec5SDimitry Andric// SSE 1 & 2 - Move Hi packed FP Instructions
7360b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7370b57cec5SDimitry Andric
// Load forms: opcode 0x16, mnemonic stem "movhp", PD pattern node X86Unpckl.
7380b57cec5SDimitry Andricdefm MOVH : sse12_mov_hilo_packed<0x16, X86Unpckl, "movhp">;
7390b57cec5SDimitry Andric
// Store forms: opcode 0x17, MRMDestMem, f64mem destination. The PS defs have
// empty pattern lists; the PD defs match a store of element 0 of
// X86Unpckh($src, $src).
7400b57cec5SDimitry Andriclet SchedRW = [WriteFStore] in {
7410b57cec5SDimitry Andric// v2f64 extract element 1 is always custom lowered to unpack high to low
7420b57cec5SDimitry Andric// and extract element 0 so the non-store version isn't too horrible.
7430b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
// Brace-less let: scopes only the def that immediately follows (VMOVHPSmr).
7440b57cec5SDimitry Andriclet mayStore = 1, hasSideEffects = 0 in
7450b57cec5SDimitry Andricdef VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
7460b57cec5SDimitry Andric                   "movhps\t{$src, $dst|$dst, $src}",
74706c3fb27SDimitry Andric                   []>, VEX, WIG;
7480b57cec5SDimitry Andricdef VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
7490b57cec5SDimitry Andric                   "movhpd\t{$src, $dst|$dst, $src}",
7500b57cec5SDimitry Andric                   [(store (f64 (extractelt
7510b57cec5SDimitry Andric                                 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
75206c3fb27SDimitry Andric                                 (iPTR 0))), addr:$dst)]>, VEX, WIG;
7530b57cec5SDimitry Andric} // UseAVX
// Brace-less let: scopes only the def that immediately follows (MOVHPSmr).
7540b57cec5SDimitry Andriclet mayStore = 1, hasSideEffects = 0 in
7550b57cec5SDimitry Andricdef MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
7560b57cec5SDimitry Andric                   "movhps\t{$src, $dst|$dst, $src}",
7570b57cec5SDimitry Andric                   []>;
7580b57cec5SDimitry Andricdef MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
7590b57cec5SDimitry Andric                   "movhpd\t{$src, $dst|$dst, $src}",
7600b57cec5SDimitry Andric                   [(store (f64 (extractelt
7610b57cec5SDimitry Andric                                 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
7620b57cec5SDimitry Andric                                 (iPTR 0))), addr:$dst)]>;
7630b57cec5SDimitry Andric} // SchedRW
7640b57cec5SDimitry Andric
// Extra UseAVX selection patterns for the VMOVHPD/VMOVLPD load and store
// forms: X86vzload64 sources for the loads, and an X86VPermilpi-based
// high-element extract for the VMOVHPDmr store.
7650b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
7665ffd83dbSDimitry Andric  // MOVHPD patterns
7670b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
7680b57cec5SDimitry Andric            (VMOVHPDrm VR128:$src1, addr:$src2)>;
7690b57cec5SDimitry Andric
7700b57cec5SDimitry Andric  def : Pat<(store (f64 (extractelt
7710b57cec5SDimitry Andric                          (v2f64 (X86VPermilpi VR128:$src, (i8 1))),
7720b57cec5SDimitry Andric                          (iPTR 0))), addr:$dst),
7730b57cec5SDimitry Andric            (VMOVHPDmr addr:$dst, VR128:$src)>;
7740b57cec5SDimitry Andric
7750b57cec5SDimitry Andric  // MOVLPD patterns
7760b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
7770b57cec5SDimitry Andric            (VMOVLPDrm VR128:$src1, addr:$src2)>;
7780b57cec5SDimitry Andric}
7790b57cec5SDimitry Andric
7800b57cec5SDimitry Andriclet Predicates = [UseSSE1] in {
7810b57cec5SDimitry Andric  // This pattern helps select MOVHPS on SSE1 only targets. With SSE2 we'll
7820b57cec5SDimitry Andric  // end up with a movsd or blend instead of shufp.
7830b57cec5SDimitry Andric  // No need for aligned load, we're only loading 64-bits.
  // NOTE(review): the patterns below match X86Movlhps, not shufp; the wording
  // above mirrors the MOVLPS comment earlier in the file - confirm it applies.
7848bcb0991SDimitry Andric  def : Pat<(X86Movlhps VR128:$src1, (v4f32 (simple_load addr:$src2))),
7850b57cec5SDimitry Andric            (MOVHPSrm VR128:$src1, addr:$src2)>;
7860b57cec5SDimitry Andric  def : Pat<(X86Movlhps VR128:$src1, (v4f32 (X86vzload64 addr:$src2))),
7870b57cec5SDimitry Andric            (MOVHPSrm VR128:$src1, addr:$src2)>;
7880b57cec5SDimitry Andric
  // A 64-bit extract-store of X86Movhlps($src, $src) is selected as MOVHPSmr
  // applied directly to $src.
7890b57cec5SDimitry Andric  def : Pat<(X86vextractstore64 (v4f32 (X86Movhlps VR128:$src, VR128:$src)),
7900b57cec5SDimitry Andric                                addr:$dst),
7910b57cec5SDimitry Andric            (MOVHPSmr addr:$dst, VR128:$src)>;
7920b57cec5SDimitry Andric}
7930b57cec5SDimitry Andric
// Extra UseSSE2 selection patterns for the MOVHPD/MOVLPD load and store forms:
// X86vzload64 sources for the loads, and an X86Shufp-based (immediate 1)
// high-element extract for the MOVHPDmr store.
7940b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
7950b57cec5SDimitry Andric  // MOVHPD patterns
7960b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
7970b57cec5SDimitry Andric            (MOVHPDrm VR128:$src1, addr:$src2)>;
7980b57cec5SDimitry Andric
7990b57cec5SDimitry Andric  def : Pat<(store (f64 (extractelt
8000b57cec5SDimitry Andric                          (v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))),
8010b57cec5SDimitry Andric                          (iPTR 0))), addr:$dst),
8020b57cec5SDimitry Andric            (MOVHPDmr addr:$dst, VR128:$src)>;
8030b57cec5SDimitry Andric
8040b57cec5SDimitry Andric  // MOVLPD patterns
8050b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
8060b57cec5SDimitry Andric            (MOVLPDrm VR128:$src1, addr:$src2)>;
8070b57cec5SDimitry Andric}
8080b57cec5SDimitry Andric
8090b57cec5SDimitry Andriclet Predicates = [UseSSE2, NoSSE41_Or_OptForSize] in {
8100b57cec5SDimitry Andric  // Use MOVLPD to load into the low bits from a full vector unless we can use
8110b57cec5SDimitry Andric  // BLENDPD.
  // Matches X86Movsd whose second operand is a full v2f64 simple_load.
8128bcb0991SDimitry Andric  def : Pat<(X86Movsd VR128:$src1, (v2f64 (simple_load addr:$src2))),
8130b57cec5SDimitry Andric            (MOVLPDrm VR128:$src1, addr:$src2)>;
8140b57cec5SDimitry Andric}
8150b57cec5SDimitry Andric
8160b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
8170b57cec5SDimitry Andric// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
8180b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
8190b57cec5SDimitry Andric
// Three-operand VEX_4V register forms under UseAVX. VMOVLHPSrr (opcode 0x16)
// matches X86Movlhps; VMOVHLPSrr (opcode 0x12) matches X86Movhlps.
8200b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
8210b57cec5SDimitry Andric  def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
8220b57cec5SDimitry Andric                                       (ins VR128:$src1, VR128:$src2),
8230b57cec5SDimitry Andric                      "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8240b57cec5SDimitry Andric                      [(set VR128:$dst,
8250b57cec5SDimitry Andric                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
82606c3fb27SDimitry Andric                      VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, WIG;
  // Brace-less let: only VMOVHLPSrr is marked isCommutable.
8270b57cec5SDimitry Andric  let isCommutable = 1 in
8280b57cec5SDimitry Andric  def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
8290b57cec5SDimitry Andric                                       (ins VR128:$src1, VR128:$src2),
8300b57cec5SDimitry Andric                      "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8310b57cec5SDimitry Andric                      [(set VR128:$dst,
8320b57cec5SDimitry Andric                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
83306c3fb27SDimitry Andric                      VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, WIG;
8340b57cec5SDimitry Andric}
// Two-operand register forms with $src1 tied to $dst. MOVLHPSrr (opcode 0x16)
// matches X86Movlhps; MOVHLPSrr (opcode 0x12) matches X86Movhlps.
8350b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
8360b57cec5SDimitry Andric  def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
8370b57cec5SDimitry Andric                                       (ins VR128:$src1, VR128:$src2),
8380b57cec5SDimitry Andric                      "movlhps\t{$src2, $dst|$dst, $src2}",
8390b57cec5SDimitry Andric                      [(set VR128:$dst,
8400b57cec5SDimitry Andric                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
8410b57cec5SDimitry Andric                      Sched<[SchedWriteFShuffle.XMM]>;
  // Brace-less let: only MOVHLPSrr is marked isCommutable.
8420b57cec5SDimitry Andric  let isCommutable = 1 in
8430b57cec5SDimitry Andric  def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
8440b57cec5SDimitry Andric                                       (ins VR128:$src1, VR128:$src2),
8450b57cec5SDimitry Andric                      "movhlps\t{$src2, $dst|$dst, $src2}",
8460b57cec5SDimitry Andric                      [(set VR128:$dst,
8470b57cec5SDimitry Andric                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
84806c3fb27SDimitry Andric                      Sched<[SchedWriteFShuffle.XMM]>;
8490b57cec5SDimitry Andric}
8500b57cec5SDimitry Andric
8510b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
8520b57cec5SDimitry Andric// SSE 1 & 2 - Conversion Instructions
8530b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
8540b57cec5SDimitry Andric
/// sse12_cvt_s - Scalar conversion with a register (rr) and a memory (rm) form.
///   opc      - opcode byte shared by both forms.
///   SrcRC    - source register class of the rr form.
///   DstRC    - destination register class of both forms.
///   OpNode   - node matched by both selection patterns.
///   x86memop - memory operand type of the rm form.
///   ld_frag  - load fragment wrapped around addr:$src in the rm pattern.
///   asm      - mnemonic used for the rr form.
///   mem      - mnemonic used for the rm form.
///   sched    - scheduling class; the rm form uses sched.Folded.
///   d        - execution domain applied to both forms.
///   Int2Fpu  - extra read listed in the rr form's Sched (default ReadDefault).
8550b57cec5SDimitry Andricmulticlass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
856fe6060f1SDimitry Andric                     SDPatternOperator OpNode, X86MemOperand x86memop, PatFrag ld_frag,
8570b57cec5SDimitry Andric                     string asm, string mem, X86FoldableSchedWrite sched,
858480093f4SDimitry Andric                     Domain d,
8590b57cec5SDimitry Andric                     SchedRead Int2Fpu = ReadDefault> {
860480093f4SDimitry Andric  let ExeDomain = d in {
8610b57cec5SDimitry Andric  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
8620b57cec5SDimitry Andric              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
8630b57cec5SDimitry Andric              [(set DstRC:$dst, (OpNode SrcRC:$src))]>,
8640b57cec5SDimitry Andric              Sched<[sched, Int2Fpu]>;
8650b57cec5SDimitry Andric  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
8660b57cec5SDimitry Andric              mem#"\t{$src, $dst|$dst, $src}",
8670b57cec5SDimitry Andric              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
8680b57cec5SDimitry Andric              Sched<[sched.Folded]>;
8690b57cec5SDimitry Andric  }
870480093f4SDimitry Andric}
8710b57cec5SDimitry Andric
/// sse12_cvt_p - Packed conversion with a register (rr) and a memory (rm) form.
/// Both patterns match any_sint_to_fp from SrcTy to DstTy; both defs are marked
/// as using MXCSR and as possibly raising FP exceptions.
///   opc      - opcode byte shared by both forms.
///   RC       - register class used for source and destination.
///   x86memop - memory operand type of the rm form.
///   DstTy    - result value type of the conversion.
///   SrcTy    - source value type of the conversion.
///   ld_frag  - load fragment wrapped around addr:$src in the rm pattern.
///   asm      - complete assembly string, used unchanged by both forms.
///   d        - execution domain passed to both defs.
///   sched    - scheduling class; the rm form uses sched.Folded.
8720b57cec5SDimitry Andricmulticlass sse12_cvt_p<bits<8> opc, RegisterClass RC, X86MemOperand x86memop,
8730b57cec5SDimitry Andric                       ValueType DstTy, ValueType SrcTy, PatFrag ld_frag,
8740b57cec5SDimitry Andric                       string asm, Domain d, X86FoldableSchedWrite sched> {
875480093f4SDimitry Andriclet hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in {
8760b57cec5SDimitry Andric  def rr : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), asm,
877480093f4SDimitry Andric             [(set RC:$dst, (DstTy (any_sint_to_fp (SrcTy RC:$src))))], d>,
8780b57cec5SDimitry Andric             Sched<[sched]>;
8790b57cec5SDimitry Andric  let mayLoad = 1 in
8800b57cec5SDimitry Andric  def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), asm,
881480093f4SDimitry Andric             [(set RC:$dst, (DstTy (any_sint_to_fp
8820b57cec5SDimitry Andric                                    (SrcTy (ld_frag addr:$src)))))], d>,
8830b57cec5SDimitry Andric             Sched<[sched.Folded]>;
8840b57cec5SDimitry Andric}
8850b57cec5SDimitry Andric}
8860b57cec5SDimitry Andric
/// sse12_vcvt_avx - Three-operand scalar conversion records gated on UseAVX.
/// Both forms take an extra DstRC:$src1 input ahead of the converted source
/// and carry no selection pattern (empty pattern list). In the memory form
/// the `mem` suffix is appended to the mnemonic inside braces.
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          X86MemOperand x86memop, string asm, string mem,
                          X86FoldableSchedWrite sched, Domain d> {
  let Predicates = [UseAVX], ExeDomain = d, hasSideEffects = 0 in {
    // Register source.
    def rr : SI<opc, MRMSrcReg,
                (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
                asm # "\t{$src, $src1, $dst|$dst, $src1, $src}",
                []>,
             Sched<[sched, ReadDefault, ReadInt2Fpu]>;

    // Memory source.
    let mayLoad = 1 in {
      def rm : SI<opc, MRMSrcMem,
                  (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src),
                  !strconcat(asm, "{", mem,
                             "}\t{$src, $src1, $dst|$dst, $src1, $src}"),
                  []>,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
    }
  } // Predicates = [UseAVX], ExeDomain = d, hasSideEffects = 0
}
9010b57cec5SDimitry Andric
// Codegen-only AVX scalar FP -> integer conversions on FR32/FR64.
// Opcode 0x2C records select any_fp_to_sint; opcode 0x2D records select
// lrint (GR32 result) or llrint (GR64 result). All of them read MXCSR and may
// raise an FP exception.
let isCodeGenOnly = 1, Predicates = [UseAVX],
    Uses = [MXCSR], mayRaiseFPException = 1 in {
  // cvttss2si / cvttsd2si
  defm VCVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint,
                                  f32mem, loadf32, "cvttss2si", "cvttss2si",
                                  WriteCvtSS2I, SSEPackedSingle>,
                      XS, VEX, VEX_LIG;
  defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint,
                                  f32mem, loadf32, "cvttss2si", "cvttss2si",
                                  WriteCvtSS2I, SSEPackedSingle>,
                      XS, VEX, REX_W, VEX_LIG;
  defm VCVTTSD2SI   : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint,
                                  f64mem, loadf64, "cvttsd2si", "cvttsd2si",
                                  WriteCvtSD2I, SSEPackedDouble>,
                      XD, VEX, VEX_LIG;
  defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint,
                                  f64mem, loadf64, "cvttsd2si", "cvttsd2si",
                                  WriteCvtSD2I, SSEPackedDouble>,
                      XD, VEX, REX_W, VEX_LIG;

  // cvtss2si / cvtsd2si
  defm VCVTSS2SI    : sse12_cvt_s<0x2D, FR32, GR32, lrint,
                                  f32mem, loadf32, "cvtss2si", "cvtss2si",
                                  WriteCvtSS2I, SSEPackedSingle>,
                      XS, VEX, VEX_LIG;
  defm VCVTSS2SI64  : sse12_cvt_s<0x2D, FR32, GR64, llrint,
                                  f32mem, loadf32, "cvtss2si", "cvtss2si",
                                  WriteCvtSS2I, SSEPackedSingle>,
                      XS, VEX, REX_W, VEX_LIG;
  defm VCVTSD2SI    : sse12_cvt_s<0x2D, FR64, GR32, lrint,
                                  f64mem, loadf64, "cvtsd2si", "cvtsd2si",
                                  WriteCvtSD2I, SSEPackedDouble>,
                      XD, VEX, VEX_LIG;
  defm VCVTSD2SI64  : sse12_cvt_s<0x2D, FR64, GR64, llrint,
                                  f64mem, loadf64, "cvtsd2si", "cvtsd2si",
                                  WriteCvtSD2I, SSEPackedDouble>,
                      XD, VEX, REX_W, VEX_LIG;
} // isCodeGenOnly = 1, Predicates = [UseAVX], Uses = [MXCSR]
9370b57cec5SDimitry Andric
// The assembler can recognize the 64-bit rr instructions from their rxx
// register operand, but that does not work when only memory operands are
// present. Explicit "l" and "q" assembly forms are therefore provided where
// appropriate.
//
// NOTE(review): VCVTSI2SD is the only record here without SIMD_EXC --
// presumably because an i32 -> f64 conversion is always exact; confirm.
let isCodeGenOnly = 1 in {
  defm VCVTSI2SS   : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem,
                                    "cvtsi2ss", "l",
                                    WriteCvtI2SS, SSEPackedSingle>,
                     XS, VEX_4V, VEX_LIG, SIMD_EXC;
  defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem,
                                    "cvtsi2ss", "q",
                                    WriteCvtI2SS, SSEPackedSingle>,
                     XS, VEX_4V, REX_W, VEX_LIG, SIMD_EXC;
  defm VCVTSI2SD   : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem,
                                    "cvtsi2sd", "l",
                                    WriteCvtI2SD, SSEPackedDouble>,
                     XD, VEX_4V, VEX_LIG;
  defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem,
                                    "cvtsi2sd", "q",
                                    WriteCvtI2SD, SSEPackedDouble>,
                     XD, VEX_4V, REX_W, VEX_LIG, SIMD_EXC;
} // isCodeGenOnly = 1
9560b57cec5SDimitry Andric
// Selection patterns for the pattern-less VCVTSI2S* records, plus i64 lrint.
let Predicates = [UseAVX] in {
  // Integer loaded from memory -> scalar FP. The leading $src1 operand of the
  // instruction is filled with IMPLICIT_DEF.
  def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SSrm   (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SDrm   (f64 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;

  // Integer in a general-purpose register -> scalar FP.
  def : Pat<(f32 (any_sint_to_fp GR32:$src)),
            (VCVTSI2SSrr   (f32 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f32 (any_sint_to_fp GR64:$src)),
            (VCVTSI642SSrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
  def : Pat<(f64 (any_sint_to_fp GR32:$src)),
            (VCVTSI2SDrr   (f64 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f64 (any_sint_to_fp GR64:$src)),
            (VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

  // lrint with an i64 result uses the 64-bit conversion records.
  def : Pat<(i64 (lrint FR32:$src)),
            (VCVTSS2SI64rr FR32:$src)>;
  def : Pat<(i64 (lrint (loadf32 addr:$src))),
            (VCVTSS2SI64rm addr:$src)>;

  def : Pat<(i64 (lrint FR64:$src)),
            (VCVTSD2SI64rr FR64:$src)>;
  def : Pat<(i64 (lrint (loadf64 addr:$src))),
            (VCVTSD2SI64rm addr:$src)>;
} // Predicates = [UseAVX]
9820b57cec5SDimitry Andric
// Codegen-only SSE scalar conversions on FR32/FR64 and GR32/GR64.
// Opcode 0x2C selects any_fp_to_sint, 0x2D selects lrint/llrint and 0x2A
// selects any_sint_to_fp.
let isCodeGenOnly = 1 in {
  // FP -> integer, any_fp_to_sint.
  defm CVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint,
                                 f32mem, loadf32, "cvttss2si", "cvttss2si",
                                 WriteCvtSS2I, SSEPackedSingle>,
                     XS, SIMD_EXC;
  defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint,
                                 f32mem, loadf32, "cvttss2si", "cvttss2si",
                                 WriteCvtSS2I, SSEPackedSingle>,
                     XS, REX_W, SIMD_EXC;
  defm CVTTSD2SI   : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint,
                                 f64mem, loadf64, "cvttsd2si", "cvttsd2si",
                                 WriteCvtSD2I, SSEPackedDouble>,
                     XD, SIMD_EXC;
  defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint,
                                 f64mem, loadf64, "cvttsd2si", "cvttsd2si",
                                 WriteCvtSD2I, SSEPackedDouble>,
                     XD, REX_W, SIMD_EXC;

  // FP -> integer, lrint / llrint.
  defm CVTSS2SI    : sse12_cvt_s<0x2D, FR32, GR32, lrint,
                                 f32mem, loadf32, "cvtss2si", "cvtss2si",
                                 WriteCvtSS2I, SSEPackedSingle>,
                     XS, SIMD_EXC;
  defm CVTSS2SI64  : sse12_cvt_s<0x2D, FR32, GR64, llrint,
                                 f32mem, loadf32, "cvtss2si", "cvtss2si",
                                 WriteCvtSS2I, SSEPackedSingle>,
                     XS, REX_W, SIMD_EXC;
  defm CVTSD2SI    : sse12_cvt_s<0x2D, FR64, GR32, lrint,
                                 f64mem, loadf64, "cvtsd2si", "cvtsd2si",
                                 WriteCvtSD2I, SSEPackedDouble>,
                     XD, SIMD_EXC;
  defm CVTSD2SI64  : sse12_cvt_s<0x2D, FR64, GR64, llrint,
                                 f64mem, loadf64, "cvtsd2si", "cvtsd2si",
                                 WriteCvtSD2I, SSEPackedDouble>,
                     XD, REX_W, SIMD_EXC;

  // Integer -> FP. The memory-form mnemonic carries the {l}/{q} suffix and
  // ReadInt2Fpu is passed as the register form's second Sched entry.
  // NOTE(review): CVTSI2SD is the only record without SIMD_EXC -- presumably
  // because an i32 -> f64 conversion is always exact; confirm.
  defm CVTSI2SS    : sse12_cvt_s<0x2A, GR32, FR32, any_sint_to_fp,
                                 i32mem, loadi32, "cvtsi2ss", "cvtsi2ss{l}",
                                 WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>,
                     XS, SIMD_EXC;
  defm CVTSI642SS  : sse12_cvt_s<0x2A, GR64, FR32, any_sint_to_fp,
                                 i64mem, loadi64, "cvtsi2ss", "cvtsi2ss{q}",
                                 WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>,
                     XS, REX_W, SIMD_EXC;
  defm CVTSI2SD    : sse12_cvt_s<0x2A, GR32, FR64, any_sint_to_fp,
                                 i32mem, loadi32, "cvtsi2sd", "cvtsi2sd{l}",
                                 WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>,
                     XD;
  defm CVTSI642SD  : sse12_cvt_s<0x2A, GR64, FR64, any_sint_to_fp,
                                 i64mem, loadi64, "cvtsi2sd", "cvtsi2sd{q}",
                                 WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>,
                     XD, REX_W, SIMD_EXC;
} // isCodeGenOnly = 1
10230b57cec5SDimitry Andric
// lrint from f32 with an i64 result selects the 64-bit CVTSS2SI records.
let Predicates = [UseSSE1] in {
  def : Pat<(i64 (lrint FR32:$src)),
            (CVTSS2SI64rr FR32:$src)>;
  def : Pat<(i64 (lrint (loadf32 addr:$src))),
            (CVTSS2SI64rm addr:$src)>;
} // Predicates = [UseSSE1]
10285ffd83dbSDimitry Andric
// lrint from f64 with an i64 result selects the 64-bit CVTSD2SI records.
let Predicates = [UseSSE2] in {
  def : Pat<(i64 (lrint FR64:$src)),
            (CVTSD2SI64rr FR64:$src)>;
  def : Pat<(i64 (lrint (loadf64 addr:$src))),
            (CVTSD2SI64rm addr:$src)>;
} // Predicates = [UseSSE2]
10335ffd83dbSDimitry Andric
10340b57cec5SDimitry Andric// Conversion Instructions Intrinsics - Match intrinsics which expect MM
10350b57cec5SDimitry Andric// and/or XMM operand(s).
10360b57cec5SDimitry Andric
/// sse12_cvt_sint - "_Int" conversion records with typed patterns.
///   rr_Int: DstRC:$dst = (DstVT (OpNode (SrcVT SrcRC:$src)))
///   rm_Int: same, with the source read through mem_frags from a `memop`
///           operand.
/// Both forms print `asm` followed by the two-operand string.
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          ValueType DstVT, ValueType SrcVT, SDNode OpNode,
                          Operand memop, PatFrags mem_frags, string asm,
                          X86FoldableSchedWrite sched, Domain d> {
  let ExeDomain = d in {
    // Register source.
    def rr_Int : SI<opc, MRMSrcReg,
                    (outs DstRC:$dst), (ins SrcRC:$src),
                    asm # "\t{$src, $dst|$dst, $src}",
                    [(set DstRC:$dst, (DstVT (OpNode (SrcVT SrcRC:$src))))]>,
                 Sched<[sched]>;

    // Memory source.
    def rm_Int : SI<opc, MRMSrcMem,
                    (outs DstRC:$dst), (ins memop:$src),
                    asm # "\t{$src, $dst|$dst, $src}",
                    [(set DstRC:$dst,
                          (DstVT (OpNode (SrcVT (mem_frags addr:$src)))))]>,
                 Sched<[sched.Folded]>;
  }
}
10520b57cec5SDimitry Andric
/// sse12_cvt_sint_3addr - Pattern-less "_Int" conversion records that take a
/// DstRC:$src1 input in addition to the converted source $src2.
/// Is2Addr only selects the assembly string: when set (the default) the
/// string names $src2 and $dst; otherwise it also names $src1. The memory
/// form appends the `mem` suffix to the mnemonic inside braces.
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
                                RegisterClass DstRC, X86MemOperand x86memop,
                                string asm, string mem,
                                X86FoldableSchedWrite sched, Domain d,
                                bit Is2Addr = 1> {
  let ExeDomain = d, hasSideEffects = 0 in {
    // Register source.
    def rr_Int : SI<opc, MRMSrcReg,
                    (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
                    !if(Is2Addr,
                        asm # "\t{$src2, $dst|$dst, $src2}",
                        asm # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>,
                 Sched<[sched, ReadDefault, ReadInt2Fpu]>;

    // Memory source.
    let mayLoad = 1 in {
      def rm_Int : SI<opc, MRMSrcMem,
                      (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src2),
                      !if(Is2Addr,
                          !strconcat(asm, "{", mem,
                                     "}\t{$src2, $dst|$dst, $src2}"),
                          !strconcat(asm, "{", mem,
                                     "}\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                      []>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
    }
  }
}
10720b57cec5SDimitry Andric
// (V)CVTSD2SI "_Int" records: X86cvts2si on a v2f64 in VR128, producing
// i32/i64. All read MXCSR and may raise an FP exception; only the VEX forms
// are predicated on UseAVX.
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  let Predicates = [UseAVX] in {
    defm VCVTSD2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64,
                                      X86cvts2si, sdmem, sse_load_f64,
                                      "cvtsd2si", WriteCvtSD2I,
                                      SSEPackedDouble>,
                       XD, VEX, VEX_LIG;
    defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64,
                                      X86cvts2si, sdmem, sse_load_f64,
                                      "cvtsd2si", WriteCvtSD2I,
                                      SSEPackedDouble>,
                       XD, VEX, REX_W, VEX_LIG;
  } // Predicates = [UseAVX]

  defm CVTSD2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64,
                                   X86cvts2si, sdmem, sse_load_f64,
                                   "cvtsd2si", WriteCvtSD2I,
                                   SSEPackedDouble>,
                    XD;
  defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64,
                                   X86cvts2si, sdmem, sse_load_f64,
                                   "cvtsd2si", WriteCvtSD2I,
                                   SSEPackedDouble>,
                    XD, REX_W;
} // Uses = [MXCSR], mayRaiseFPException = 1
10890b57cec5SDimitry Andric
// AVX "_Int" integer -> scalar FP records on VR128. Is2Addr is passed as 0,
// so the assembly string names all three operands.
// NOTE(review): VCVTSI2SD is the only record without SIMD_EXC -- presumably
// because an i32 -> f64 conversion is always exact; confirm.
let Predicates = [UseAVX] in {
  defm VCVTSI2SS   : sse12_cvt_sint_3addr<0x2A, GR32, VR128, i32mem,
                                          "cvtsi2ss", "l",
                                          WriteCvtI2SS, SSEPackedSingle, 0>,
                     XS, VEX_4V, VEX_LIG, SIMD_EXC;
  defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, i64mem,
                                          "cvtsi2ss", "q",
                                          WriteCvtI2SS, SSEPackedSingle, 0>,
                     XS, VEX_4V, VEX_LIG, REX_W, SIMD_EXC;
  defm VCVTSI2SD   : sse12_cvt_sint_3addr<0x2A, GR32, VR128, i32mem,
                                          "cvtsi2sd", "l",
                                          WriteCvtI2SD, SSEPackedDouble, 0>,
                     XD, VEX_4V, VEX_LIG;
  defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, i64mem,
                                          "cvtsi2sd", "q",
                                          WriteCvtI2SD, SSEPackedDouble, 0>,
                     XD, VEX_4V, VEX_LIG, REX_W, SIMD_EXC;
} // Predicates = [UseAVX]
// SSE "_Int" integer -> scalar FP records on VR128. $src1 is tied to $dst and
// Is2Addr keeps its default of 1, so the assembly string names two operands.
// NOTE(review): CVTSI2SD is the only record without SIMD_EXC -- presumably
// because an i32 -> f64 conversion is always exact; confirm.
let Constraints = "$src1 = $dst" in {
  defm CVTSI2SS   : sse12_cvt_sint_3addr<0x2A, GR32, VR128, i32mem,
                                         "cvtsi2ss", "l",
                                         WriteCvtI2SS, SSEPackedSingle>,
                    XS, SIMD_EXC;
  defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, i64mem,
                                         "cvtsi2ss", "q",
                                         WriteCvtI2SS, SSEPackedSingle>,
                    XS, REX_W, SIMD_EXC;
  defm CVTSI2SD   : sse12_cvt_sint_3addr<0x2A, GR32, VR128, i32mem,
                                         "cvtsi2sd", "l",
                                         WriteCvtI2SD, SSEPackedDouble>,
                    XD;
  defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, i64mem,
                                         "cvtsi2sd", "q",
                                         WriteCvtI2SD, SSEPackedDouble>,
                    XD, REX_W, SIMD_EXC;
} // Constraints = "$src1 = $dst"
11180b57cec5SDimitry Andric
// Aliases for the AVX cvtsi2ss/cvtsi2sd "_Int" records, registered with
// arguments 0 and "att".

// Register source, with an {l}/{q} suffix on the mnemonic.
def : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI2SSrr_Int VR128:$dst, VR128:$src1, GR32:$src2),
                0, "att">;
def : InstAlias<"vcvtsi2ss{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI642SSrr_Int VR128:$dst, VR128:$src1, GR64:$src2),
                0, "att">;
def : InstAlias<"vcvtsi2sd{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI2SDrr_Int VR128:$dst, VR128:$src1, GR32:$src2),
                0, "att">;
def : InstAlias<"vcvtsi2sd{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI642SDrr_Int VR128:$dst, VR128:$src1, GR64:$src2),
                0, "att">;

// Memory source without a suffix maps to the i32mem records.
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SSrm_Int VR128:$dst, VR128:$src1, i32mem:$src),
                0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SDrm_Int VR128:$dst, VR128:$src1, i32mem:$src),
                0, "att">;
11320b57cec5SDimitry Andric
// Aliases for the SSE cvtsi2ss/cvtsi2sd "_Int" records, registered with
// arguments 0 and "att".

// Register source, with an {l}/{q} suffix on the mnemonic.
def : InstAlias<"cvtsi2ss{l}\t{$src, $dst|$dst, $src}",
                (CVTSI2SSrr_Int VR128:$dst, GR32:$src),
                0, "att">;
def : InstAlias<"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
                (CVTSI642SSrr_Int VR128:$dst, GR64:$src),
                0, "att">;
def : InstAlias<"cvtsi2sd{l}\t{$src, $dst|$dst, $src}",
                (CVTSI2SDrr_Int VR128:$dst, GR32:$src),
                0, "att">;
def : InstAlias<"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
                (CVTSI642SDrr_Int VR128:$dst, GR64:$src),
                0, "att">;

// Memory source without a suffix maps to the i32mem records.
def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
                (CVTSI2SSrm_Int VR128:$dst, i32mem:$src),
                0, "att">;
def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
                (CVTSI2SDrm_Int VR128:$dst, i32mem:$src),
                0, "att">;
11460b57cec5SDimitry Andric
11470b57cec5SDimitry Andric/// SSE 1 Only
11480b57cec5SDimitry Andric
11490b57cec5SDimitry Andric// Aliases for intrinsics
// AVX "_Int" records for cvttss2si / cvttsd2si: X86cvtts2Int on a VR128
// source, producing i32/i64. All read MXCSR and may raise an FP exception.
//
// The double-precision records use WriteCvtSD2I, matching the SSE
// CVTTSD2SI*_Int records and the codegen-only VCVTTSD2SI* records in this
// file. They previously used WriteCvtSS2I, which looks like a copy/paste
// slip from the single-precision entries above them.
let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
                                ssmem, sse_load_f32, "cvttss2si",
                                WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
                               X86cvtts2Int, ssmem, sse_load_f32,
                               "cvttss2si", WriteCvtSS2I, SSEPackedSingle>,
                               XS, VEX, VEX_LIG, REX_W;
defm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
                                sdmem, sse_load_f64, "cvttsd2si",
                                WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
                              X86cvtts2Int, sdmem, sse_load_f64,
                              "cvttsd2si", WriteCvtSD2I, SSEPackedDouble>,
                              XD, VEX, VEX_LIG, REX_W;
}
// SSE "_Int" records for cvttss2si / cvttsd2si: X86cvtts2Int on a VR128
// source, producing i32/i64. All read MXCSR and may raise an FP exception.
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm CVTTSS2SI   : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32,
                                    X86cvtts2Int, ssmem, sse_load_f32,
                                    "cvttss2si", WriteCvtSS2I,
                                    SSEPackedSingle>,
                     XS;
  defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
                                    X86cvtts2Int, ssmem, sse_load_f32,
                                    "cvttss2si", WriteCvtSS2I,
                                    SSEPackedSingle>,
                     XS, REX_W;
  defm CVTTSD2SI   : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64,
                                    X86cvtts2Int, sdmem, sse_load_f64,
                                    "cvttsd2si", WriteCvtSD2I,
                                    SSEPackedDouble>,
                     XD;
  defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
                                    X86cvtts2Int, sdmem, sse_load_f64,
                                    "cvttsd2si", WriteCvtSD2I,
                                    SSEPackedDouble>,
                     XD, REX_W;
} // Uses = [MXCSR], mayRaiseFPException = 1
11820b57cec5SDimitry Andric
// {l}/{q}-suffixed aliases for the AVX cvttss2si / cvttsd2si "_Int" records,
// registered with arguments 0 and "att".
// NOTE(review): the memory aliases name f32mem/f64mem while the VCVTTS*2SI
// records are instantiated with ssmem/sdmem -- confirm this is intended.

// 32-bit destination.
def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SIrr_Int GR32:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SIrm_Int GR32:$dst, f32mem:$src),
                0, "att">;
def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SIrr_Int GR32:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SIrm_Int GR32:$dst, f64mem:$src),
                0, "att">;

// 64-bit destination.
def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SI64rr_Int GR64:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SI64rm_Int GR64:$dst, f32mem:$src),
                0, "att">;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SI64rr_Int GR64:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SI64rm_Int GR64:$dst, f64mem:$src),
                0, "att">;
11990b57cec5SDimitry Andric
// {l}/{q}-suffixed aliases for the SSE cvttss2si / cvttsd2si "_Int" records,
// registered with arguments 0 and "att".
// NOTE(review): the memory aliases name f32mem/f64mem while the CVTTS*2SI
// records are instantiated with ssmem/sdmem -- confirm this is intended.

// 32-bit destination.
def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SIrr_Int GR32:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SIrm_Int GR32:$dst, f32mem:$src),
                0, "att">;
def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SIrr_Int GR32:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SIrm_Int GR32:$dst, f64mem:$src),
                0, "att">;

// 64-bit destination.
def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SI64rr_Int GR64:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SI64rm_Int GR64:$dst, f32mem:$src),
                0, "att">;
def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SI64rr_Int GR64:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SI64rm_Int GR64:$dst, f64mem:$src),
                0, "att">;
12160b57cec5SDimitry Andric
// AVX "_Int" records for cvtss2si: X86cvts2si on a v4f32 in VR128, producing
// i32/i64. Both read MXCSR and may raise an FP exception.
let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm VCVTSS2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32,
                                    X86cvts2si, ssmem, sse_load_f32,
                                    "cvtss2si", WriteCvtSS2I,
                                    SSEPackedSingle>,
                     XS, VEX, VEX_LIG;
  defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32,
                                    X86cvts2si, ssmem, sse_load_f32,
                                    "cvtss2si", WriteCvtSS2I,
                                    SSEPackedSingle>,
                     XS, VEX, REX_W, VEX_LIG;
} // Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1
// SSE "_Int" cvtss2si records plus the packed cvtdq2ps records. Everything in
// this scope reads MXCSR and may raise an FP exception.
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Scalar: X86cvts2si on a v4f32 in VR128.
  defm CVTSS2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32,
                                   X86cvts2si, ssmem, sse_load_f32,
                                   "cvtss2si", WriteCvtSS2I,
                                   SSEPackedSingle>,
                    XS;
  defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32,
                                   X86cvts2si, ssmem, sse_load_f32,
                                   "cvtss2si", WriteCvtSS2I,
                                   SSEPackedSingle>,
                    XS, REX_W;

  // Packed i32 -> f32, VEX forms (128- and 256-bit) require HasAVX and NoVLX.
  defm VCVTDQ2PS  : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load,
                                "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                                SSEPackedSingle, WriteCvtI2PS>,
                    PS, VEX, Requires<[HasAVX, NoVLX]>, WIG;
  defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, load,
                                "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                                SSEPackedSingle, WriteCvtI2PSY>,
                    PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, WIG;

  // Packed i32 -> f32, SSE2 form; its memory operand is read through memop.
  defm CVTDQ2PS   : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memop,
                                "cvtdq2ps\t{$src, $dst|$dst, $src}",
                                SSEPackedSingle, WriteCvtI2PS>,
                    PS, Requires<[UseSSE2]>;
} // Uses = [MXCSR], mayRaiseFPException = 1
12470b57cec5SDimitry Andric
// AVX aliases: {l}/{q}-suffixed spellings of vcvtss2si / vcvtsd2si for the
// "_Int" records, registered with arguments 0 and "att".

// 32-bit destination.
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SIrr_Int GR32:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SIrm_Int GR32:$dst, ssmem:$src),
                0, "att">;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SIrr_Int GR32:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SIrm_Int GR32:$dst, sdmem:$src),
                0, "att">;

// 64-bit destination.
def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SI64rr_Int GR64:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SI64rm_Int GR64:$dst, ssmem:$src),
                0, "att">;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SI64rr_Int GR64:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SI64rm_Int GR64:$dst, sdmem:$src),
                0, "att">;
12650b57cec5SDimitry Andric
// SSE aliases: {l}/{q}-suffixed spellings of cvtss2si / cvtsd2si for the
// "_Int" records, registered with arguments 0 and "att".

// 32-bit destination.
def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSS2SIrr_Int GR32:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSS2SIrm_Int GR32:$dst, ssmem:$src),
                0, "att">;
def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSD2SIrr_Int GR32:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSD2SIrm_Int GR32:$dst, sdmem:$src),
                0, "att">;

// 64-bit destination.
def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSS2SI64rr_Int GR64:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSS2SI64rm_Int GR64:$dst, ssmem:$src),
                0, "att">;
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSD2SI64rr_Int GR64:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSD2SI64rm_Int GR64:$dst, sdmem:$src),
                0, "att">;
12830b57cec5SDimitry Andric
12840b57cec5SDimitry Andric/// SSE 2 Only
12850b57cec5SDimitry Andric
12860b57cec5SDimitry Andric// Convert scalar double to scalar single
// VEX-encoded scalar double -> single conversion on the FR32/FR64 register
// classes. Both records are isCodeGenOnly, are predicated on UseAVX, and carry
// an empty selection pattern ([]). They take two sources: an FR32 $src1 and
// the FR64 (or f64mem) $src2 that is converted.
1287e8d8bef9SDimitry Andriclet isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX],
1288e8d8bef9SDimitry Andric    ExeDomain = SSEPackedSingle in {
// NOTE(review): this record passes "cvtsd2ss" to VSDI and lists no XD mixin,
// while the rm record below spells "vcvtsd2ss" and XD itself; presumably VSDI
// supplies both the 'v' and the prefix -- confirm against the VSDI class.
12890b57cec5SDimitry Andricdef VCVTSD2SSrr  : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
12900b57cec5SDimitry Andric                        (ins FR32:$src1, FR64:$src2),
12910b57cec5SDimitry Andric                        "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
129206c3fb27SDimitry Andric                        VEX_4V, VEX_LIG, WIG,
1293480093f4SDimitry Andric                        Sched<[WriteCvtSD2SS]>, SIMD_EXC;
// The pattern list is empty, so mayLoad is set by hand on this memory form.
12940b57cec5SDimitry Andriclet mayLoad = 1 in
12950b57cec5SDimitry Andricdef VCVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst),
12960b57cec5SDimitry Andric                     (ins FR32:$src1, f64mem:$src2),
12970b57cec5SDimitry Andric                     "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
129806c3fb27SDimitry Andric                     XD, VEX_4V, VEX_LIG, WIG,
1299480093f4SDimitry Andric                     Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC;
13000b57cec5SDimitry Andric}
13010b57cec5SDimitry Andric
// Under UseAVX, select an f64 -> f32 any_fpround to VCVTSD2SSrr. The
// instruction's first operand is given an f32 IMPLICIT_DEF; only the FR64
// source carries a real value.
1302480093f4SDimitry Andricdef : Pat<(f32 (any_fpround FR64:$src)),
13030b57cec5SDimitry Andric            (VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>,
13040b57cec5SDimitry Andric          Requires<[UseAVX]>;
13050b57cec5SDimitry Andric
// Scalar double -> single conversion without any VEX mixin, on FR32/FR64.
// These records take a single source and carry the any_fpround selection
// pattern directly. Both are isCodeGenOnly.
1306e8d8bef9SDimitry Andriclet isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in {
13070b57cec5SDimitry Andricdef CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
13080b57cec5SDimitry Andric                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
1309480093f4SDimitry Andric                      [(set FR32:$dst, (any_fpround FR64:$src))]>,
1310480093f4SDimitry Andric                      Sched<[WriteCvtSD2SS]>, SIMD_EXC;
// The load-folding form is additionally gated on OptForSize.
13110b57cec5SDimitry Andricdef CVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
13120b57cec5SDimitry Andric                    "cvtsd2ss\t{$src, $dst|$dst, $src}",
1313480093f4SDimitry Andric                    [(set FR32:$dst, (any_fpround (loadf64 addr:$src)))]>,
13140b57cec5SDimitry Andric                    XD, Requires<[UseSSE2, OptForSize]>,
1315bdd1243dSDimitry Andric                    Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC;
13160b57cec5SDimitry Andric}
13170b57cec5SDimitry Andric
// VR128 ("_Int") forms of scalar double -> single conversion, selected from
// the X86frounds node with $src1 as its first operand and the v2f64 register
// (or sse_load_f64 memory operand) as its second. The enclosing let marks all
// four records as using MXCSR and as able to raise an FP exception.
1318e8d8bef9SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = SSEPackedSingle in {
13190b57cec5SDimitry Andricdef VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
13200b57cec5SDimitry Andric                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
13210b57cec5SDimitry Andric                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
13220b57cec5SDimitry Andric                       [(set VR128:$dst,
13230b57cec5SDimitry Andric                         (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
132406c3fb27SDimitry Andric                       XD, VEX_4V, VEX_LIG, WIG, Requires<[UseAVX]>,
13250b57cec5SDimitry Andric                       Sched<[WriteCvtSD2SS]>;
13260b57cec5SDimitry Andricdef VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
13270b57cec5SDimitry Andric                       (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
13280b57cec5SDimitry Andric                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
13290b57cec5SDimitry Andric                       [(set VR128:$dst,
13305ffd83dbSDimitry Andric                         (v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>,
133106c3fb27SDimitry Andric                       XD, VEX_4V, VEX_LIG, WIG, Requires<[UseAVX]>,
13320b57cec5SDimitry Andric                       Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
// The non-VEX forms tie $src1 to $dst, so their assembly strings print only
// $src2 and $dst.
13330b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
13340b57cec5SDimitry Andricdef CVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
13350b57cec5SDimitry Andric                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
13360b57cec5SDimitry Andric                       "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
13370b57cec5SDimitry Andric                       [(set VR128:$dst,
13380b57cec5SDimitry Andric                         (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
13390b57cec5SDimitry Andric                       XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>;
13400b57cec5SDimitry Andricdef CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
13410b57cec5SDimitry Andric                       (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
13420b57cec5SDimitry Andric                       "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
13430b57cec5SDimitry Andric                       [(set VR128:$dst,
13445ffd83dbSDimitry Andric                         (v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>,
13450b57cec5SDimitry Andric                       XD, Requires<[UseSSE2]>,
13460b57cec5SDimitry Andric                       Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
13470b57cec5SDimitry Andric}
1348480093f4SDimitry Andric}
13490b57cec5SDimitry Andric
13500b57cec5SDimitry Andric// Convert scalar single to scalar double
13510b57cec5SDimitry Andric// SSE2 instructions with XS prefix
// VEX-encoded scalar single -> double conversion on the FR64/FR32 register
// classes. Both records are isCodeGenOnly with an empty selection pattern and
// take an FR64 $src1 plus the FR32 (or f32mem) $src2 that is converted.
1352e8d8bef9SDimitry Andriclet isCodeGenOnly = 1, hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
13530b57cec5SDimitry Andricdef VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
13540b57cec5SDimitry Andric                    (ins FR64:$src1, FR32:$src2),
13550b57cec5SDimitry Andric                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
135606c3fb27SDimitry Andric                    XS, VEX_4V, VEX_LIG, WIG,
1357480093f4SDimitry Andric                    Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>, SIMD_EXC;
// Memory form: mayLoad is set by hand (the pattern list is empty) and the
// record is additionally gated on OptForSize.
13580b57cec5SDimitry Andriclet mayLoad = 1 in
13590b57cec5SDimitry Andricdef VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
13600b57cec5SDimitry Andric                    (ins FR64:$src1, f32mem:$src2),
13610b57cec5SDimitry Andric                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
136206c3fb27SDimitry Andric                    XS, VEX_4V, VEX_LIG, WIG,
13630b57cec5SDimitry Andric                    Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>,
1364480093f4SDimitry Andric                    Requires<[UseAVX, OptForSize]>, SIMD_EXC;
13650b57cec5SDimitry Andric} // isCodeGenOnly = 1, hasSideEffects = 0
13660b57cec5SDimitry Andric
// Under UseAVX, select an f32 -> f64 any_fpextend to VCVTSS2SDrr, and (only
// with OptForSize) the load-folded variant to VCVTSS2SDrm. In both cases the
// instruction's first operand is given an f64 IMPLICIT_DEF.
1367480093f4SDimitry Andricdef : Pat<(f64 (any_fpextend FR32:$src)),
13680b57cec5SDimitry Andric    (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>, Requires<[UseAVX]>;
1369480093f4SDimitry Andricdef : Pat<(any_fpextend (loadf32 addr:$src)),
13700b57cec5SDimitry Andric    (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX, OptForSize]>;
13710b57cec5SDimitry Andric
// Scalar single -> double conversion without any VEX mixin, on FR64/FR32.
// These records take a single source and carry the any_fpextend selection
// pattern directly. Both are isCodeGenOnly.
1372e8d8bef9SDimitry Andriclet isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in {
13730b57cec5SDimitry Andricdef CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
13740b57cec5SDimitry Andric                   "cvtss2sd\t{$src, $dst|$dst, $src}",
1375480093f4SDimitry Andric                   [(set FR64:$dst, (any_fpextend FR32:$src))]>,
1376480093f4SDimitry Andric                   XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>, SIMD_EXC;
// The load-folding form is additionally gated on OptForSize.
13770b57cec5SDimitry Andricdef CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
13780b57cec5SDimitry Andric                   "cvtss2sd\t{$src, $dst|$dst, $src}",
1379480093f4SDimitry Andric                   [(set FR64:$dst, (any_fpextend (loadf32 addr:$src)))]>,
13800b57cec5SDimitry Andric                   XS, Requires<[UseSSE2, OptForSize]>,
1381bdd1243dSDimitry Andric                   Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>, SIMD_EXC;
13820b57cec5SDimitry Andric} // isCodeGenOnly = 1
13830b57cec5SDimitry Andric
// VR128 ("_Int") forms of scalar single -> double conversion. All four
// records have an empty selection pattern; the enclosing let marks them as
// free of unmodelled side effects, as using MXCSR, and as able to raise an FP
// exception. The memory forms set mayLoad by hand.
1384e8d8bef9SDimitry Andriclet hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1,
1385e8d8bef9SDimitry Andric    ExeDomain = SSEPackedSingle in {
// The two VEX records are predicated on HasAVX, whereas the pattern-bearing
// VCVTSD2SS*_Int records use UseAVX.
13860b57cec5SDimitry Andricdef VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
13870b57cec5SDimitry Andric                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
13880b57cec5SDimitry Andric                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
138906c3fb27SDimitry Andric                    []>, XS, VEX_4V, VEX_LIG, WIG,
13900b57cec5SDimitry Andric                    Requires<[HasAVX]>, Sched<[WriteCvtSS2SD]>;
13910b57cec5SDimitry Andriclet mayLoad = 1 in
13920b57cec5SDimitry Andricdef VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
13930b57cec5SDimitry Andric                      (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
13940b57cec5SDimitry Andric                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
139506c3fb27SDimitry Andric                    []>, XS, VEX_4V, VEX_LIG, WIG, Requires<[HasAVX]>,
13960b57cec5SDimitry Andric                    Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
// The non-VEX forms tie $src1 to $dst, so their assembly strings print only
// $src2 and $dst.
13970b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
13980b57cec5SDimitry Andricdef CVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
13990b57cec5SDimitry Andric                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
14000b57cec5SDimitry Andric                    "cvtss2sd\t{$src2, $dst|$dst, $src2}",
14010b57cec5SDimitry Andric                    []>, XS, Requires<[UseSSE2]>,
14020b57cec5SDimitry Andric                    Sched<[WriteCvtSS2SD]>;
14030b57cec5SDimitry Andriclet mayLoad = 1 in
14040b57cec5SDimitry Andricdef CVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
14050b57cec5SDimitry Andric                      (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
14060b57cec5SDimitry Andric                    "cvtss2sd\t{$src2, $dst|$dst, $src2}",
14070b57cec5SDimitry Andric                    []>, XS, Requires<[UseSSE2]>,
14080b57cec5SDimitry Andric                    Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
14090b57cec5SDimitry Andric}
14100b57cec5SDimitry Andric} // hasSideEffects = 0
14110b57cec5SDimitry Andric
14120b57cec5SDimitry Andric// Patterns used for matching (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss and
14130b57cec5SDimitry Andric// (v)cvtss2sd intrinsic sequences from clang which produce unnecessary
14140b57cec5SDimitry Andric// vmovs{s,d} instructions
// UseAVX selection patterns. Each one matches an X86Movss/X86Movsd whose
// first operand is $dst and whose second operand is a scalar_to_vector of a
// converted scalar, and emits the corresponding VEX *_Int instruction with
// $dst as its first operand. Covered conversions: f64 -> f32 round,
// f32 -> f64 extend, and signed i32/i64 -> f32/f64 from a register or a load.
14150b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
// f64 -> f32: the scalar is element 0 of the VR128 $src.
14160b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
14170b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
14180b57cec5SDimitry Andric                   (v4f32 (scalar_to_vector
1419480093f4SDimitry Andric                     (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
14200b57cec5SDimitry Andric          (VCVTSD2SSrr_Int VR128:$dst, VR128:$src)>;
14210b57cec5SDimitry Andric
// f32 -> f64: the scalar is element 0 of the VR128 $src.
14220b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
14230b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
14240b57cec5SDimitry Andric                   (v2f64 (scalar_to_vector
1425480093f4SDimitry Andric                     (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
14260b57cec5SDimitry Andric          (VCVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
14270b57cec5SDimitry Andric
// Signed integer -> f32 (GR64, i64 load, GR32, i32 load).
14280b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
14290b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1430480093f4SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
14310b57cec5SDimitry Andric          (VCVTSI642SSrr_Int VR128:$dst, GR64:$src)>;
14320b57cec5SDimitry Andric
14330b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
14340b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1435480093f4SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
14360b57cec5SDimitry Andric          (VCVTSI642SSrm_Int VR128:$dst, addr:$src)>;
14370b57cec5SDimitry Andric
14380b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
14390b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1440480093f4SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
14410b57cec5SDimitry Andric          (VCVTSI2SSrr_Int VR128:$dst, GR32:$src)>;
14420b57cec5SDimitry Andric
14430b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
14440b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1445480093f4SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
14460b57cec5SDimitry Andric          (VCVTSI2SSrm_Int VR128:$dst, addr:$src)>;
14470b57cec5SDimitry Andric
// Signed integer -> f64 (GR64, i64 load, GR32, i32 load).
14480b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
14490b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1450480093f4SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
14510b57cec5SDimitry Andric          (VCVTSI642SDrr_Int VR128:$dst, GR64:$src)>;
14520b57cec5SDimitry Andric
14530b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
14540b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1455480093f4SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
14560b57cec5SDimitry Andric          (VCVTSI642SDrm_Int VR128:$dst, addr:$src)>;
14570b57cec5SDimitry Andric
14580b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
14590b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1460480093f4SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
14610b57cec5SDimitry Andric          (VCVTSI2SDrr_Int VR128:$dst, GR32:$src)>;
14620b57cec5SDimitry Andric
14630b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
14640b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1465480093f4SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
14660b57cec5SDimitry Andric          (VCVTSI2SDrm_Int VR128:$dst, addr:$src)>;
14670b57cec5SDimitry Andric} // Predicates = [UseAVX]
14680b57cec5SDimitry Andric
// UseSSE2 selection patterns. Each one matches an X86Movss/X86Movsd of $dst
// with a scalar_to_vector of a converted scalar and emits the non-VEX *_Int
// instruction with $dst as its first operand. Covered here: f64 -> f32 round,
// f32 -> f64 extend, and signed i32/i64 -> f64 from a register or a load.
14690b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
14700b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
14710b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
14720b57cec5SDimitry Andric                   (v4f32 (scalar_to_vector
1473480093f4SDimitry Andric                     (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
14740b57cec5SDimitry Andric          (CVTSD2SSrr_Int VR128:$dst, VR128:$src)>;
14750b57cec5SDimitry Andric
14760b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
14770b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
14780b57cec5SDimitry Andric                   (v2f64 (scalar_to_vector
1479480093f4SDimitry Andric                     (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
14800b57cec5SDimitry Andric          (CVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
14810b57cec5SDimitry Andric
// Signed integer -> f64 (GR64, i64 load, GR32, i32 load).
14820b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
14830b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1484480093f4SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
14850b57cec5SDimitry Andric          (CVTSI642SDrr_Int VR128:$dst, GR64:$src)>;
14860b57cec5SDimitry Andric
14870b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
14880b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1489480093f4SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
14900b57cec5SDimitry Andric          (CVTSI642SDrm_Int VR128:$dst, addr:$src)>;
14910b57cec5SDimitry Andric
14920b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
14930b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1494480093f4SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
14950b57cec5SDimitry Andric          (CVTSI2SDrr_Int VR128:$dst, GR32:$src)>;
14960b57cec5SDimitry Andric
14970b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
14980b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1499480093f4SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
15000b57cec5SDimitry Andric          (CVTSI2SDrm_Int VR128:$dst, addr:$src)>;
15010b57cec5SDimitry Andric} // Predicates = [UseSSE2]
15020b57cec5SDimitry Andric
// UseSSE1 selection patterns. Each one matches an X86Movss of $dst with a
// scalar_to_vector of a signed integer -> f32 conversion (GR64, i64 load,
// GR32, i32 load) and emits the non-VEX CVTSI(64)2SS *_Int instruction with
// $dst as its first operand.
15030b57cec5SDimitry Andriclet Predicates = [UseSSE1] in {
15040b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
15050b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1506480093f4SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
15070b57cec5SDimitry Andric          (CVTSI642SSrr_Int VR128:$dst, GR64:$src)>;
15080b57cec5SDimitry Andric
15090b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
15100b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1511480093f4SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
15120b57cec5SDimitry Andric          (CVTSI642SSrm_Int VR128:$dst, addr:$src)>;
15130b57cec5SDimitry Andric
15140b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
15150b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1516480093f4SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
15170b57cec5SDimitry Andric          (CVTSI2SSrr_Int VR128:$dst, GR32:$src)>;
15180b57cec5SDimitry Andric
15190b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
15200b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1521480093f4SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
15220b57cec5SDimitry Andric          (CVTSI2SSrm_Int VR128:$dst, addr:$src)>;
15230b57cec5SDimitry Andric} // Predicates = [UseSSE1]
15240b57cec5SDimitry Andric
// VEX-encoded packed single -> doubleword conversion, selected from
// X86cvtp2Int, in 128-bit (VR128, v4f32 -> v4i32) and 256-bit (VR256,
// v8f32 -> v8i32, VEX_L) register and memory forms. Predicated on
// HasAVX and NoVLX.
// NOTE(review): the asm strings are spelled "cvtps2dq"; presumably the VPDI
// class supplies the leading 'v' -- confirm against the VPDI class.
15250b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
15260b57cec5SDimitry Andric// Convert packed single/double fp to doubleword
15270b57cec5SDimitry Andricdef VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
15280b57cec5SDimitry Andric                       "cvtps2dq\t{$src, $dst|$dst, $src}",
15290b57cec5SDimitry Andric                       [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>,
153006c3fb27SDimitry Andric                       VEX, Sched<[WriteCvtPS2I]>, WIG, SIMD_EXC;
15310b57cec5SDimitry Andricdef VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
15320b57cec5SDimitry Andric                       "cvtps2dq\t{$src, $dst|$dst, $src}",
15330b57cec5SDimitry Andric                       [(set VR128:$dst,
15340b57cec5SDimitry Andric                         (v4i32 (X86cvtp2Int (loadv4f32 addr:$src))))]>,
153506c3fb27SDimitry Andric                       VEX, Sched<[WriteCvtPS2ILd]>, WIG, SIMD_EXC;
15360b57cec5SDimitry Andricdef VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
15370b57cec5SDimitry Andric                        "cvtps2dq\t{$src, $dst|$dst, $src}",
15380b57cec5SDimitry Andric                        [(set VR256:$dst,
15390b57cec5SDimitry Andric                          (v8i32 (X86cvtp2Int (v8f32 VR256:$src))))]>,
154006c3fb27SDimitry Andric                        VEX, VEX_L, Sched<[WriteCvtPS2IY]>, WIG, SIMD_EXC;
15410b57cec5SDimitry Andricdef VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
15420b57cec5SDimitry Andric                        "cvtps2dq\t{$src, $dst|$dst, $src}",
15430b57cec5SDimitry Andric                        [(set VR256:$dst,
15440b57cec5SDimitry Andric                          (v8i32 (X86cvtp2Int (loadv8f32 addr:$src))))]>,
154506c3fb27SDimitry Andric                        VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, WIG, SIMD_EXC;
15460b57cec5SDimitry Andric}
// Packed single -> doubleword conversion without any VEX mixin
// (v4f32 -> v4i32 via X86cvtp2Int), register and memory forms.
// The memory form matches memopv4f32 where the VEX form uses loadv4f32.
// NOTE(review): presumably memop carries an alignment requirement -- confirm
// against the memop fragment definitions.
15470b57cec5SDimitry Andricdef CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
15480b57cec5SDimitry Andric                     "cvtps2dq\t{$src, $dst|$dst, $src}",
15490b57cec5SDimitry Andric                     [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>,
1550480093f4SDimitry Andric                     Sched<[WriteCvtPS2I]>, SIMD_EXC;
15510b57cec5SDimitry Andricdef CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
15520b57cec5SDimitry Andric                     "cvtps2dq\t{$src, $dst|$dst, $src}",
15530b57cec5SDimitry Andric                     [(set VR128:$dst,
15540b57cec5SDimitry Andric                       (v4i32 (X86cvtp2Int (memopv4f32 addr:$src))))]>,
1555480093f4SDimitry Andric                     Sched<[WriteCvtPS2ILd]>, SIMD_EXC;
15560b57cec5SDimitry Andric
15570b57cec5SDimitry Andric
15580b57cec5SDimitry Andric// Convert Packed Double FP to Packed DW Integers
// VEX-encoded packed double -> doubleword conversion, selected from
// X86cvtp2Int. Every form writes a VR128 v4i32 result; the source is v2f64
// (VR128 / f128mem) or v4f64 (VR256 / f256mem, VEX_L). The memory forms carry
// an {x} / {y} suffix in their asm strings to tell the two widths apart.
// The enclosing let marks all four records as using MXCSR and as able to
// raise an FP exception.
1559480093f4SDimitry Andriclet Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
15600b57cec5SDimitry Andric// The assembler can recognize rr 256-bit instructions by seeing a ymm
15610b57cec5SDimitry Andric// register, but the same isn't true when using memory operands instead.
15620b57cec5SDimitry Andric// Provide other assembly rr and rm forms to address this explicitly.
15630b57cec5SDimitry Andricdef VCVTPD2DQrr  : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
15640b57cec5SDimitry Andric                       "vcvtpd2dq\t{$src, $dst|$dst, $src}",
15650b57cec5SDimitry Andric                       [(set VR128:$dst,
15660b57cec5SDimitry Andric                         (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>,
156706c3fb27SDimitry Andric                       VEX, Sched<[WriteCvtPD2I]>, WIG;
15680b57cec5SDimitry Andric
15690b57cec5SDimitry Andric// XMM only
15700b57cec5SDimitry Andricdef VCVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
15710b57cec5SDimitry Andric                      "vcvtpd2dq{x}\t{$src, $dst|$dst, $src}",
15720b57cec5SDimitry Andric                      [(set VR128:$dst,
15730b57cec5SDimitry Andric                        (v4i32 (X86cvtp2Int (loadv2f64 addr:$src))))]>, VEX,
157406c3fb27SDimitry Andric                      Sched<[WriteCvtPD2ILd]>, WIG;
15750b57cec5SDimitry Andric
15760b57cec5SDimitry Andric// YMM only
15770b57cec5SDimitry Andricdef VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
15780b57cec5SDimitry Andric                       "vcvtpd2dq\t{$src, $dst|$dst, $src}",
15790b57cec5SDimitry Andric                       [(set VR128:$dst,
15800b57cec5SDimitry Andric                         (v4i32 (X86cvtp2Int (v4f64 VR256:$src))))]>,
158106c3fb27SDimitry Andric                       VEX, VEX_L, Sched<[WriteCvtPD2IY]>, WIG;
15820b57cec5SDimitry Andricdef VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
15830b57cec5SDimitry Andric                       "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
15840b57cec5SDimitry Andric                       [(set VR128:$dst,
15850b57cec5SDimitry Andric                         (v4i32 (X86cvtp2Int (loadv4f64 addr:$src))))]>,
158606c3fb27SDimitry Andric                       VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, WIG;
15870b57cec5SDimitry Andric}
15880b57cec5SDimitry Andric
// Let the explicitly suffixed spellings vcvtpd2dqx / vcvtpd2dqy also be used
// with register sources: x resolves to the VR128-source record, y to the
// VR256-source record.
// NOTE(review): the trailing `0, "att"` arguments are presumably InstAlias's
// emit flag and assembler-variant name -- confirm against the InstAlias class.
15890b57cec5SDimitry Andricdef : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
15900b57cec5SDimitry Andric                (VCVTPD2DQrr VR128:$dst, VR128:$src), 0, "att">;
15910b57cec5SDimitry Andricdef : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}",
15920b57cec5SDimitry Andric                (VCVTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">;
15930b57cec5SDimitry Andric
// Packed double -> doubleword conversion without any VEX mixin
// (v2f64 -> v4i32 via X86cvtp2Int). The memory form, which matches
// memopv2f64, is listed before the register form.
15940b57cec5SDimitry Andricdef CVTPD2DQrm  : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
15950b57cec5SDimitry Andric                      "cvtpd2dq\t{$src, $dst|$dst, $src}",
15960b57cec5SDimitry Andric                      [(set VR128:$dst,
15970b57cec5SDimitry Andric                        (v4i32 (X86cvtp2Int (memopv2f64 addr:$src))))]>,
1598480093f4SDimitry Andric                      Sched<[WriteCvtPD2ILd]>, SIMD_EXC;
15990b57cec5SDimitry Andricdef CVTPD2DQrr  : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
16000b57cec5SDimitry Andric                      "cvtpd2dq\t{$src, $dst|$dst, $src}",
16010b57cec5SDimitry Andric                      [(set VR128:$dst,
16020b57cec5SDimitry Andric                        (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>,
1603480093f4SDimitry Andric                      Sched<[WriteCvtPD2I]>, SIMD_EXC;
16040b57cec5SDimitry Andric
16050b57cec5SDimitry Andric// Convert with truncation packed single/double fp to doubleword
16060b57cec5SDimitry Andric// SSE2 packed instructions with XS prefix
// Truncating packed single -> doubleword conversion, selected from
// X86any_cvttp2si. The outer let (MXCSR use, may raise an FP exception)
// covers both the VEX records and the non-VEX records; only the VEX records
// sit inside the inner HasAVX/NoVLX predicate block.
1607480093f4SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1 in {
// VEX forms: 128-bit (v4f32 -> v4i32) and 256-bit (v8f32 -> v8i32, VEX_L).
16080b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
16090b57cec5SDimitry Andricdef VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
16100b57cec5SDimitry Andric                         "cvttps2dq\t{$src, $dst|$dst, $src}",
16110b57cec5SDimitry Andric                         [(set VR128:$dst,
1612480093f4SDimitry Andric                           (v4i32 (X86any_cvttp2si (v4f32 VR128:$src))))]>,
161306c3fb27SDimitry Andric                         VEX, Sched<[WriteCvtPS2I]>, WIG;
16140b57cec5SDimitry Andricdef VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
16150b57cec5SDimitry Andric                         "cvttps2dq\t{$src, $dst|$dst, $src}",
16160b57cec5SDimitry Andric                         [(set VR128:$dst,
1617480093f4SDimitry Andric                           (v4i32 (X86any_cvttp2si (loadv4f32 addr:$src))))]>,
161806c3fb27SDimitry Andric                         VEX, Sched<[WriteCvtPS2ILd]>, WIG;
16190b57cec5SDimitry Andricdef VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
16200b57cec5SDimitry Andric                          "cvttps2dq\t{$src, $dst|$dst, $src}",
16210b57cec5SDimitry Andric                          [(set VR256:$dst,
1622480093f4SDimitry Andric                            (v8i32 (X86any_cvttp2si (v8f32 VR256:$src))))]>,
162306c3fb27SDimitry Andric                          VEX, VEX_L, Sched<[WriteCvtPS2IY]>, WIG;
16240b57cec5SDimitry Andricdef VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
16250b57cec5SDimitry Andric                          "cvttps2dq\t{$src, $dst|$dst, $src}",
16260b57cec5SDimitry Andric                          [(set VR256:$dst,
1627480093f4SDimitry Andric                            (v8i32 (X86any_cvttp2si (loadv8f32 addr:$src))))]>,
16280b57cec5SDimitry Andric                          VEX, VEX_L,
162906c3fb27SDimitry Andric                          Sched<[WriteCvtPS2IYLd]>, WIG;
16300b57cec5SDimitry Andric}
16310b57cec5SDimitry Andric
// Non-VEX forms; the memory form matches memopv4f32 rather than loadv4f32.
16320b57cec5SDimitry Andricdef CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
16330b57cec5SDimitry Andric                       "cvttps2dq\t{$src, $dst|$dst, $src}",
16340b57cec5SDimitry Andric                       [(set VR128:$dst,
1635480093f4SDimitry Andric                         (v4i32 (X86any_cvttp2si (v4f32 VR128:$src))))]>,
16360b57cec5SDimitry Andric                       Sched<[WriteCvtPS2I]>;
16370b57cec5SDimitry Andricdef CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
16380b57cec5SDimitry Andric                       "cvttps2dq\t{$src, $dst|$dst, $src}",
16390b57cec5SDimitry Andric                       [(set VR128:$dst,
1640480093f4SDimitry Andric                         (v4i32 (X86any_cvttp2si (memopv4f32 addr:$src))))]>,
16410b57cec5SDimitry Andric                       Sched<[WriteCvtPS2ILd]>;
1642480093f4SDimitry Andric}
16430b57cec5SDimitry Andric
16440b57cec5SDimitry Andric// The assembler can recognize rr 256-bit instructions by seeing a ymm
16450b57cec5SDimitry Andric// register, but the same isn't true when using memory operands instead.
16460b57cec5SDimitry Andric// Provide other assembly rr and rm forms to address this explicitly.
// VEX-encoded truncating packed double -> doubleword conversion, selected
// from X86any_cvttp2si. Every form writes a VR128 v4i32 result; the source is
// v2f64 (VR128 / f128mem) or v4f64 (VR256 / f256mem, VEX_L). The memory forms
// carry an {x} / {y} suffix in their asm strings to tell the widths apart.
// NOTE(review): the asm strings omit the leading 'v'; presumably the VPDI
// class supplies it -- confirm against the VPDI class.
1647480093f4SDimitry Andriclet Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
16480b57cec5SDimitry Andric// XMM only
16490b57cec5SDimitry Andricdef VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
16500b57cec5SDimitry Andric                        "cvttpd2dq\t{$src, $dst|$dst, $src}",
16510b57cec5SDimitry Andric                        [(set VR128:$dst,
1652480093f4SDimitry Andric                          (v4i32 (X86any_cvttp2si (v2f64 VR128:$src))))]>,
165306c3fb27SDimitry Andric                        VEX, Sched<[WriteCvtPD2I]>, WIG;
16540b57cec5SDimitry Andricdef VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
16550b57cec5SDimitry Andric                        "cvttpd2dq{x}\t{$src, $dst|$dst, $src}",
16560b57cec5SDimitry Andric                        [(set VR128:$dst,
1657480093f4SDimitry Andric                          (v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))))]>,
165806c3fb27SDimitry Andric                        VEX, Sched<[WriteCvtPD2ILd]>, WIG;
16590b57cec5SDimitry Andric
16600b57cec5SDimitry Andric// YMM only
16610b57cec5SDimitry Andricdef VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
16620b57cec5SDimitry Andric                         "cvttpd2dq\t{$src, $dst|$dst, $src}",
16630b57cec5SDimitry Andric                         [(set VR128:$dst,
1664480093f4SDimitry Andric                           (v4i32 (X86any_cvttp2si (v4f64 VR256:$src))))]>,
166506c3fb27SDimitry Andric                         VEX, VEX_L, Sched<[WriteCvtPD2IY]>, WIG;
16660b57cec5SDimitry Andricdef VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
16670b57cec5SDimitry Andric                         "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
16680b57cec5SDimitry Andric                         [(set VR128:$dst,
1669480093f4SDimitry Andric                           (v4i32 (X86any_cvttp2si (loadv4f64 addr:$src))))]>,
167006c3fb27SDimitry Andric                         VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, WIG;
16710b57cec5SDimitry Andric} // Predicates = [HasAVX, NoVLX]
16720b57cec5SDimitry Andric
// Let the explicitly suffixed spellings vcvttpd2dqx / vcvttpd2dqy also be
// used with register sources: x resolves to the VR128-source record, y to the
// VR256-source record.
// NOTE(review): the trailing `0, "att"` arguments are presumably InstAlias's
// emit flag and assembler-variant name -- confirm against the InstAlias class.
16730b57cec5SDimitry Andricdef : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
16740b57cec5SDimitry Andric                (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0, "att">;
16750b57cec5SDimitry Andricdef : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}",
16760b57cec5SDimitry Andric                (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">;
16770b57cec5SDimitry Andric
// Under HasAVX/NoVLX, select the generic any_fp_to_sint v4f64 -> v4i32
// conversion to the 256-bit-source VCVTTPD2DQ records: the register form for
// a VR256 source and the memory form for a loadv4f64 source.
16780b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
1679480093f4SDimitry Andric  def : Pat<(v4i32 (any_fp_to_sint (v4f64 VR256:$src))),
16800b57cec5SDimitry Andric            (VCVTTPD2DQYrr VR256:$src)>;
1681480093f4SDimitry Andric  def : Pat<(v4i32 (any_fp_to_sint (loadv4f64 addr:$src))),
16820b57cec5SDimitry Andric            (VCVTTPD2DQYrm addr:$src)>;
16830b57cec5SDimitry Andric}
16840b57cec5SDimitry Andric
// Non-VEX (PDI) records for the truncating v2f64 -> v4i32 conversion, opcode
// 0xE6. Both are tagged SIMD_EXC. The memory record matches its operand through
// memopv2f64 (the VEX memory record above uses loadv2f64 instead).
16850b57cec5SDimitry Andricdef CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
16860b57cec5SDimitry Andric                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
16870b57cec5SDimitry Andric                      [(set VR128:$dst,
1688480093f4SDimitry Andric                        (v4i32 (X86any_cvttp2si (v2f64 VR128:$src))))]>,
1689480093f4SDimitry Andric                      Sched<[WriteCvtPD2I]>, SIMD_EXC;
16900b57cec5SDimitry Andricdef CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
16910b57cec5SDimitry Andric                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
16920b57cec5SDimitry Andric                      [(set VR128:$dst,
1693480093f4SDimitry Andric                        (v4i32 (X86any_cvttp2si (memopv2f64 addr:$src))))]>,
1694480093f4SDimitry Andric                      Sched<[WriteCvtPD2ILd]>, SIMD_EXC;
16950b57cec5SDimitry Andric
16960b57cec5SDimitry Andric// Convert packed single to packed double
// VEX-encoded records, opcode 0x5A with the PS prefix class. Everything in this
// scope inherits Predicates = [HasAVX, NoVLX], Uses = [MXCSR] and
// mayRaiseFPException = 1.
//   - rr / rm   : v4f32 source (rr) or a 64-bit memory source matched by
//                 extloadv2f32 (rm); result is v2f64 in VR128.
//   - Yrr / Yrm : VR128 / f128mem source, result is v4f64 in VR256 (VEX_L).
// The memory records have no dedicated load scheduling class; they use the
// `.Folded` member of the register record's class.
1697480093f4SDimitry Andriclet Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
16980b57cec5SDimitry Andric                  // SSE2 instructions without OpSize prefix
16990b57cec5SDimitry Andricdef VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
17000b57cec5SDimitry Andric                    "vcvtps2pd\t{$src, $dst|$dst, $src}",
1701480093f4SDimitry Andric                    [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>,
170206c3fb27SDimitry Andric                    PS, VEX, Sched<[WriteCvtPS2PD]>, WIG;
17030b57cec5SDimitry Andricdef VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
17040b57cec5SDimitry Andric                    "vcvtps2pd\t{$src, $dst|$dst, $src}",
17050b57cec5SDimitry Andric                    [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
170606c3fb27SDimitry Andric                    PS, VEX, Sched<[WriteCvtPS2PD.Folded]>, WIG;
// 256-bit result: uses the generic any_fpextend node rather than X86any_vfpext.
17070b57cec5SDimitry Andricdef VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
17080b57cec5SDimitry Andric                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
1709480093f4SDimitry Andric                     [(set VR256:$dst, (v4f64 (any_fpextend (v4f32 VR128:$src))))]>,
171006c3fb27SDimitry Andric                     PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, WIG;
17110b57cec5SDimitry Andricdef VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
17120b57cec5SDimitry Andric                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
17130b57cec5SDimitry Andric                     [(set VR256:$dst, (v4f64 (extloadv4f32 addr:$src)))]>,
171406c3fb27SDimitry Andric                     PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, WIG;
17150b57cec5SDimitry Andric}
17160b57cec5SDimitry Andric
// Non-VEX records for the same single -> double widening (opcode 0x5A, PS),
// gated on UseSSE2. The register record converts from a v4f32 operand to v2f64;
// the memory record reads through f64mem and matches extloadv2f32.
// Uses = [MXCSR] and mayRaiseFPException = 1 apply to both records.
1717480093f4SDimitry Andriclet Predicates = [UseSSE2], Uses = [MXCSR], mayRaiseFPException = 1 in {
17180b57cec5SDimitry Andricdef CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
17190b57cec5SDimitry Andric                   "cvtps2pd\t{$src, $dst|$dst, $src}",
1720480093f4SDimitry Andric                   [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>,
17210b57cec5SDimitry Andric                   PS, Sched<[WriteCvtPS2PD]>;
17220b57cec5SDimitry Andricdef CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
17230b57cec5SDimitry Andric                   "cvtps2pd\t{$src, $dst|$dst, $src}",
17240b57cec5SDimitry Andric                   [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
17250b57cec5SDimitry Andric                   PS, Sched<[WriteCvtPS2PD.Folded]>;
17260b57cec5SDimitry Andric}
17270b57cec5SDimitry Andric
17280b57cec5SDimitry Andric// Convert Packed DW Integers to Packed Double FP
// VEX-encoded records (S2SI, opcode 0xE6), gated on HasAVX and NoVLX.
//   - rm / rr   : v2f64 result in VR128 via X86any_VSintToFP.
//   - Yrm / Yrr : v4f64 result in VR256 via the generic any_sint_to_fp (VEX_L).
17290b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
// The brace-less `let` below applies to VCVTDQ2PDrm only. That record reads
// just 64 bits (i64mem): its pattern is a scalar i64 load placed in a v2i64 via
// scalar_to_vector and then bitcast to v4i32.
17300b57cec5SDimitry Andriclet hasSideEffects = 0, mayLoad = 1 in
17310b57cec5SDimitry Andricdef VCVTDQ2PDrm  : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
17320b57cec5SDimitry Andric                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
17330b57cec5SDimitry Andric                        [(set VR128:$dst,
1734480093f4SDimitry Andric                          (v2f64 (X86any_VSintToFP
17350b57cec5SDimitry Andric                                  (bc_v4i32
17360b57cec5SDimitry Andric                                   (v2i64 (scalar_to_vector
17370b57cec5SDimitry Andric                                           (loadi64 addr:$src)))))))]>,
173806c3fb27SDimitry Andric                        VEX, Sched<[WriteCvtI2PDLd]>, WIG;
17390b57cec5SDimitry Andricdef VCVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
17400b57cec5SDimitry Andric                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
17410b57cec5SDimitry Andric                        [(set VR128:$dst,
1742480093f4SDimitry Andric                          (v2f64 (X86any_VSintToFP (v4i32 VR128:$src))))]>,
174306c3fb27SDimitry Andric                        VEX, Sched<[WriteCvtI2PD]>, WIG;
// 256-bit result forms: the source is a full 128-bit v4i32 (i128mem / VR128).
17440b57cec5SDimitry Andricdef VCVTDQ2PDYrm  : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
17450b57cec5SDimitry Andric                         "vcvtdq2pd\t{$src, $dst|$dst, $src}",
17460b57cec5SDimitry Andric                         [(set VR256:$dst,
1747480093f4SDimitry Andric                           (v4f64 (any_sint_to_fp (loadv4i32 addr:$src))))]>,
17480b57cec5SDimitry Andric                         VEX, VEX_L, Sched<[WriteCvtI2PDYLd]>,
174906c3fb27SDimitry Andric                         WIG;
17500b57cec5SDimitry Andricdef VCVTDQ2PDYrr  : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
17510b57cec5SDimitry Andric                         "vcvtdq2pd\t{$src, $dst|$dst, $src}",
17520b57cec5SDimitry Andric                         [(set VR256:$dst,
1753480093f4SDimitry Andric                           (v4f64 (any_sint_to_fp (v4i32 VR128:$src))))]>,
175406c3fb27SDimitry Andric                         VEX, VEX_L, Sched<[WriteCvtI2PDY]>, WIG;
17550b57cec5SDimitry Andric}
17560b57cec5SDimitry Andric
// Non-VEX records for signed dword -> double conversion (S2SI, opcode 0xE6).
// The brace-less `let` applies to CVTDQ2PDrm only; that record reads 64 bits
// through i64mem and matches a scalar i64 load moved into a vector and bitcast
// to v4i32. CVTDQ2PDrr takes a v4i32 register operand.
17570b57cec5SDimitry Andriclet hasSideEffects = 0, mayLoad = 1 in
17580b57cec5SDimitry Andricdef CVTDQ2PDrm  : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
17590b57cec5SDimitry Andric                       "cvtdq2pd\t{$src, $dst|$dst, $src}",
17600b57cec5SDimitry Andric                       [(set VR128:$dst,
1761480093f4SDimitry Andric                         (v2f64 (X86any_VSintToFP
17620b57cec5SDimitry Andric                                 (bc_v4i32
17630b57cec5SDimitry Andric                                  (v2i64 (scalar_to_vector
17640b57cec5SDimitry Andric                                          (loadi64 addr:$src)))))))]>,
17650b57cec5SDimitry Andric                       Sched<[WriteCvtI2PDLd]>;
17660b57cec5SDimitry Andricdef CVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
17670b57cec5SDimitry Andric                       "cvtdq2pd\t{$src, $dst|$dst, $src}",
17680b57cec5SDimitry Andric                       [(set VR128:$dst,
1769480093f4SDimitry Andric                         (v2f64 (X86any_VSintToFP (v4i32 VR128:$src))))]>,
17700b57cec5SDimitry Andric                       Sched<[WriteCvtI2PD]>;
17710b57cec5SDimitry Andric
17720b57cec5SDimitry Andric// AVX register conversion intrinsics
// Extra selection pattern for the memory record: the conversion's v4i32 input
// may also arrive as a bitcast of a v2i64 X86vzload64, in addition to the
// scalar_to_vector(loadi64) shape matched by VCVTDQ2PDrm's own pattern.
17730b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
1774480093f4SDimitry Andric  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
17750b57cec5SDimitry Andric            (VCVTDQ2PDrm addr:$src)>;
17760b57cec5SDimitry Andric} // Predicates = [HasAVX, NoVLX]
17770b57cec5SDimitry Andric
17780b57cec5SDimitry Andric// SSE2 register conversion intrinsics
// Same X86vzload64-sourced shape as the AVX pattern, selected to the non-VEX
// memory record when UseSSE2 holds.
17790b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
1780480093f4SDimitry Andric  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
17810b57cec5SDimitry Andric            (CVTDQ2PDrm addr:$src)>;
17820b57cec5SDimitry Andric} // Predicates = [UseSSE2]
17830b57cec5SDimitry Andric
17840b57cec5SDimitry Andric// Convert packed double to packed single
17850b57cec5SDimitry Andric// The assembler can recognize rr 256-bit instructions by seeing a ymm
17860b57cec5SDimitry Andric// register, but the same isn't true when using memory operands instead.
17870b57cec5SDimitry Andric// Provide other assembly rr and rm forms to address this explicitly.
// All four records are VPDI, opcode 0x5A, produce a v4f32 in VR128 through
// X86any_vfpround, and inherit Uses = [MXCSR] / mayRaiseFPException = 1.
// The memory records spell the mnemonic with an "{x}" or "{y}" suffix variant.
1788480093f4SDimitry Andriclet Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
17890b57cec5SDimitry Andric// XMM only
17900b57cec5SDimitry Andricdef VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
17910b57cec5SDimitry Andric                       "cvtpd2ps\t{$src, $dst|$dst, $src}",
1792349cc55cSDimitry Andric                       [(set VR128:$dst, (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>,
179306c3fb27SDimitry Andric                       VEX, Sched<[WriteCvtPD2PS]>, WIG;
17940b57cec5SDimitry Andricdef VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
17950b57cec5SDimitry Andric                       "cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
1796349cc55cSDimitry Andric                       [(set VR128:$dst, (v4f32 (X86any_vfpround (loadv2f64 addr:$src))))]>,
179706c3fb27SDimitry Andric                       VEX, Sched<[WriteCvtPD2PS.Folded]>, WIG;
17980b57cec5SDimitry Andric
// 256-bit-source forms (VEX_L): v4f64 in, v4f32 out.
17990b57cec5SDimitry Andricdef VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
18000b57cec5SDimitry Andric                        "cvtpd2ps\t{$src, $dst|$dst, $src}",
1801349cc55cSDimitry Andric                        [(set VR128:$dst, (v4f32 (X86any_vfpround (v4f64 VR256:$src))))]>,
180206c3fb27SDimitry Andric                        VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, WIG;
18030b57cec5SDimitry Andricdef VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
18040b57cec5SDimitry Andric                        "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
1805349cc55cSDimitry Andric                        [(set VR128:$dst, (v4f32 (X86any_vfpround (loadv4f64 addr:$src))))]>,
180606c3fb27SDimitry Andric                        VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, WIG;
18070b57cec5SDimitry Andric} // Predicates = [HasAVX, NoVLX]
18080b57cec5SDimitry Andric
// Explicitly width-suffixed spellings for the register forms: "vcvtpd2psx"
// resolves to the VR128-source record and "vcvtpd2psy" to the VR256-source
// record. Emit = 0 keeps the aliases parse-only; both are limited to the "att"
// asm variant.
18090b57cec5SDimitry Andricdef : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
18100b57cec5SDimitry Andric                (VCVTPD2PSrr VR128:$dst, VR128:$src), 0, "att">;
18110b57cec5SDimitry Andricdef : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
18120b57cec5SDimitry Andric                (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0, "att">;
18130b57cec5SDimitry Andric
// Non-VEX (PDI) records for v2f64 -> v4f32 rounding, opcode 0x5A, both tagged
// SIMD_EXC. The memory record matches its operand through memopv2f64 and uses
// the `.Folded` member of the register record's scheduling class.
18140b57cec5SDimitry Andricdef CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
18150b57cec5SDimitry Andric                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
1816349cc55cSDimitry Andric                     [(set VR128:$dst, (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>,
1817480093f4SDimitry Andric                     Sched<[WriteCvtPD2PS]>, SIMD_EXC;
18180b57cec5SDimitry Andricdef CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
18190b57cec5SDimitry Andric                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
1820349cc55cSDimitry Andric                     [(set VR128:$dst, (v4f32 (X86any_vfpround (memopv2f64 addr:$src))))]>,
1821480093f4SDimitry Andric                     Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXC;
18220b57cec5SDimitry Andric
18230b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
18240b57cec5SDimitry Andric// SSE 1 & 2 - Compare Instructions
18250b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
18260b57cec5SDimitry Andric
18270b57cec5SDimitry Andric// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
//
// Emits four records per instantiation, all opcode 0xC2 with a trailing
// u8imm:$cc predicate operand and all tagged SIMD_EXC:
//   rr_Int / rm_Int - operate on VR128; $src1 is typed with VT.
//   rr / rm         - operate on RC; marked isCodeGenOnly.
//
// Parameters:
//   RC        - register class used by the rr / rm records.
//   x86memop  - memory operand type of rm.
//   memop     - memory operand type of rm_Int.
//   OpNode    - compare node placed in every pattern.
//   VT        - vector type applied to $src1 in the _Int patterns.
//   ld_frag   - load fragment matched by rm.
//   asm       - complete asm string shared by all four records.
//   sched     - scheduling class; memory records use sched.Folded together
//               with sched.ReadAfterFold.
//   mem_frags - load fragments matched by rm_Int.
18280b57cec5SDimitry Andricmulticlass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
18295ffd83dbSDimitry Andric                            Operand memop, SDNode OpNode, ValueType VT,
18300b57cec5SDimitry Andric                            PatFrag ld_frag, string asm,
18315ffd83dbSDimitry Andric                            X86FoldableSchedWrite sched,
18325ffd83dbSDimitry Andric                            PatFrags mem_frags> {
18335ffd83dbSDimitry Andric  def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
18345ffd83dbSDimitry Andric                    (ins VR128:$src1, VR128:$src2, u8imm:$cc), asm,
18355ffd83dbSDimitry Andric                    [(set VR128:$dst, (OpNode (VT VR128:$src1),
18365ffd83dbSDimitry Andric                                              VR128:$src2, timm:$cc))]>,
18375ffd83dbSDimitry Andric           Sched<[sched]>, SIMD_EXC;
18385ffd83dbSDimitry Andric  let mayLoad = 1 in
18395ffd83dbSDimitry Andric  def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
18405ffd83dbSDimitry Andric                    (ins VR128:$src1, memop:$src2, u8imm:$cc), asm,
18415ffd83dbSDimitry Andric                    [(set VR128:$dst, (OpNode (VT VR128:$src1),
18425ffd83dbSDimitry Andric                                              (mem_frags addr:$src2), timm:$cc))]>,
18435ffd83dbSDimitry Andric           Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
18445ffd83dbSDimitry Andric
18455ffd83dbSDimitry Andric  let isCodeGenOnly = 1 in {
  // The brace-less `let` makes only the register form commutable.
18460b57cec5SDimitry Andric    let isCommutable = 1 in
18470b57cec5SDimitry Andric    def rr : SIi8<0xC2, MRMSrcReg,
18480b57cec5SDimitry Andric                  (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
18495ffd83dbSDimitry Andric                  [(set RC:$dst, (OpNode RC:$src1, RC:$src2, timm:$cc))]>,
18505ffd83dbSDimitry Andric                  Sched<[sched]>, SIMD_EXC;
18510b57cec5SDimitry Andric    def rm : SIi8<0xC2, MRMSrcMem,
18520b57cec5SDimitry Andric                  (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
18535ffd83dbSDimitry Andric                  [(set RC:$dst, (OpNode RC:$src1,
18548bcb0991SDimitry Andric                                         (ld_frag addr:$src2), timm:$cc))]>,
18555ffd83dbSDimitry Andric                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
18560b57cec5SDimitry Andric  }
1857480093f4SDimitry Andric}
18580b57cec5SDimitry Andric
// VEX scalar compares built from sse12_cmp_scalar with X86cmps. The asm strings
// print $src1 as a separate operand from $dst. Each brace-less
// `let ExeDomain` applies to the single defm that follows it.
18590b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in
18605ffd83dbSDimitry Andricdefm VCMPSS : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32,
18610b57cec5SDimitry Andric                 "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
18620b57cec5SDimitry Andric                 SchedWriteFCmpSizes.PS.Scl, sse_load_f32>,
186306c3fb27SDimitry Andric                 XS, VEX_4V, VEX_LIG, WIG;
18640b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in
18655ffd83dbSDimitry Andricdefm VCMPSD : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64,
18665ffd83dbSDimitry Andric                 "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
18670b57cec5SDimitry Andric                 SchedWriteFCmpSizes.PD.Scl, sse_load_f64>,
186806c3fb27SDimitry Andric                 XD, VEX_4V, VEX_LIG, WIG;
18695ffd83dbSDimitry Andric
// Non-VEX scalar compares. $src1 is tied to $dst by the constraint, and the asm
// strings accordingly print only $dst, $src2 and $cc.
18700b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
18710b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in
18725ffd83dbSDimitry Andric  defm CMPSS : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32,
18735ffd83dbSDimitry Andric                  "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}",
18740b57cec5SDimitry Andric                  SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS;
18750b57cec5SDimitry Andric  let ExeDomain = SSEPackedDouble in
18765ffd83dbSDimitry Andric  defm CMPSD : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64,
18775ffd83dbSDimitry Andric                  "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
18780b57cec5SDimitry Andric                  SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, XD;
18790b57cec5SDimitry Andric}
18800b57cec5SDimitry Andric
18810b57cec5SDimitry Andric// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
//
// Emits `rr` and `rm`. Neither has an output operand; the pattern's only
// result is EFLAGS. Both are tagged SIMD_EXC and placed in ExeDomain d.
//
// Parameters:
//   opc       - opcode byte.
//   RC        - register class of $src1 (and of $src2 in rr).
//   OpNode    - compare node whose result is assigned to EFLAGS.
//   vt        - value type applied to $src1.
//   x86memop  - memory operand type of rm's $src2.
//   ld_frag   - load fragment matched by rm.
//   OpcodeStr - mnemonic; the operand list is appended here.
//   d         - execution domain.
//   sched     - scheduling class, WriteFComX unless overridden.
1882fe6060f1SDimitry Andricmulticlass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDPatternOperator OpNode,
18830b57cec5SDimitry Andric                         ValueType vt, X86MemOperand x86memop,
1884480093f4SDimitry Andric                         PatFrag ld_frag, string OpcodeStr, Domain d,
18855ffd83dbSDimitry Andric                         X86FoldableSchedWrite sched = WriteFComX> {
18865ffd83dbSDimitry Andric  let ExeDomain = d in {
18870b57cec5SDimitry Andric  def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
18880b57cec5SDimitry Andric                     !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
18890b57cec5SDimitry Andric                     [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
18905ffd83dbSDimitry Andric          Sched<[sched]>, SIMD_EXC;
18910b57cec5SDimitry Andric  let mayLoad = 1 in
18920b57cec5SDimitry Andric  def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
18930b57cec5SDimitry Andric                     !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
18940b57cec5SDimitry Andric                     [(set EFLAGS, (OpNode (vt RC:$src1),
18950b57cec5SDimitry Andric                                           (ld_frag addr:$src2)))]>,
18965ffd83dbSDimitry Andric          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
18970b57cec5SDimitry Andric}
18980b57cec5SDimitry Andric}
18990b57cec5SDimitry Andric
19000b57cec5SDimitry Andric// sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp
//
// Emits `rr_Int` and `rm_Int`, shaped like sse12_ord_cmp's rr / rm. The
// differences are the record-name suffix, an `Operand memop` in place of the
// X86MemOperand, and a PatFrags `mem_frags` in place of the single load
// fragment. The result is again EFLAGS only, and both records are SIMD_EXC.
19010b57cec5SDimitry Andricmulticlass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
19020b57cec5SDimitry Andric                             ValueType vt, Operand memop,
19035ffd83dbSDimitry Andric                             PatFrags mem_frags, string OpcodeStr,
1904480093f4SDimitry Andric                             Domain d,
19055ffd83dbSDimitry Andric                             X86FoldableSchedWrite sched = WriteFComX> {
19065ffd83dbSDimitry Andriclet ExeDomain = d in {
19070b57cec5SDimitry Andric  def rr_Int: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
19080b57cec5SDimitry Andric                     !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
19090b57cec5SDimitry Andric                     [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
19105ffd83dbSDimitry Andric          Sched<[sched]>, SIMD_EXC;
19110b57cec5SDimitry Andriclet mayLoad = 1 in
19120b57cec5SDimitry Andric  def rm_Int: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2),
19130b57cec5SDimitry Andric                     !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
19140b57cec5SDimitry Andric                     [(set EFLAGS, (OpNode (vt RC:$src1),
19155ffd83dbSDimitry Andric                                           (mem_frags addr:$src2)))]>,
19165ffd83dbSDimitry Andric          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
19170b57cec5SDimitry Andric}
1918480093f4SDimitry Andric}
19190b57cec5SDimitry Andric
// Instantiations of the EFLAGS-setting scalar compares; every record in this
// scope lists EFLAGS in Defs.
//
// Each defm name appears twice. The first use expands sse12_ord_cmp (records
// rr / rm on FR32 / FR64); the second, under isCodeGenOnly, expands
// sse12_ord_cmp_int (records rr_Int / rm_Int on VR128). The suffixes differ,
// so the generated record names do not collide.
//
// Opcode 0x2E is used for the ucomis* mnemonics and 0x2F for comis*. In the
// FR forms ucomis* matches X86any_fcmp while comis* matches X86strict_fcmps;
// the VR128 forms use X86ucomi and X86comi respectively.
19200b57cec5SDimitry Andriclet Defs = [EFLAGS] in {
  // VEX encodings, FR32 / FR64 operands.
1921480093f4SDimitry Andric  defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32,
192206c3fb27SDimitry Andric                               "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, WIG;
1923480093f4SDimitry Andric  defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64,
192406c3fb27SDimitry Andric                               "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, WIG;
1925480093f4SDimitry Andric  defm VCOMISS  : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32,
192606c3fb27SDimitry Andric                               "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, WIG;
1927480093f4SDimitry Andric  defm VCOMISD  : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64,
192806c3fb27SDimitry Andric                               "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, WIG;
19290b57cec5SDimitry Andric
  // VEX encodings, VR128 operands (rr_Int / rm_Int).
19300b57cec5SDimitry Andric  let isCodeGenOnly = 1 in {
19310b57cec5SDimitry Andric    defm VUCOMISS  : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
193206c3fb27SDimitry Andric                      sse_load_f32, "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, WIG;
19330b57cec5SDimitry Andric    defm VUCOMISD  : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
193406c3fb27SDimitry Andric                      sse_load_f64, "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, WIG;
19350b57cec5SDimitry Andric
19360b57cec5SDimitry Andric    defm VCOMISS  : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
193706c3fb27SDimitry Andric                       sse_load_f32, "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, WIG;
19380b57cec5SDimitry Andric    defm VCOMISD  : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
193906c3fb27SDimitry Andric                       sse_load_f64, "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, WIG;
19400b57cec5SDimitry Andric  }
  // Non-VEX encodings, FR32 / FR64 operands.
1941480093f4SDimitry Andric  defm UCOMISS  : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32,
1942480093f4SDimitry Andric                                  "ucomiss", SSEPackedSingle>, PS;
1943480093f4SDimitry Andric  defm UCOMISD  : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64,
1944480093f4SDimitry Andric                                  "ucomisd", SSEPackedDouble>, PD;
1945480093f4SDimitry Andric  defm COMISS   : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32,
1946480093f4SDimitry Andric                                  "comiss", SSEPackedSingle>, PS;
1947480093f4SDimitry Andric  defm COMISD   : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64,
1948480093f4SDimitry Andric                                  "comisd", SSEPackedDouble>, PD;
19490b57cec5SDimitry Andric
  // Non-VEX encodings, VR128 operands (rr_Int / rm_Int).
19500b57cec5SDimitry Andric  let isCodeGenOnly = 1 in {
19510b57cec5SDimitry Andric    defm UCOMISS  : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
1952480093f4SDimitry Andric                            sse_load_f32, "ucomiss", SSEPackedSingle>, PS;
19530b57cec5SDimitry Andric    defm UCOMISD  : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
1954480093f4SDimitry Andric                            sse_load_f64, "ucomisd", SSEPackedDouble>, PD;
19550b57cec5SDimitry Andric
19560b57cec5SDimitry Andric    defm COMISS  : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
1957480093f4SDimitry Andric                                sse_load_f32, "comiss", SSEPackedSingle>, PS;
19580b57cec5SDimitry Andric    defm COMISD  : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
1959480093f4SDimitry Andric                                    sse_load_f64, "comisd", SSEPackedDouble>, PD;
19600b57cec5SDimitry Andric  }
19610b57cec5SDimitry Andric} // Defs = [EFLAGS]
19620b57cec5SDimitry Andric
19630b57cec5SDimitry Andric// sse12_cmp_packed - sse 1 & 2 compare packed instructions
//
// Emits `rri` and `rmi`, both opcode 0xC2 with a u8imm:$cc predicate operand,
// both matching X86any_cmpp and both tagged SIMD_EXC. Only rri is marked
// isCommutable (the `let` has no braces).
//
// Parameters:
//   RC       - register class of $dst, $src1 and (in rri) $src2.
//   x86memop - memory operand type of rmi's $src2.
//   VT       - result type applied to the compare node.
//   asm      - complete asm string shared by both records.
//   sched    - scheduling class; rmi uses sched.Folded and sched.ReadAfterFold.
//   d        - execution domain handed to PIi8.
//   ld_frag  - load fragment matched by rmi.
19640b57cec5SDimitry Andricmulticlass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
19650b57cec5SDimitry Andric                            ValueType VT, string asm,
19660b57cec5SDimitry Andric                            X86FoldableSchedWrite sched,
19670b57cec5SDimitry Andric                            Domain d, PatFrag ld_frag> {
19680b57cec5SDimitry Andric  let isCommutable = 1 in
19690b57cec5SDimitry Andric  def rri : PIi8<0xC2, MRMSrcReg,
19700b57cec5SDimitry Andric             (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
1971480093f4SDimitry Andric             [(set RC:$dst, (VT (X86any_cmpp RC:$src1, RC:$src2, timm:$cc)))], d>,
19725ffd83dbSDimitry Andric            Sched<[sched]>, SIMD_EXC;
19730b57cec5SDimitry Andric  def rmi : PIi8<0xC2, MRMSrcMem,
19740b57cec5SDimitry Andric             (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
19750b57cec5SDimitry Andric             [(set RC:$dst,
1976480093f4SDimitry Andric               (VT (X86any_cmpp RC:$src1, (ld_frag addr:$src2), timm:$cc)))], d>,
19775ffd83dbSDimitry Andric            Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
1978480093f4SDimitry Andric}
19790b57cec5SDimitry Andric
// VEX packed compares: 128-bit (VR128 / f128mem) and 256-bit (VR256 / f256mem,
// VEX_L) instantiations for single and double precision. The asm strings print
// $src1 separately from $dst, and the memory forms use the loadv* fragments.
19800b57cec5SDimitry Andricdefm VCMPPS : sse12_cmp_packed<VR128, f128mem, v4f32,
19810b57cec5SDimitry Andric               "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
198206c3fb27SDimitry Andric               SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, loadv4f32>, PS, VEX_4V, WIG;
19830b57cec5SDimitry Andricdefm VCMPPD : sse12_cmp_packed<VR128, f128mem, v2f64,
19840b57cec5SDimitry Andric               "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
198506c3fb27SDimitry Andric               SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, loadv2f64>, PD, VEX_4V, WIG;
19860b57cec5SDimitry Andricdefm VCMPPSY : sse12_cmp_packed<VR256, f256mem, v8f32,
19870b57cec5SDimitry Andric               "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
198806c3fb27SDimitry Andric               SchedWriteFCmpSizes.PS.YMM, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L, WIG;
19890b57cec5SDimitry Andricdefm VCMPPDY : sse12_cmp_packed<VR256, f256mem, v4f64,
19900b57cec5SDimitry Andric               "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
199106c3fb27SDimitry Andric               SchedWriteFCmpSizes.PD.YMM, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L, WIG;
// Non-VEX packed compares. $src1 is tied to $dst, so the asm strings print
// only $dst, $src2 and $cc; the memory forms use the memopv* fragments.
19920b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
19930b57cec5SDimitry Andric  defm CMPPS : sse12_cmp_packed<VR128, f128mem, v4f32,
19940b57cec5SDimitry Andric                 "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
19950b57cec5SDimitry Andric                 SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, memopv4f32>, PS;
19960b57cec5SDimitry Andric  defm CMPPD : sse12_cmp_packed<VR128, f128mem, v2f64,
19970b57cec5SDimitry Andric                 "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
19980b57cec5SDimitry Andric                 SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD;
19990b57cec5SDimitry Andric}
20000b57cec5SDimitry Andric
// Immediate leaf accepted only for compare predicates whose two operands may be
// exchanged. Only the low three bits of the immediate are inspected; the values
// let through are 0, 3, 4 and 7 (EQ, UNORD, NEQ and ORD in the SSE compare
// predicate encoding -- see the CMPPS entry in the Intel SDM).
20018bcb0991SDimitry Andricdef CommutableCMPCC : PatLeaf<(timm), [{
20020b57cec5SDimitry Andric  uint64_t Imm = N->getZExtValue() & 0x7;
20030b57cec5SDimitry Andric  return (Imm == 0x00 || Imm == 0x03 || Imm == 0x04 || Imm == 0x07);
20040b57cec5SDimitry Andric}]>;
20050b57cec5SDimitry Andric
20060b57cec5SDimitry Andric// Patterns to select compares with loads in first operand.
// In each source pattern the load is the FIRST compare operand, while the
// selected memory record takes it as $src2. The operands are therefore
// swapped, which is why every pattern requires CommutableCMPCC on the
// immediate. The immediate itself is forwarded unchanged.
20070b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
2008480093f4SDimitry Andric  def : Pat<(v4f64 (X86any_cmpp (loadv4f64 addr:$src2), VR256:$src1,
20090b57cec5SDimitry Andric                                CommutableCMPCC:$cc)),
20108bcb0991SDimitry Andric            (VCMPPDYrmi VR256:$src1, addr:$src2, timm:$cc)>;
20110b57cec5SDimitry Andric
2012480093f4SDimitry Andric  def : Pat<(v8f32 (X86any_cmpp (loadv8f32 addr:$src2), VR256:$src1,
20130b57cec5SDimitry Andric                                CommutableCMPCC:$cc)),
20148bcb0991SDimitry Andric            (VCMPPSYrmi VR256:$src1, addr:$src2, timm:$cc)>;
20150b57cec5SDimitry Andric
2016480093f4SDimitry Andric  def : Pat<(v2f64 (X86any_cmpp (loadv2f64 addr:$src2), VR128:$src1,
20170b57cec5SDimitry Andric                                CommutableCMPCC:$cc)),
20188bcb0991SDimitry Andric            (VCMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>;
20190b57cec5SDimitry Andric
2020480093f4SDimitry Andric  def : Pat<(v4f32 (X86any_cmpp (loadv4f32 addr:$src2), VR128:$src1,
20210b57cec5SDimitry Andric                                CommutableCMPCC:$cc)),
20228bcb0991SDimitry Andric            (VCMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>;
20230b57cec5SDimitry Andric
  // Scalar forms: X86cmps on f64 / f32, selected to the codegen-only rm records.
20240b57cec5SDimitry Andric  def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
20250b57cec5SDimitry Andric                          CommutableCMPCC:$cc)),
20268bcb0991SDimitry Andric            (VCMPSDrm FR64:$src1, addr:$src2, timm:$cc)>;
20270b57cec5SDimitry Andric
20280b57cec5SDimitry Andric  def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
20290b57cec5SDimitry Andric                          CommutableCMPCC:$cc)),
20308bcb0991SDimitry Andric            (VCMPSSrm FR32:$src1, addr:$src2, timm:$cc)>;
20310b57cec5SDimitry Andric}
20320b57cec5SDimitry Andric
// Double-precision load-in-first-operand compares for the non-VEX records,
// gated on UseSSE2. The load moves to $src2, so CommutableCMPCC is required.
// The packed pattern matches memopv2f64; the scalar one matches loadf64.
20330b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
2034480093f4SDimitry Andric  def : Pat<(v2f64 (X86any_cmpp (memopv2f64 addr:$src2), VR128:$src1,
20350b57cec5SDimitry Andric                                CommutableCMPCC:$cc)),
20368bcb0991SDimitry Andric            (CMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>;
20370b57cec5SDimitry Andric
20380b57cec5SDimitry Andric  def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
20390b57cec5SDimitry Andric                          CommutableCMPCC:$cc)),
20408bcb0991SDimitry Andric            (CMPSDrm FR64:$src1, addr:$src2, timm:$cc)>;
20410b57cec5SDimitry Andric}
20420b57cec5SDimitry Andric
// Single-precision load-in-first-operand compares for the non-VEX records,
// gated on UseSSE1. The load moves to $src2, so CommutableCMPCC is required.
// The packed pattern matches memopv4f32; the scalar one matches loadf32.
20430b57cec5SDimitry Andriclet Predicates = [UseSSE1] in {
2044480093f4SDimitry Andric  def : Pat<(v4f32 (X86any_cmpp (memopv4f32 addr:$src2), VR128:$src1,
20450b57cec5SDimitry Andric                                CommutableCMPCC:$cc)),
20468bcb0991SDimitry Andric            (CMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>;
20470b57cec5SDimitry Andric
20480b57cec5SDimitry Andric  def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
20490b57cec5SDimitry Andric                          CommutableCMPCC:$cc)),
20508bcb0991SDimitry Andric            (CMPSSrm FR32:$src1, addr:$src2, timm:$cc)>;
20510b57cec5SDimitry Andric}
20520b57cec5SDimitry Andric
20530b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
20540b57cec5SDimitry Andric// SSE 1 & 2 - Shuffle Instructions
20550b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
20560b57cec5SDimitry Andric
20570b57cec5SDimitry Andric/// sse12_shuffle - sse 1 & 2 fp shuffle instructions
///
/// Emits `rmi` and `rri`, both opcode 0xC6 with a u8imm:$src3 selector that the
/// patterns match as an i8 timm operand of X86Shufp.
///
/// Parameters:
///   RC           - register class of $dst, $src1 and (in rri) $src2.
///   x86memop     - memory operand type of rmi's $src2.
///   vt           - result type applied to X86Shufp.
///   asm          - complete asm string shared by both records.
///   mem_frag     - load fragment matched by rmi.
///   sched        - scheduling class; rmi uses sched.Folded and
///                  sched.ReadAfterFold.
///   d            - execution domain handed to PIi8.
///   IsCommutable - copied into isCommutable of rri only; defaults to 0.
20580b57cec5SDimitry Andricmulticlass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
20590b57cec5SDimitry Andric                         ValueType vt, string asm, PatFrag mem_frag,
20600b57cec5SDimitry Andric                         X86FoldableSchedWrite sched, Domain d,
20610b57cec5SDimitry Andric                         bit IsCommutable = 0> {
20620b57cec5SDimitry Andric  def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
20630b57cec5SDimitry Andric                   (ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
20640b57cec5SDimitry Andric                   [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
20658bcb0991SDimitry Andric                                       (i8 timm:$src3))))], d>,
20660b57cec5SDimitry Andric            Sched<[sched.Folded, sched.ReadAfterFold]>;
20670b57cec5SDimitry Andric  let isCommutable = IsCommutable in
20680b57cec5SDimitry Andric  def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
20690b57cec5SDimitry Andric                 (ins RC:$src1, RC:$src2, u8imm:$src3), asm,
20700b57cec5SDimitry Andric                 [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
20718bcb0991SDimitry Andric                                     (i8 timm:$src3))))], d>,
20720b57cec5SDimitry Andric            Sched<[sched]>;
20730b57cec5SDimitry Andric}
20740b57cec5SDimitry Andric
// VEX shuffles, gated on HasAVX and NoVLX: 128-bit and 256-bit (VEX_L)
// instantiations for single and double precision. None passes IsCommutable, so
// the default of 0 applies. The asm strings print $src1 separately from $dst.
20750b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
20760b57cec5SDimitry Andric  defm VSHUFPS  : sse12_shuffle<VR128, f128mem, v4f32,
20770b57cec5SDimitry Andric           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
20780b57cec5SDimitry Andric           loadv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>,
207906c3fb27SDimitry Andric           PS, VEX_4V, WIG;
20800b57cec5SDimitry Andric  defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
20810b57cec5SDimitry Andric           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
20820b57cec5SDimitry Andric           loadv8f32, SchedWriteFShuffle.YMM, SSEPackedSingle>,
208306c3fb27SDimitry Andric           PS, VEX_4V, VEX_L, WIG;
20840b57cec5SDimitry Andric  defm VSHUFPD  : sse12_shuffle<VR128, f128mem, v2f64,
20850b57cec5SDimitry Andric           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
20860b57cec5SDimitry Andric           loadv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>,
208706c3fb27SDimitry Andric           PD, VEX_4V, WIG;
20880b57cec5SDimitry Andric  defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
20890b57cec5SDimitry Andric           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
20900b57cec5SDimitry Andric           loadv4f64, SchedWriteFShuffle.YMM, SSEPackedDouble>,
209106c3fb27SDimitry Andric           PD, VEX_4V, VEX_L, WIG;
20920b57cec5SDimitry Andric}
// Legacy-SSE SHUFPS/SHUFPD. The "$src1 = $dst" constraint ties the first
// source to the destination, so the assembly strings name only $dst, $src2
// and the immediate $src3. Memory operands are matched with the memopv*
// fragments rather than the loadv* fragments used by the VEX forms.
20930b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
20940b57cec5SDimitry Andric  defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
20950b57cec5SDimitry Andric                    "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
20960b57cec5SDimitry Andric                    memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
  // The trailing `1` is the IsCommutable argument; sse12_shuffle applies it to
  // the register form (rri) only.
  // NOTE(review): how the commuted form is produced is not visible in this
  // section -- confirm against the target's commute hooks.
20970b57cec5SDimitry Andric  defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
20980b57cec5SDimitry Andric                    "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
20990b57cec5SDimitry Andric                    memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
21000b57cec5SDimitry Andric}
21010b57cec5SDimitry Andric
21020b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
21030b57cec5SDimitry Andric// SSE 1 & 2 - Unpack FP Instructions
21040b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
21050b57cec5SDimitry Andric
21060b57cec5SDimitry Andric/// sse12_unpack_interleave - sse 1 & 2 fp unpack and interleave
///
/// Emits two records per instantiation:
///   rr - register/register form (MRMSrcReg), matching (OpNode $src1, $src2).
///   rm - register/memory form (MRMSrcMem), matching
///        (OpNode $src1, (mem_frag addr:$src2)).
///
/// Parameters:
///   opc          - opcode byte passed to PI for both forms.
///   OpNode       - SDNode matched by the selection patterns.
///   vt           - result value type of the pattern.
///   mem_frag     - PatFrag used to match the folded memory operand.
///   RC           - register class of $dst, $src1 and (rr only) $src2.
///   x86memop     - memory operand type of $src2 in the rm form.
///   asm          - complete assembly string, used unchanged by both forms.
///   sched        - scheduling info: `sched` for rr, `sched.Folded` plus
///                  `sched.ReadAfterFold` for rm.
///   d            - execution Domain passed to PI.
///   IsCommutable - value of isCommutable on the rr record (default 0); the rm
///                  record is outside that `let`.
21070b57cec5SDimitry Andricmulticlass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
21080b57cec5SDimitry Andric                                   PatFrag mem_frag, RegisterClass RC,
21090b57cec5SDimitry Andric                                   X86MemOperand x86memop, string asm,
21100b57cec5SDimitry Andric                                   X86FoldableSchedWrite sched, Domain d,
21110b57cec5SDimitry Andric                                   bit IsCommutable = 0> {
    // This `let` has no braces, so it scopes over the single `def rr` below.
21120b57cec5SDimitry Andric    let isCommutable = IsCommutable in
21130b57cec5SDimitry Andric    def rr : PI<opc, MRMSrcReg,
21140b57cec5SDimitry Andric                (outs RC:$dst), (ins RC:$src1, RC:$src2),
21150b57cec5SDimitry Andric                asm, [(set RC:$dst,
21160b57cec5SDimitry Andric                           (vt (OpNode RC:$src1, RC:$src2)))], d>,
21170b57cec5SDimitry Andric                Sched<[sched]>;
21180b57cec5SDimitry Andric    def rm : PI<opc, MRMSrcMem,
21190b57cec5SDimitry Andric                (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
21200b57cec5SDimitry Andric                asm, [(set RC:$dst,
21210b57cec5SDimitry Andric                           (vt (OpNode RC:$src1,
21220b57cec5SDimitry Andric                                       (mem_frag addr:$src2))))], d>,
21230b57cec5SDimitry Andric             Sched<[sched.Folded, sched.ReadAfterFold]>;
21240b57cec5SDimitry Andric}
21250b57cec5SDimitry Andric
// VEX-encoded UNPCKH/UNPCKL for packed single and double, enabled under
// Predicates = [HasAVX, NoVLX]. Opcode 0x15 is used with X86Unpckh and 0x14
// with X86Unpckl. All forms fold memory through the generic `load` fragment
// and use the three-operand assembly string.
21260b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
// 128-bit (VR128 / f128mem) forms.
21270b57cec5SDimitry Andricdefm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, load,
21280b57cec5SDimitry Andric      VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
212906c3fb27SDimitry Andric                     SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, WIG;
// VUNPCKHPD is the only instantiation in this group that passes
// IsCommutable = 1 (the trailing `1`), which marks its rr record commutable.
21300b57cec5SDimitry Andricdefm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, load,
21310b57cec5SDimitry Andric      VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
213206c3fb27SDimitry Andric                     SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD, VEX_4V, WIG;
21330b57cec5SDimitry Andricdefm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, load,
21340b57cec5SDimitry Andric      VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
213506c3fb27SDimitry Andric                     SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, WIG;
21360b57cec5SDimitry Andricdefm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, load,
21370b57cec5SDimitry Andric      VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
213806c3fb27SDimitry Andric                     SchedWriteFShuffle.XMM, SSEPackedDouble>, PD, VEX_4V, WIG;
21390b57cec5SDimitry Andric
// 256-bit (VR256 / f256mem) forms; these add VEX_L and use the YMM
// scheduling class. None of them is marked commutable.
21400b57cec5SDimitry Andricdefm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, load,
21410b57cec5SDimitry Andric      VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
214206c3fb27SDimitry Andric                     SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, WIG;
21430b57cec5SDimitry Andricdefm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, load,
21440b57cec5SDimitry Andric      VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
214506c3fb27SDimitry Andric                     SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, WIG;
21460b57cec5SDimitry Andricdefm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, load,
21470b57cec5SDimitry Andric      VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
214806c3fb27SDimitry Andric                     SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, WIG;
21490b57cec5SDimitry Andricdefm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, load,
21500b57cec5SDimitry Andric      VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
215106c3fb27SDimitry Andric                     SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, WIG;
21520b57cec5SDimitry Andric}// Predicates = [HasAVX, NoVLX]
21530b57cec5SDimitry Andric
// Legacy-SSE UNPCKH/UNPCKL. $src1 is tied to $dst, so the assembly strings
// name only $dst and $src2. Memory operands are matched with the `memop`
// fragment instead of the `load` fragment used by the VEX forms.
21540b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
21550b57cec5SDimitry Andric  defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memop,
21560b57cec5SDimitry Andric        VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
21570b57cec5SDimitry Andric                       SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
  // As with VUNPCKHPD, the trailing `1` sets IsCommutable for the rr record.
21580b57cec5SDimitry Andric  defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memop,
21590b57cec5SDimitry Andric        VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
21600b57cec5SDimitry Andric                       SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
21610b57cec5SDimitry Andric  defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memop,
21620b57cec5SDimitry Andric        VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
21630b57cec5SDimitry Andric                       SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
21640b57cec5SDimitry Andric  defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memop,
21650b57cec5SDimitry Andric        VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
21660b57cec5SDimitry Andric                       SchedWriteFShuffle.XMM, SSEPackedDouble>, PD;
21670b57cec5SDimitry Andric} // Constraints = "$src1 = $dst"
21680b57cec5SDimitry Andric
// Under HasAVX1Only, select 256-bit integer unpack nodes with the
// floating-point unpack instructions: v8i32 maps to the PS forms and v4i64 to
// the PD forms, for both register and folded-load operands.
21690b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in {
  // v8i32 -> VUNPCKLPSY / VUNPCKHPSY.
21700b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Unpckl VR256:$src1, (loadv8i32 addr:$src2))),
21710b57cec5SDimitry Andric            (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
21720b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
21730b57cec5SDimitry Andric            (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
21740b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Unpckh VR256:$src1, (loadv8i32 addr:$src2))),
21750b57cec5SDimitry Andric            (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
21760b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
21770b57cec5SDimitry Andric            (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
21780b57cec5SDimitry Andric
  // v4i64 -> VUNPCKLPDY / VUNPCKHPDY.
21790b57cec5SDimitry Andric  def : Pat<(v4i64 (X86Unpckl VR256:$src1, (loadv4i64 addr:$src2))),
21800b57cec5SDimitry Andric            (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
21810b57cec5SDimitry Andric  def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
21820b57cec5SDimitry Andric            (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
21830b57cec5SDimitry Andric  def : Pat<(v4i64 (X86Unpckh VR256:$src1, (loadv4i64 addr:$src2))),
21840b57cec5SDimitry Andric            (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
21850b57cec5SDimitry Andric  def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
21860b57cec5SDimitry Andric            (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
21870b57cec5SDimitry Andric}
21880b57cec5SDimitry Andric
21890b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
21900b57cec5SDimitry Andric  // Use MOVHPD if the load isn't aligned enough for UNPCKLPD.
  // UNPCKLPDrm folds its operand through `memop`; this pattern matches a
  // `simple_load` operand of the same v2f64 X86Unpckl node and selects
  // MOVHPDrm instead.
  // NOTE(review): the alignment requirement of `memop` is defined outside
  // this section -- confirm there.
21910b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
21928bcb0991SDimitry Andric                              (v2f64 (simple_load addr:$src2)))),
21930b57cec5SDimitry Andric            (MOVHPDrm VR128:$src1, addr:$src2)>;
21940b57cec5SDimitry Andric}
21950b57cec5SDimitry Andric
21960b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
21970b57cec5SDimitry Andric// SSE 1 & 2 - Extract Floating-Point Sign mask
21980b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
21990b57cec5SDimitry Andric
22000b57cec5SDimitry Andric/// sse12_extr_sign_mask - sse 1 & 2 extract floating-point sign mask
///
/// Emits a single register form `rr` (opcode 0x50, MRMSrcReg) that matches
/// (X86movmsk (vt RC:$src)) and writes a GR32orGR64 destination.
///
/// Parameters:
///   RC  - register class of the vector source operand.
///   vt  - vector value type the X86movmsk pattern is written for.
///   asm - mnemonic only; the operand string "\t{$src, $dst|$dst, $src}" is
///         appended here.
///   d   - execution Domain passed to PI.
22010b57cec5SDimitry Andricmulticlass sse12_extr_sign_mask<RegisterClass RC, ValueType vt,
22020b57cec5SDimitry Andric                                string asm, Domain d> {
22030b57cec5SDimitry Andric  def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src),
22040b57cec5SDimitry Andric              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
22050b57cec5SDimitry Andric              [(set GR32orGR64:$dst, (X86movmsk (vt RC:$src)))], d>,
22060b57cec5SDimitry Andric              Sched<[WriteFMOVMSK]>;
22070b57cec5SDimitry Andric}
22080b57cec5SDimitry Andric
// VEX-encoded MOVMSKPS/MOVMSKPD for 128-bit and 256-bit sources, plus
// patterns that select them for the same-width integer vector types. Gated on
// [HasAVX] only; no NoVLX predicate is applied in this block.
22090b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
22100b57cec5SDimitry Andric  defm VMOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
221106c3fb27SDimitry Andric                                        SSEPackedSingle>, PS, VEX, WIG;
22120b57cec5SDimitry Andric  defm VMOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
221306c3fb27SDimitry Andric                                        SSEPackedDouble>, PD, VEX, WIG;
  // 256-bit forms add VEX_L.
22140b57cec5SDimitry Andric  defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, v8f32, "movmskps",
221506c3fb27SDimitry Andric                                         SSEPackedSingle>, PS, VEX, VEX_L, WIG;
22160b57cec5SDimitry Andric  defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, v4f64, "movmskpd",
221706c3fb27SDimitry Andric                                         SSEPackedDouble>, PD, VEX, VEX_L, WIG;
22180b57cec5SDimitry Andric
22190b57cec5SDimitry Andric  // Also support integer VTs to avoid a int->fp bitcast in the DAG.
  // 32-bit element types map to the PS forms, 64-bit element types to PD.
22200b57cec5SDimitry Andric  def : Pat<(X86movmsk (v4i32 VR128:$src)),
22210b57cec5SDimitry Andric            (VMOVMSKPSrr VR128:$src)>;
22220b57cec5SDimitry Andric  def : Pat<(X86movmsk (v2i64 VR128:$src)),
22230b57cec5SDimitry Andric            (VMOVMSKPDrr VR128:$src)>;
22240b57cec5SDimitry Andric  def : Pat<(X86movmsk (v8i32 VR256:$src)),
22250b57cec5SDimitry Andric            (VMOVMSKPSYrr VR256:$src)>;
22260b57cec5SDimitry Andric  def : Pat<(X86movmsk (v4i64 VR256:$src)),
22270b57cec5SDimitry Andric            (VMOVMSKPDYrr VR256:$src)>;
22280b57cec5SDimitry Andric}
22290b57cec5SDimitry Andric
// Legacy-SSE MOVMSKPS/MOVMSKPD (128-bit only). These defms are not wrapped in
// a `let Predicates` at this site.
// NOTE(review): any predicate on them would have to come from the PS/PD
// classes or PI, which are defined outside this section -- confirm there.
22300b57cec5SDimitry Andricdefm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
22310b57cec5SDimitry Andric                                     SSEPackedSingle>, PS;
22320b57cec5SDimitry Andricdefm MOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
22330b57cec5SDimitry Andric                                     SSEPackedDouble>, PD;
22340b57cec5SDimitry Andric
// Legacy-SSE counterparts of the integer X86movmsk patterns, enabled under
// UseSSE2: v4i32 selects MOVMSKPSrr and v2i64 selects MOVMSKPDrr.
22350b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
22360b57cec5SDimitry Andric  // Also support integer VTs to avoid a int->fp bitcast in the DAG.
22370b57cec5SDimitry Andric  def : Pat<(X86movmsk (v4i32 VR128:$src)),
22380b57cec5SDimitry Andric            (MOVMSKPSrr VR128:$src)>;
22390b57cec5SDimitry Andric  def : Pat<(X86movmsk (v2i64 VR128:$src)),
22400b57cec5SDimitry Andric            (MOVMSKPDrr VR128:$src)>;
22410b57cec5SDimitry Andric}
22420b57cec5SDimitry Andric
22430b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
22440b57cec5SDimitry Andric// SSE2 - Packed Integer Logical Instructions
22450b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
22460b57cec5SDimitry Andric
22470b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in { // SSE integer instructions
22480b57cec5SDimitry Andric
22490b57cec5SDimitry Andric/// PDI_binop_rm - Simple SSE2 binary operator.
///
/// Emits a register/register record `rr` and a register/memory record `rm`
/// on the PDI class, both matching (OpVT (OpNode $src1, <src2>)).
///
/// Parameters:
///   opc          - opcode byte for both forms.
///   OpcodeStr    - mnemonic; the operand string is appended based on Is2Addr.
///   OpNode       - SDNode matched by the patterns.
///   OpVT         - result value type of the patterns.
///   RC           - register class of $dst, $src1 and (rr only) $src2.
///   memop_frag   - PatFrag used to match the folded memory operand in rm.
///   x86memop     - memory operand type of $src2 in rm.
///   sched        - `sched` for rr; `sched.Folded` plus `sched.ReadAfterFold`
///                  for rm.
///   IsCommutable - value of isCommutable on the rr record only.
///   Is2Addr      - selects the two-operand assembly string ($src2, $dst) when
///                  set, otherwise the three-operand one ($src2, $src1, $dst).
///                  It only changes the string; tying $src1 to $dst is left to
///                  the instantiation site.
22500b57cec5SDimitry Andricmulticlass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
22510b57cec5SDimitry Andric                        ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
22520b57cec5SDimitry Andric                        X86MemOperand x86memop, X86FoldableSchedWrite sched,
22530b57cec5SDimitry Andric                        bit IsCommutable, bit Is2Addr> {
  // Brace-less `let`: applies to the `def rr` that follows, not to `rm`.
22540b57cec5SDimitry Andric  let isCommutable = IsCommutable in
22550b57cec5SDimitry Andric  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
22560b57cec5SDimitry Andric       (ins RC:$src1, RC:$src2),
22570b57cec5SDimitry Andric       !if(Is2Addr,
22580b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
22590b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
22600b57cec5SDimitry Andric       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
22610b57cec5SDimitry Andric       Sched<[sched]>;
22620b57cec5SDimitry Andric  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
22630b57cec5SDimitry Andric       (ins RC:$src1, x86memop:$src2),
22640b57cec5SDimitry Andric       !if(Is2Addr,
22650b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
22660b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
22670b57cec5SDimitry Andric       [(set RC:$dst, (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
22680b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
22690b57cec5SDimitry Andric}
22700b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
22710b57cec5SDimitry Andric
/// PDI_binop_all - Instantiate PDI_binop_rm three times for one integer
/// binary operator:
///   V#NAME   - VEX 128-bit form under [HasAVX, prd]; `load` fragment,
///              three-operand assembly (Is2Addr = 0), "v"-prefixed mnemonic.
///   NAME     - legacy-SSE 128-bit form with "$src1 = $dst"; `memop`
///              fragment, two-operand assembly (Is2Addr = 1). No Predicates
///              `let` is applied to it here.
///   V#NAME#Y - VEX 256-bit form under [HasAVX2, prd]; `load` fragment,
///              i256mem, adds VEX_L.
///
/// Parameters:
///   opc, OpcodeStr, Opcode - opcode byte, mnemonic and SDNode forwarded to
///                            PDI_binop_rm.
///   OpVT128 / OpVT256      - pattern value types for the 128-bit and 256-bit
///                            forms.
///   sched                  - per-width scheduling info; .XMM and .YMM are
///                            used.
///   IsCommutable           - forwarded unchanged to all three forms.
///   prd                    - extra Predicate added to both VEX forms.
22720b57cec5SDimitry Andricmulticlass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
22730b57cec5SDimitry Andric                         ValueType OpVT128, ValueType OpVT256,
22740b57cec5SDimitry Andric                         X86SchedWriteWidths sched, bit IsCommutable,
22750b57cec5SDimitry Andric                         Predicate prd> {
22760b57cec5SDimitry Andriclet Predicates = [HasAVX, prd] in
22770b57cec5SDimitry Andric  defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
22780b57cec5SDimitry Andric                             VR128, load, i128mem, sched.XMM,
227906c3fb27SDimitry Andric                             IsCommutable, 0>, VEX_4V, WIG;
22800b57cec5SDimitry Andric
22810b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in
22820b57cec5SDimitry Andric  defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
22830b57cec5SDimitry Andric                           memop, i128mem, sched.XMM, IsCommutable, 1>;
22840b57cec5SDimitry Andric
22850b57cec5SDimitry Andriclet Predicates = [HasAVX2, prd] in
22860b57cec5SDimitry Andric  defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
22870b57cec5SDimitry Andric                               OpVT256, VR256, load, i256mem, sched.YMM,
228806c3fb27SDimitry Andric                               IsCommutable, 0>, VEX_4V, VEX_L, WIG;
22890b57cec5SDimitry Andric}
22900b57cec5SDimitry Andric
22910b57cec5SDimitry Andric// These are ordered here for pattern ordering requirements with the fp versions
// Each defm expands through PDI_binop_all into V<NAME>, <NAME> and V<NAME>Y
// records with v2i64 / v4i64 patterns; NoVLX is the extra predicate on the VEX
// forms. AND, OR and XOR pass IsCommutable = 1; PANDN (X86andnp) passes 0.
22920b57cec5SDimitry Andric
22930b57cec5SDimitry Andricdefm PAND  : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64,
22940b57cec5SDimitry Andric                           SchedWriteVecLogic, 1, NoVLX>;
22950b57cec5SDimitry Andricdefm POR   : PDI_binop_all<0xEB, "por", or, v2i64, v4i64,
22960b57cec5SDimitry Andric                           SchedWriteVecLogic, 1, NoVLX>;
22970b57cec5SDimitry Andricdefm PXOR  : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64,
22980b57cec5SDimitry Andric                           SchedWriteVecLogic, 1, NoVLX>;
22990b57cec5SDimitry Andricdefm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
23000b57cec5SDimitry Andric                           SchedWriteVecLogic, 0, NoVLX>;
23010b57cec5SDimitry Andric
23020b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
23030b57cec5SDimitry Andric// SSE 1 & 2 - Logical Instructions
23040b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
23050b57cec5SDimitry Andric
23060b57cec5SDimitry Andric/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
23070b57cec5SDimitry Andric///
23080b57cec5SDimitry Andric/// There are no patterns here because isel prefers integer versions for SSE2
23090b57cec5SDimitry Andric/// and later. There are SSE1 v4f32 patterns later.
///
/// Expands to six sse12_fp_packed_logical_rm instantiations per operator:
/// V#NAME#PSY, V#NAME#PDY, V#NAME#PS and V#NAME#PD (VEX, under
/// [HasAVX, NoVLX]) and PS / PD (legacy SSE, with "$src1 = $dst").
///
/// Parameters:
///   opc       - opcode byte shared by all six forms.
///   OpcodeStr - mnemonic stem; "ps" or "pd" is appended with !strconcat.
///   sched     - per-width scheduling info; .XMM and .YMM are used.
///
/// NOTE(review): sse12_fp_packed_logical_rm is defined outside this section.
/// The two `[]` arguments are presumably its rr/rm pattern lists and the
/// trailing `0` on the VEX forms presumably its Is2Addr flag -- confirm
/// against that definition.
23100b57cec5SDimitry Andricmulticlass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
2311349cc55cSDimitry Andric                                   X86SchedWriteWidths sched> {
  // VEX forms: 256-bit first (VR256 / f256mem / VEX_L), then 128-bit.
23120b57cec5SDimitry Andric  let Predicates = [HasAVX, NoVLX] in {
23130b57cec5SDimitry Andric  defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
23140b57cec5SDimitry Andric        !strconcat(OpcodeStr, "ps"), f256mem, sched.YMM,
231506c3fb27SDimitry Andric        [], [], 0>, PS, VEX_4V, VEX_L, WIG;
23160b57cec5SDimitry Andric
23170b57cec5SDimitry Andric  defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
23180b57cec5SDimitry Andric        !strconcat(OpcodeStr, "pd"), f256mem, sched.YMM,
231906c3fb27SDimitry Andric        [], [], 0>, PD, VEX_4V, VEX_L, WIG;
23200b57cec5SDimitry Andric
23210b57cec5SDimitry Andric  defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
23220b57cec5SDimitry Andric       !strconcat(OpcodeStr, "ps"), f128mem, sched.XMM,
232306c3fb27SDimitry Andric       [], [], 0>, PS, VEX_4V, WIG;
23240b57cec5SDimitry Andric
23250b57cec5SDimitry Andric  defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
23260b57cec5SDimitry Andric       !strconcat(OpcodeStr, "pd"), f128mem, sched.XMM,
232706c3fb27SDimitry Andric       [], [], 0>, PD, VEX_4V, WIG;
23280b57cec5SDimitry Andric  }
23290b57cec5SDimitry Andric
  // Legacy-SSE forms: $src1 is tied to $dst and the last argument is left at
  // the callee's default.
23300b57cec5SDimitry Andric  let Constraints = "$src1 = $dst" in {
23310b57cec5SDimitry Andric    defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
23320b57cec5SDimitry Andric         !strconcat(OpcodeStr, "ps"), f128mem, sched.XMM,
23330b57cec5SDimitry Andric         [], []>, PS;
23340b57cec5SDimitry Andric
23350b57cec5SDimitry Andric    defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
23360b57cec5SDimitry Andric         !strconcat(OpcodeStr, "pd"), f128mem, sched.XMM,
23370b57cec5SDimitry Andric         [], []>, PD;
23380b57cec5SDimitry Andric  }
23390b57cec5SDimitry Andric}
23400b57cec5SDimitry Andric
// Packed FP logical instructions. With NAME = AND/OR/XOR/ANDN these expand to
// e.g. VANDPSY, VANDPDY, VANDPS, VANDPD, ANDPS and ANDPD.
2341349cc55cSDimitry Andricdefm AND  : sse12_fp_packed_logical<0x54, "and", SchedWriteFLogic>;
2342349cc55cSDimitry Andricdefm OR   : sse12_fp_packed_logical<0x56, "or", SchedWriteFLogic>;
2343349cc55cSDimitry Andricdefm XOR  : sse12_fp_packed_logical<0x57, "xor", SchedWriteFLogic>;
// ANDN is explicitly forced to isCommutable = 0.
// NOTE(review): this implies the underlying sse12_fp_packed_logical_rm marks
// its records commutable by default -- confirm against that definition.
23440b57cec5SDimitry Andriclet isCommutable = 0 in
2345349cc55cSDimitry Andric  defm ANDN : sse12_fp_packed_logical<0x55, "andn", SchedWriteFLogic>;
23460b57cec5SDimitry Andric
// Under [HasAVX2, NoVLX], select 256-bit and/or/xor/X86andnp on v32i8, v16i16
// and v8i32 with the integer VPAND/VPOR/VPXOR/VPANDN Y-forms. v4i64 is not
// listed because the instruction definitions themselves carry the v4i64
// patterns (PDI_binop_all is instantiated with OpVT256 = v4i64).
23470b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
  // Register/register forms.
23480b57cec5SDimitry Andric  def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
23490b57cec5SDimitry Andric            (VPANDYrr VR256:$src1, VR256:$src2)>;
23500b57cec5SDimitry Andric  def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
23510b57cec5SDimitry Andric            (VPANDYrr VR256:$src1, VR256:$src2)>;
23520b57cec5SDimitry Andric  def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
23530b57cec5SDimitry Andric            (VPANDYrr VR256:$src1, VR256:$src2)>;
23540b57cec5SDimitry Andric
23550b57cec5SDimitry Andric  def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
23560b57cec5SDimitry Andric            (VPORYrr VR256:$src1, VR256:$src2)>;
23570b57cec5SDimitry Andric  def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
23580b57cec5SDimitry Andric            (VPORYrr VR256:$src1, VR256:$src2)>;
23590b57cec5SDimitry Andric  def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
23600b57cec5SDimitry Andric            (VPORYrr VR256:$src1, VR256:$src2)>;
23610b57cec5SDimitry Andric
23620b57cec5SDimitry Andric  def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
23630b57cec5SDimitry Andric            (VPXORYrr VR256:$src1, VR256:$src2)>;
23640b57cec5SDimitry Andric  def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
23650b57cec5SDimitry Andric            (VPXORYrr VR256:$src1, VR256:$src2)>;
23660b57cec5SDimitry Andric  def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
23670b57cec5SDimitry Andric            (VPXORYrr VR256:$src1, VR256:$src2)>;
23680b57cec5SDimitry Andric
23690b57cec5SDimitry Andric  def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
23700b57cec5SDimitry Andric            (VPANDNYrr VR256:$src1, VR256:$src2)>;
23710b57cec5SDimitry Andric  def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
23720b57cec5SDimitry Andric            (VPANDNYrr VR256:$src1, VR256:$src2)>;
23730b57cec5SDimitry Andric  def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
23740b57cec5SDimitry Andric            (VPANDNYrr VR256:$src1, VR256:$src2)>;
23750b57cec5SDimitry Andric
  // Register/memory forms. The source pattern has no explicit result type;
  // only the load fragment names the vector type.
23760b57cec5SDimitry Andric  def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)),
23770b57cec5SDimitry Andric            (VPANDYrm VR256:$src1, addr:$src2)>;
23780b57cec5SDimitry Andric  def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)),
23790b57cec5SDimitry Andric            (VPANDYrm VR256:$src1, addr:$src2)>;
23800b57cec5SDimitry Andric  def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)),
23810b57cec5SDimitry Andric            (VPANDYrm VR256:$src1, addr:$src2)>;
23820b57cec5SDimitry Andric
23830b57cec5SDimitry Andric  def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)),
23840b57cec5SDimitry Andric            (VPORYrm VR256:$src1, addr:$src2)>;
23850b57cec5SDimitry Andric  def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)),
23860b57cec5SDimitry Andric            (VPORYrm VR256:$src1, addr:$src2)>;
23870b57cec5SDimitry Andric  def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)),
23880b57cec5SDimitry Andric            (VPORYrm VR256:$src1, addr:$src2)>;
23890b57cec5SDimitry Andric
23900b57cec5SDimitry Andric  def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)),
23910b57cec5SDimitry Andric            (VPXORYrm VR256:$src1, addr:$src2)>;
23920b57cec5SDimitry Andric  def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)),
23930b57cec5SDimitry Andric            (VPXORYrm VR256:$src1, addr:$src2)>;
23940b57cec5SDimitry Andric  def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)),
23950b57cec5SDimitry Andric            (VPXORYrm VR256:$src1, addr:$src2)>;
23960b57cec5SDimitry Andric
23970b57cec5SDimitry Andric  def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)),
23980b57cec5SDimitry Andric            (VPANDNYrm VR256:$src1, addr:$src2)>;
23990b57cec5SDimitry Andric  def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)),
24000b57cec5SDimitry Andric            (VPANDNYrm VR256:$src1, addr:$src2)>;
24010b57cec5SDimitry Andric  def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)),
24020b57cec5SDimitry Andric            (VPANDNYrm VR256:$src1, addr:$src2)>;
24030b57cec5SDimitry Andric}
24040b57cec5SDimitry Andric
24050b57cec5SDimitry Andric// If only AVX1 is supported, we need to handle integer operations with
24060b57cec5SDimitry Andric// floating point instructions since the integer versions aren't available.
// All four 256-bit integer types (v32i8, v16i16, v8i32, v4i64) are mapped to
// the packed-single Y-forms VANDPSY / VORPSY / VXORPSY / VANDNPSY. v4i64 is
// listed explicitly here, unlike in the [HasAVX2, NoVLX] patterns, because the
// FP logical instruction definitions are declared without patterns.
24070b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in {
  // Register/register forms.
24080b57cec5SDimitry Andric  def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
24090b57cec5SDimitry Andric            (VANDPSYrr VR256:$src1, VR256:$src2)>;
24100b57cec5SDimitry Andric  def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
24110b57cec5SDimitry Andric            (VANDPSYrr VR256:$src1, VR256:$src2)>;
24120b57cec5SDimitry Andric  def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
24130b57cec5SDimitry Andric            (VANDPSYrr VR256:$src1, VR256:$src2)>;
24140b57cec5SDimitry Andric  def : Pat<(v4i64 (and VR256:$src1, VR256:$src2)),
24150b57cec5SDimitry Andric            (VANDPSYrr VR256:$src1, VR256:$src2)>;
24160b57cec5SDimitry Andric
24170b57cec5SDimitry Andric  def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
24180b57cec5SDimitry Andric            (VORPSYrr VR256:$src1, VR256:$src2)>;
24190b57cec5SDimitry Andric  def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
24200b57cec5SDimitry Andric            (VORPSYrr VR256:$src1, VR256:$src2)>;
24210b57cec5SDimitry Andric  def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
24220b57cec5SDimitry Andric            (VORPSYrr VR256:$src1, VR256:$src2)>;
24230b57cec5SDimitry Andric  def : Pat<(v4i64 (or VR256:$src1, VR256:$src2)),
24240b57cec5SDimitry Andric            (VORPSYrr VR256:$src1, VR256:$src2)>;
24250b57cec5SDimitry Andric
24260b57cec5SDimitry Andric  def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
24270b57cec5SDimitry Andric            (VXORPSYrr VR256:$src1, VR256:$src2)>;
24280b57cec5SDimitry Andric  def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
24290b57cec5SDimitry Andric            (VXORPSYrr VR256:$src1, VR256:$src2)>;
24300b57cec5SDimitry Andric  def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
24310b57cec5SDimitry Andric            (VXORPSYrr VR256:$src1, VR256:$src2)>;
24320b57cec5SDimitry Andric  def : Pat<(v4i64 (xor VR256:$src1, VR256:$src2)),
24330b57cec5SDimitry Andric            (VXORPSYrr VR256:$src1, VR256:$src2)>;
24340b57cec5SDimitry Andric
24350b57cec5SDimitry Andric  def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
24360b57cec5SDimitry Andric            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
24370b57cec5SDimitry Andric  def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
24380b57cec5SDimitry Andric            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
24390b57cec5SDimitry Andric  def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
24400b57cec5SDimitry Andric            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
24410b57cec5SDimitry Andric  def : Pat<(v4i64 (X86andnp VR256:$src1, VR256:$src2)),
24420b57cec5SDimitry Andric            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
24430b57cec5SDimitry Andric
  // Register/memory forms, keyed on the integer load fragment.
24440b57cec5SDimitry Andric  def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)),
24450b57cec5SDimitry Andric            (VANDPSYrm VR256:$src1, addr:$src2)>;
24460b57cec5SDimitry Andric  def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)),
24470b57cec5SDimitry Andric            (VANDPSYrm VR256:$src1, addr:$src2)>;
24480b57cec5SDimitry Andric  def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)),
24490b57cec5SDimitry Andric            (VANDPSYrm VR256:$src1, addr:$src2)>;
24500b57cec5SDimitry Andric  def : Pat<(and VR256:$src1, (loadv4i64 addr:$src2)),
24510b57cec5SDimitry Andric            (VANDPSYrm VR256:$src1, addr:$src2)>;
24520b57cec5SDimitry Andric
24530b57cec5SDimitry Andric  def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)),
24540b57cec5SDimitry Andric            (VORPSYrm VR256:$src1, addr:$src2)>;
24550b57cec5SDimitry Andric  def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)),
24560b57cec5SDimitry Andric            (VORPSYrm VR256:$src1, addr:$src2)>;
24570b57cec5SDimitry Andric  def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)),
24580b57cec5SDimitry Andric            (VORPSYrm VR256:$src1, addr:$src2)>;
24590b57cec5SDimitry Andric  def : Pat<(or VR256:$src1, (loadv4i64 addr:$src2)),
24600b57cec5SDimitry Andric            (VORPSYrm VR256:$src1, addr:$src2)>;
24610b57cec5SDimitry Andric
24620b57cec5SDimitry Andric  def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)),
24630b57cec5SDimitry Andric            (VXORPSYrm VR256:$src1, addr:$src2)>;
24640b57cec5SDimitry Andric  def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)),
24650b57cec5SDimitry Andric            (VXORPSYrm VR256:$src1, addr:$src2)>;
24660b57cec5SDimitry Andric  def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)),
24670b57cec5SDimitry Andric            (VXORPSYrm VR256:$src1, addr:$src2)>;
24680b57cec5SDimitry Andric  def : Pat<(xor VR256:$src1, (loadv4i64 addr:$src2)),
24690b57cec5SDimitry Andric            (VXORPSYrm VR256:$src1, addr:$src2)>;
24700b57cec5SDimitry Andric
24710b57cec5SDimitry Andric  def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)),
24720b57cec5SDimitry Andric            (VANDNPSYrm VR256:$src1, addr:$src2)>;
24730b57cec5SDimitry Andric  def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)),
24740b57cec5SDimitry Andric            (VANDNPSYrm VR256:$src1, addr:$src2)>;
24750b57cec5SDimitry Andric  def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)),
24760b57cec5SDimitry Andric            (VANDNPSYrm VR256:$src1, addr:$src2)>;
24770b57cec5SDimitry Andric  def : Pat<(X86andnp VR256:$src1, (loadv4i64 addr:$src2)),
24780b57cec5SDimitry Andric            (VANDNPSYrm VR256:$src1, addr:$src2)>;
24790b57cec5SDimitry Andric}
24800b57cec5SDimitry Andric
// Under [HasAVX, NoVLX], select 128-bit and/or/xor/X86andnp on v16i8, v8i16
// and v4i32 with the VEX integer forms VPAND/VPOR/VPXOR/VPANDN. v2i64 is not
// listed because the instruction definitions themselves carry the v2i64
// patterns (PDI_binop_all is instantiated with OpVT128 = v2i64).
24810b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
  // Register/register forms.
24820b57cec5SDimitry Andric  def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
24830b57cec5SDimitry Andric            (VPANDrr VR128:$src1, VR128:$src2)>;
24840b57cec5SDimitry Andric  def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
24850b57cec5SDimitry Andric            (VPANDrr VR128:$src1, VR128:$src2)>;
24860b57cec5SDimitry Andric  def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
24870b57cec5SDimitry Andric            (VPANDrr VR128:$src1, VR128:$src2)>;
24880b57cec5SDimitry Andric
24890b57cec5SDimitry Andric  def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
24900b57cec5SDimitry Andric            (VPORrr VR128:$src1, VR128:$src2)>;
24910b57cec5SDimitry Andric  def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
24920b57cec5SDimitry Andric            (VPORrr VR128:$src1, VR128:$src2)>;
24930b57cec5SDimitry Andric  def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
24940b57cec5SDimitry Andric            (VPORrr VR128:$src1, VR128:$src2)>;
24950b57cec5SDimitry Andric
24960b57cec5SDimitry Andric  def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
24970b57cec5SDimitry Andric            (VPXORrr VR128:$src1, VR128:$src2)>;
24980b57cec5SDimitry Andric  def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
24990b57cec5SDimitry Andric            (VPXORrr VR128:$src1, VR128:$src2)>;
25000b57cec5SDimitry Andric  def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
25010b57cec5SDimitry Andric            (VPXORrr VR128:$src1, VR128:$src2)>;
25020b57cec5SDimitry Andric
25030b57cec5SDimitry Andric  def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
25040b57cec5SDimitry Andric            (VPANDNrr VR128:$src1, VR128:$src2)>;
25050b57cec5SDimitry Andric  def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
25060b57cec5SDimitry Andric            (VPANDNrr VR128:$src1, VR128:$src2)>;
25070b57cec5SDimitry Andric  def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
25080b57cec5SDimitry Andric            (VPANDNrr VR128:$src1, VR128:$src2)>;
25090b57cec5SDimitry Andric
  // Register/memory forms, keyed on the integer load fragment.
25100b57cec5SDimitry Andric  def : Pat<(and VR128:$src1, (loadv16i8 addr:$src2)),
25110b57cec5SDimitry Andric            (VPANDrm VR128:$src1, addr:$src2)>;
25120b57cec5SDimitry Andric  def : Pat<(and VR128:$src1, (loadv8i16 addr:$src2)),
25130b57cec5SDimitry Andric            (VPANDrm VR128:$src1, addr:$src2)>;
25140b57cec5SDimitry Andric  def : Pat<(and VR128:$src1, (loadv4i32 addr:$src2)),
25150b57cec5SDimitry Andric            (VPANDrm VR128:$src1, addr:$src2)>;
25160b57cec5SDimitry Andric
25170b57cec5SDimitry Andric  def : Pat<(or VR128:$src1, (loadv16i8 addr:$src2)),
25180b57cec5SDimitry Andric            (VPORrm VR128:$src1, addr:$src2)>;
25190b57cec5SDimitry Andric  def : Pat<(or VR128:$src1, (loadv8i16 addr:$src2)),
25200b57cec5SDimitry Andric            (VPORrm VR128:$src1, addr:$src2)>;
25210b57cec5SDimitry Andric  def : Pat<(or VR128:$src1, (loadv4i32 addr:$src2)),
25220b57cec5SDimitry Andric            (VPORrm VR128:$src1, addr:$src2)>;
25230b57cec5SDimitry Andric
25240b57cec5SDimitry Andric  def : Pat<(xor VR128:$src1, (loadv16i8 addr:$src2)),
25250b57cec5SDimitry Andric            (VPXORrm VR128:$src1, addr:$src2)>;
25260b57cec5SDimitry Andric  def : Pat<(xor VR128:$src1, (loadv8i16 addr:$src2)),
25270b57cec5SDimitry Andric            (VPXORrm VR128:$src1, addr:$src2)>;
25280b57cec5SDimitry Andric  def : Pat<(xor VR128:$src1, (loadv4i32 addr:$src2)),
25290b57cec5SDimitry Andric            (VPXORrm VR128:$src1, addr:$src2)>;
25300b57cec5SDimitry Andric
25310b57cec5SDimitry Andric  def : Pat<(X86andnp VR128:$src1, (loadv16i8 addr:$src2)),
25320b57cec5SDimitry Andric            (VPANDNrm VR128:$src1, addr:$src2)>;
25330b57cec5SDimitry Andric  def : Pat<(X86andnp VR128:$src1, (loadv8i16 addr:$src2)),
25340b57cec5SDimitry Andric            (VPANDNrm VR128:$src1, addr:$src2)>;
25350b57cec5SDimitry Andric  def : Pat<(X86andnp VR128:$src1, (loadv4i32 addr:$src2)),
25360b57cec5SDimitry Andric            (VPANDNrm VR128:$src1, addr:$src2)>;
25370b57cec5SDimitry Andric}
25380b57cec5SDimitry Andric
// Integer logic selection patterns for 128-bit vectors, active under the
// UseSSE2 predicate. and/or/xor/X86andnp with v16i8, v8i16 and v4i32 types on
// VR128 operands are selected to PAND/POR/PXOR/PANDN.
// NOTE(review): no v2i64 patterns appear in this block; presumably that type
// is matched by the instruction definitions themselves -- confirm.
25390b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
  // Register-register forms (both operands in VR128).
25400b57cec5SDimitry Andric  def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
25410b57cec5SDimitry Andric            (PANDrr VR128:$src1, VR128:$src2)>;
25420b57cec5SDimitry Andric  def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
25430b57cec5SDimitry Andric            (PANDrr VR128:$src1, VR128:$src2)>;
25440b57cec5SDimitry Andric  def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
25450b57cec5SDimitry Andric            (PANDrr VR128:$src1, VR128:$src2)>;
25460b57cec5SDimitry Andric
25470b57cec5SDimitry Andric  def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
25480b57cec5SDimitry Andric            (PORrr VR128:$src1, VR128:$src2)>;
25490b57cec5SDimitry Andric  def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
25500b57cec5SDimitry Andric            (PORrr VR128:$src1, VR128:$src2)>;
25510b57cec5SDimitry Andric  def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
25520b57cec5SDimitry Andric            (PORrr VR128:$src1, VR128:$src2)>;
25530b57cec5SDimitry Andric
25540b57cec5SDimitry Andric  def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
25550b57cec5SDimitry Andric            (PXORrr VR128:$src1, VR128:$src2)>;
25560b57cec5SDimitry Andric  def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
25570b57cec5SDimitry Andric            (PXORrr VR128:$src1, VR128:$src2)>;
25580b57cec5SDimitry Andric  def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
25590b57cec5SDimitry Andric            (PXORrr VR128:$src1, VR128:$src2)>;
25600b57cec5SDimitry Andric
25610b57cec5SDimitry Andric  def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
25620b57cec5SDimitry Andric            (PANDNrr VR128:$src1, VR128:$src2)>;
25630b57cec5SDimitry Andric  def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
25640b57cec5SDimitry Andric            (PANDNrr VR128:$src1, VR128:$src2)>;
25650b57cec5SDimitry Andric  def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
25660b57cec5SDimitry Andric            (PANDNrr VR128:$src1, VR128:$src2)>;
25670b57cec5SDimitry Andric
  // Register-memory forms: the second operand is a load matched through the
  // memopv16i8/memopv8i16/memopv4i32 fragments and folded into the *rm
  // instruction.
  // NOTE(review): presumably memop* also enforces the legacy-SSE alignment
  // requirement for folded loads -- confirm against the fragment definitions.
25680b57cec5SDimitry Andric  def : Pat<(and VR128:$src1, (memopv16i8 addr:$src2)),
25690b57cec5SDimitry Andric            (PANDrm VR128:$src1, addr:$src2)>;
25700b57cec5SDimitry Andric  def : Pat<(and VR128:$src1, (memopv8i16 addr:$src2)),
25710b57cec5SDimitry Andric            (PANDrm VR128:$src1, addr:$src2)>;
25720b57cec5SDimitry Andric  def : Pat<(and VR128:$src1, (memopv4i32 addr:$src2)),
25730b57cec5SDimitry Andric            (PANDrm VR128:$src1, addr:$src2)>;
25740b57cec5SDimitry Andric
25750b57cec5SDimitry Andric  def : Pat<(or VR128:$src1, (memopv16i8 addr:$src2)),
25760b57cec5SDimitry Andric            (PORrm VR128:$src1, addr:$src2)>;
25770b57cec5SDimitry Andric  def : Pat<(or VR128:$src1, (memopv8i16 addr:$src2)),
25780b57cec5SDimitry Andric            (PORrm VR128:$src1, addr:$src2)>;
25790b57cec5SDimitry Andric  def : Pat<(or VR128:$src1, (memopv4i32 addr:$src2)),
25800b57cec5SDimitry Andric            (PORrm VR128:$src1, addr:$src2)>;
25810b57cec5SDimitry Andric
25820b57cec5SDimitry Andric  def : Pat<(xor VR128:$src1, (memopv16i8 addr:$src2)),
25830b57cec5SDimitry Andric            (PXORrm VR128:$src1, addr:$src2)>;
25840b57cec5SDimitry Andric  def : Pat<(xor VR128:$src1, (memopv8i16 addr:$src2)),
25850b57cec5SDimitry Andric            (PXORrm VR128:$src1, addr:$src2)>;
25860b57cec5SDimitry Andric  def : Pat<(xor VR128:$src1, (memopv4i32 addr:$src2)),
25870b57cec5SDimitry Andric            (PXORrm VR128:$src1, addr:$src2)>;
25880b57cec5SDimitry Andric
25890b57cec5SDimitry Andric  def : Pat<(X86andnp VR128:$src1, (memopv16i8 addr:$src2)),
25900b57cec5SDimitry Andric            (PANDNrm VR128:$src1, addr:$src2)>;
25910b57cec5SDimitry Andric  def : Pat<(X86andnp VR128:$src1, (memopv8i16 addr:$src2)),
25920b57cec5SDimitry Andric            (PANDNrm VR128:$src1, addr:$src2)>;
25930b57cec5SDimitry Andric  def : Pat<(X86andnp VR128:$src1, (memopv4i32 addr:$src2)),
25940b57cec5SDimitry Andric            (PANDNrm VR128:$src1, addr:$src2)>;
25950b57cec5SDimitry Andric}
25960b57cec5SDimitry Andric
25970b57cec5SDimitry Andric// Patterns for packed operations when we don't have integer type available.
// The X86fand/X86for/X86fxor/X86fandn nodes with a v4f32 result on VR128
// operands select the ANDPS/ORPS/XORPS/ANDNPS register-register forms.
// No `let Predicates` wrapper is visible around these defs.
25980b57cec5SDimitry Andricdef : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
25990b57cec5SDimitry Andric          (ANDPSrr VR128:$src1, VR128:$src2)>;
26000b57cec5SDimitry Andricdef : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)),
26010b57cec5SDimitry Andric          (ORPSrr VR128:$src1, VR128:$src2)>;
26020b57cec5SDimitry Andricdef : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)),
26030b57cec5SDimitry Andric          (XORPSrr VR128:$src1, VR128:$src2)>;
26040b57cec5SDimitry Andricdef : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)),
26050b57cec5SDimitry Andric          (ANDNPSrr VR128:$src1, VR128:$src2)>;
26060b57cec5SDimitry Andric
// Same four operations with the second operand folded from a memopv4f32 load,
// selecting the register-memory (rm) forms.
26070b57cec5SDimitry Andricdef : Pat<(X86fand VR128:$src1, (memopv4f32 addr:$src2)),
26080b57cec5SDimitry Andric          (ANDPSrm VR128:$src1, addr:$src2)>;
26090b57cec5SDimitry Andricdef : Pat<(X86for VR128:$src1, (memopv4f32 addr:$src2)),
26100b57cec5SDimitry Andric          (ORPSrm VR128:$src1, addr:$src2)>;
26110b57cec5SDimitry Andricdef : Pat<(X86fxor VR128:$src1, (memopv4f32 addr:$src2)),
26120b57cec5SDimitry Andric          (XORPSrm VR128:$src1, addr:$src2)>;
26130b57cec5SDimitry Andricdef : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)),
26140b57cec5SDimitry Andric          (ANDNPSrm VR128:$src1, addr:$src2)>;
26150b57cec5SDimitry Andric
26160b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
26170b57cec5SDimitry Andric// SSE 1 & 2 - Arithmetic Instructions
26180b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
26190b57cec5SDimitry Andric
26200b57cec5SDimitry Andric/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and
26210b57cec5SDimitry Andric/// vector forms.
26220b57cec5SDimitry Andric///
26230b57cec5SDimitry Andric/// In addition, we also have a special variant of the scalar form here to
26240b57cec5SDimitry Andric/// represent the associated intrinsic operation.  This form is unlike the
26250b57cec5SDimitry Andric/// plain scalar form, in that it takes an entire vector (instead of a scalar)
26260b57cec5SDimitry Andric/// and leaves the top elements unmodified (therefore these cannot be commuted).
26270b57cec5SDimitry Andric///
26280b57cec5SDimitry Andric/// These three forms can each be reg+reg or reg+mem.
26290b57cec5SDimitry Andric///
26300b57cec5SDimitry Andric
26310b57cec5SDimitry Andric/// FIXME: once all 256-bit intrinsics are matched, clean up and refactor the
26320b57cec5SDimitry Andric/// classes below.
// basic_sse12_fp_binop_p - packed (vector) FP binop definitions.
//   opc       - opcode byte shared by every form.
//   OpcodeStr - mnemonic stem; "ps" / "pd" is appended per form.
//   OpNode    - DAG operator passed through to sse12_fp_packed.
//   sched     - scheduling bundle; the .PS/.PD and .XMM/.YMM members are
//               picked per element type and vector width.
// Every definition is marked as reading MXCSR and as possibly raising an FP
// exception.
26330b57cec5SDimitry Andricmulticlass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
2634fe6060f1SDimitry Andric                                  SDPatternOperator OpNode, X86SchedWriteSizes sched> {
2635480093f4SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1 in {
  // VEX-encoded forms (V#NAME#...), only under [HasAVX, NoVLX]. They use the
  // loadv* fragments and pass a trailing 0 to sse12_fp_packed.
  // NOTE(review): the trailing 0 is presumably the Is2Addr flag, as in
  // sse12_fp_scalar -- confirm against the sse12_fp_packed signature.
26360b57cec5SDimitry Andric  let Predicates = [HasAVX, NoVLX] in {
26370b57cec5SDimitry Andric  defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
26380b57cec5SDimitry Andric                               VR128, v4f32, f128mem, loadv4f32,
263906c3fb27SDimitry Andric                               SSEPackedSingle, sched.PS.XMM, 0>, PS, VEX_4V, WIG;
26400b57cec5SDimitry Andric  defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
26410b57cec5SDimitry Andric                               VR128, v2f64, f128mem, loadv2f64,
264206c3fb27SDimitry Andric                               SSEPackedDouble, sched.PD.XMM, 0>, PD, VEX_4V, WIG;
26430b57cec5SDimitry Andric
  // 256-bit variants: VR256 / f256mem operands, YMM scheduling, plus VEX_L.
26440b57cec5SDimitry Andric  defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
26450b57cec5SDimitry Andric                        OpNode, VR256, v8f32, f256mem, loadv8f32,
264606c3fb27SDimitry Andric                        SSEPackedSingle, sched.PS.YMM, 0>, PS, VEX_4V, VEX_L, WIG;
26470b57cec5SDimitry Andric  defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
26480b57cec5SDimitry Andric                        OpNode, VR256, v4f64, f256mem, loadv4f64,
264906c3fb27SDimitry Andric                        SSEPackedDouble, sched.PD.YMM, 0>, PD, VEX_4V, VEX_L, WIG;
26500b57cec5SDimitry Andric  }
26510b57cec5SDimitry Andric
  // Legacy SSE forms: $src1 is tied to $dst, loads go through memopv*, and the
  // last sse12_fp_packed argument is left at its default.
26520b57cec5SDimitry Andric  let Constraints = "$src1 = $dst" in {
26530b57cec5SDimitry Andric    defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
26540b57cec5SDimitry Andric                              v4f32, f128mem, memopv4f32, SSEPackedSingle,
26550b57cec5SDimitry Andric                              sched.PS.XMM>, PS;
26560b57cec5SDimitry Andric    defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
26570b57cec5SDimitry Andric                              v2f64, f128mem, memopv2f64, SSEPackedDouble,
26580b57cec5SDimitry Andric                              sched.PD.XMM>, PD;
26590b57cec5SDimitry Andric  }
26600b57cec5SDimitry Andric}
2661480093f4SDimitry Andric}
26620b57cec5SDimitry Andric
// basic_sse12_fp_binop_s - scalar FP binop definitions on FR32 / FR64.
//   opc       - opcode byte shared by every form.
//   OpcodeStr - mnemonic stem; "ss" / "sd" is appended per form.
//   OpNode    - DAG operator passed through to sse12_fp_scalar.
//   sched     - scheduling bundle; sched.PS.Scl / sched.PD.Scl are used.
// Every definition is marked as reading MXCSR and as possibly raising an FP
// exception. sse12_fp_scalar (defined at the top of this file) wraps its defs
// in `isCodeGenOnly = 1`.
2663fe6060f1SDimitry Andricmulticlass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
26640b57cec5SDimitry Andric                                  X86SchedWriteSizes sched> {
2665480093f4SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1 in {
  // VEX-encoded forms: the trailing 0 is sse12_fp_scalar's Is2Addr parameter
  // (default 1), i.e. these are not two-address forms.
26660b57cec5SDimitry Andric  defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
26670b57cec5SDimitry Andric                         OpNode, FR32, f32mem, SSEPackedSingle, sched.PS.Scl, 0>,
266806c3fb27SDimitry Andric                         XS, VEX_4V, VEX_LIG, WIG;
26690b57cec5SDimitry Andric  defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
26700b57cec5SDimitry Andric                         OpNode, FR64, f64mem, SSEPackedDouble, sched.PD.Scl, 0>,
267106c3fb27SDimitry Andric                         XD, VEX_4V, VEX_LIG, WIG;
26720b57cec5SDimitry Andric
  // Legacy SSE forms: $src1 is tied to $dst and Is2Addr keeps its default.
26730b57cec5SDimitry Andric  let Constraints = "$src1 = $dst" in {
26740b57cec5SDimitry Andric    defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
26750b57cec5SDimitry Andric                              OpNode, FR32, f32mem, SSEPackedSingle,
26760b57cec5SDimitry Andric                              sched.PS.Scl>, XS;
26770b57cec5SDimitry Andric    defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
26780b57cec5SDimitry Andric                              OpNode, FR64, f64mem, SSEPackedDouble,
26790b57cec5SDimitry Andric                              sched.PD.Scl>, XD;
26800b57cec5SDimitry Andric  }
26810b57cec5SDimitry Andric}
2682480093f4SDimitry Andric}
26830b57cec5SDimitry Andric
// basic_sse12_fp_binop_s_int - scalar FP binop definitions that operate on
// whole VR128 vectors (v4f32 / v2f64), built from sse12_fp_scalar_int.
//   opc       - opcode byte shared by every form.
//   OpcodeStr - mnemonic stem; "ss" / "sd" is appended per form.
//   OpNode    - DAG operator passed through (callers may pass null_frag).
//   sched     - scheduling bundle; sched.PS.Scl / sched.PD.Scl are used.
// Memory operands use ssmem/sdmem with the sse_load_f32/sse_load_f64
// fragments. Every definition is marked as reading MXCSR and as possibly
// raising an FP exception.
26840b57cec5SDimitry Andricmulticlass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
26850b57cec5SDimitry Andric                                      SDPatternOperator OpNode,
26860b57cec5SDimitry Andric                                      X86SchedWriteSizes sched> {
2687480093f4SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1 in {
  // VEX-encoded forms.
  // NOTE(review): the trailing 0 is presumably an Is2Addr-style flag, as in
  // sse12_fp_scalar -- confirm against the sse12_fp_scalar_int signature.
2688349cc55cSDimitry Andric  defm V#NAME#SS : sse12_fp_scalar_int<opc, OpNode, VR128, v4f32,
26890b57cec5SDimitry Andric                   !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
269006c3fb27SDimitry Andric                   SSEPackedSingle, sched.PS.Scl, 0>, XS, VEX_4V, VEX_LIG, WIG;
2691349cc55cSDimitry Andric  defm V#NAME#SD : sse12_fp_scalar_int<opc, OpNode, VR128, v2f64,
26920b57cec5SDimitry Andric                   !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
269306c3fb27SDimitry Andric                   SSEPackedDouble, sched.PD.Scl, 0>, XD, VEX_4V, VEX_LIG, WIG;
26940b57cec5SDimitry Andric
  // Legacy SSE forms with $src1 tied to $dst.
26950b57cec5SDimitry Andric  let Constraints = "$src1 = $dst" in {
2696349cc55cSDimitry Andric    defm SS : sse12_fp_scalar_int<opc, OpNode, VR128, v4f32,
26970b57cec5SDimitry Andric                   !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
26980b57cec5SDimitry Andric                   SSEPackedSingle, sched.PS.Scl>, XS;
2699349cc55cSDimitry Andric    defm SD : sse12_fp_scalar_int<opc, OpNode, VR128, v2f64,
27000b57cec5SDimitry Andric                   !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
27010b57cec5SDimitry Andric                   SSEPackedDouble, sched.PD.Scl>, XD;
27020b57cec5SDimitry Andric  }
27030b57cec5SDimitry Andric}
2704480093f4SDimitry Andric}
27050b57cec5SDimitry Andric
27060b57cec5SDimitry Andric// Binary Arithmetic instructions
// Each defm combines the packed (_p), scalar (_s) and vector-typed scalar
// (_s_int) multiclasses under one NAME with a shared opcode byte:
//   ADD 0x58, MUL 0x59, SUB 0x5C, MIN 0x5D, DIV 0x5E, MAX 0x5F.
// For add/mul/sub/div the _s_int forms are given null_frag, so they carry no
// operator of their own here; scalar_math_patterns later in this file refers
// to the resulting *rr_Int / *rm_Int instructions by name. MAX/MIN pass
// X86fmaxs / X86fmins to _s_int instead.
2707480093f4SDimitry Andricdefm ADD : basic_sse12_fp_binop_p<0x58, "add", any_fadd, SchedWriteFAddSizes>,
2708480093f4SDimitry Andric           basic_sse12_fp_binop_s<0x58, "add", any_fadd, SchedWriteFAddSizes>,
27090b57cec5SDimitry Andric           basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAddSizes>;
2710480093f4SDimitry Andricdefm MUL : basic_sse12_fp_binop_p<0x59, "mul", any_fmul, SchedWriteFMulSizes>,
2711480093f4SDimitry Andric           basic_sse12_fp_binop_s<0x59, "mul", any_fmul, SchedWriteFMulSizes>,
27120b57cec5SDimitry Andric           basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMulSizes>;
// SUB, DIV, MAX and MIN are instantiated with isCommutable forced to 0; ADD
// and MUL above are outside this region. SUB reuses SchedWriteFAddSizes.
27130b57cec5SDimitry Andriclet isCommutable = 0 in {
2714480093f4SDimitry Andric  defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", any_fsub, SchedWriteFAddSizes>,
2715480093f4SDimitry Andric             basic_sse12_fp_binop_s<0x5C, "sub", any_fsub, SchedWriteFAddSizes>,
27160b57cec5SDimitry Andric             basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAddSizes>;
2717480093f4SDimitry Andric  defm DIV : basic_sse12_fp_binop_p<0x5E, "div", any_fdiv, SchedWriteFDivSizes>,
2718480093f4SDimitry Andric             basic_sse12_fp_binop_s<0x5E, "div", any_fdiv, SchedWriteFDivSizes>,
27190b57cec5SDimitry Andric             basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDivSizes>;
27200b57cec5SDimitry Andric  defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
27210b57cec5SDimitry Andric             basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
27220b57cec5SDimitry Andric             basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SchedWriteFCmpSizes>;
27230b57cec5SDimitry Andric  defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
27240b57cec5SDimitry Andric             basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
27250b57cec5SDimitry Andric             basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SchedWriteFCmpSizes>;
27260b57cec5SDimitry Andric}
27270b57cec5SDimitry Andric
// Codegen-only MAXC / MINC variants: same opcode bytes (0x5F / 0x5D) and
// mnemonic stems ("max" / "min") as MAX / MIN, but matched on the X86fmaxc /
// X86fminc nodes. Only the packed and scalar multiclasses are instantiated
// (no _s_int forms), and these sit outside the `isCommutable = 0` region.
// NOTE(review): presumably these are the commutable flavours of max/min --
// confirm against the X86fmaxc/X86fminc node definitions.
27280b57cec5SDimitry Andriclet isCodeGenOnly = 1 in {
27290b57cec5SDimitry Andric  defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>,
27300b57cec5SDimitry Andric             basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>;
27310b57cec5SDimitry Andric  defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SchedWriteFCmpSizes>,
27320b57cec5SDimitry Andric             basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SchedWriteFCmpSizes>;
27330b57cec5SDimitry Andric}
27340b57cec5SDimitry Andric
27350b57cec5SDimitry Andric// Patterns used to select SSE scalar fp arithmetic instructions from
27360b57cec5SDimitry Andric// either:
27370b57cec5SDimitry Andric//
27380b57cec5SDimitry Andric// (1) a scalar fp operation followed by a blend
27390b57cec5SDimitry Andric//
27400b57cec5SDimitry Andric// The effect is that the backend no longer emits unnecessary vector
27410b57cec5SDimitry Andric// insert instructions immediately after SSE scalar fp instructions
27420b57cec5SDimitry Andric// like addss or mulss.
27430b57cec5SDimitry Andric//
27440b57cec5SDimitry Andric// For example, given the following code:
27450b57cec5SDimitry Andric//   __m128 foo(__m128 A, __m128 B) {
27460b57cec5SDimitry Andric//     A[0] += B[0];
27470b57cec5SDimitry Andric//     return A;
27480b57cec5SDimitry Andric//   }
27490b57cec5SDimitry Andric//
27500b57cec5SDimitry Andric// Previously we generated:
27510b57cec5SDimitry Andric//   addss %xmm0, %xmm1
27520b57cec5SDimitry Andric//   movss %xmm1, %xmm0
27530b57cec5SDimitry Andric//
27540b57cec5SDimitry Andric// We now generate:
27550b57cec5SDimitry Andric//   addss %xmm1, %xmm0
27560b57cec5SDimitry Andric//
27570b57cec5SDimitry Andric// (2) a vector packed single/double fp operation followed by a vector insert
27580b57cec5SDimitry Andric//
27590b57cec5SDimitry Andric// The effect is that the backend converts the packed fp instruction
27600b57cec5SDimitry Andric// followed by a vector insert into a single SSE scalar fp instruction.
27610b57cec5SDimitry Andric//
27620b57cec5SDimitry Andric// For example, given the following code:
27630b57cec5SDimitry Andric//   __m128 foo(__m128 A, __m128 B) {
27640b57cec5SDimitry Andric//     __m128 C = A + B;
27650b57cec5SDimitry Andric//     return (__m128) {C[0], A[1], A[2], A[3]};
27660b57cec5SDimitry Andric//   }
27670b57cec5SDimitry Andric//
27680b57cec5SDimitry Andric// Previously we generated:
27690b57cec5SDimitry Andric//   addps %xmm0, %xmm1
27700b57cec5SDimitry Andric//   movss %xmm1, %xmm0
27710b57cec5SDimitry Andric//
27720b57cec5SDimitry Andric// We now generate:
27730b57cec5SDimitry Andric//   addss %xmm1, %xmm0
27740b57cec5SDimitry Andric
27750b57cec5SDimitry Andric// TODO: Some canonicalization in lowering would simplify the number of
27760b57cec5SDimitry Andric// patterns we have to try to match.
// scalar_math_patterns - select the vector-typed scalar instructions
// (<OpcPrefix>rr_Int / <OpcPrefix>rm_Int) for "extract element 0, apply Op,
// re-insert via Move" DAGs.
//   Op            - scalar FP operator being matched.
//   OpcPrefix     - instruction name stem, e.g. "ADDSS"; "rr_Int"/"rm_Int" is
//                   appended, and "V" is prepended for the AVX patterns.
//   Move          - node that re-inserts the low element (X86Movss/X86Movsd
//                   at the instantiation sites in this file).
//   VT / EltTy    - vector type and its element type.
//   RC            - scalar register class of the second operand.
//   ld_frag       - load fragment used for the folded-memory pattern.
//   BasePredicate - predicate guarding the non-AVX patterns.
2777fe6060f1SDimitry Andricmulticlass scalar_math_patterns<SDPatternOperator Op, string OpcPrefix, SDNode Move,
27780b57cec5SDimitry Andric                                ValueType VT, ValueType EltTy,
27790b57cec5SDimitry Andric                                RegisterClass RC, PatFrag ld_frag,
27800b57cec5SDimitry Andric                                Predicate BasePredicate> {
27810b57cec5SDimitry Andric  let Predicates = [BasePredicate] in {
27820b57cec5SDimitry Andric    // extracted scalar math op with insert via movss/movsd
    // Register form: the scalar RC:$src is copied into VR128 so it can feed
    // the vector-typed rr_Int instruction.
27830b57cec5SDimitry Andric    def : Pat<(VT (Move (VT VR128:$dst),
27840b57cec5SDimitry Andric                        (VT (scalar_to_vector
27850b57cec5SDimitry Andric                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
27860b57cec5SDimitry Andric                                 RC:$src))))),
27870b57cec5SDimitry Andric              (!cast<Instruction>(OpcPrefix#rr_Int) VT:$dst,
27880b57cec5SDimitry Andric               (VT (COPY_TO_REGCLASS RC:$src, VR128)))>;
    // Memory form: the second operand is a load matched by ld_frag and folded
    // into rm_Int.
27890b57cec5SDimitry Andric    def : Pat<(VT (Move (VT VR128:$dst),
27900b57cec5SDimitry Andric                        (VT (scalar_to_vector
27910b57cec5SDimitry Andric                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
27920b57cec5SDimitry Andric                                 (ld_frag addr:$src)))))),
27930b57cec5SDimitry Andric              (!cast<Instruction>(OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
27940b57cec5SDimitry Andric  }
27950b57cec5SDimitry Andric
27960b57cec5SDimitry Andric  // Repeat for AVX versions of the instructions.
  // These are always guarded by UseAVX, independent of BasePredicate.
27970b57cec5SDimitry Andric  let Predicates = [UseAVX] in {
27980b57cec5SDimitry Andric    // extracted scalar math op with insert via movss/movsd
27990b57cec5SDimitry Andric    def : Pat<(VT (Move (VT VR128:$dst),
28000b57cec5SDimitry Andric                        (VT (scalar_to_vector
28010b57cec5SDimitry Andric                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
28020b57cec5SDimitry Andric                                 RC:$src))))),
28030b57cec5SDimitry Andric              (!cast<Instruction>("V"#OpcPrefix#rr_Int) VT:$dst,
28040b57cec5SDimitry Andric               (VT (COPY_TO_REGCLASS RC:$src, VR128)))>;
28050b57cec5SDimitry Andric    def : Pat<(VT (Move (VT VR128:$dst),
28060b57cec5SDimitry Andric                        (VT (scalar_to_vector
28070b57cec5SDimitry Andric                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
28080b57cec5SDimitry Andric                                 (ld_frag addr:$src)))))),
28090b57cec5SDimitry Andric              (!cast<Instruction>("V"#OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
28100b57cec5SDimitry Andric  }
28110b57cec5SDimitry Andric}
28120b57cec5SDimitry Andric
// Single-precision instantiations: add/sub/mul/div on v4f32 with f32 elements
// in FR32, re-inserted with X86Movss; non-AVX patterns guarded by UseSSE1.
2813480093f4SDimitry Andricdefm : scalar_math_patterns<any_fadd, "ADDSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
2814480093f4SDimitry Andricdefm : scalar_math_patterns<any_fsub, "SUBSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
2815480093f4SDimitry Andricdefm : scalar_math_patterns<any_fmul, "MULSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
2816480093f4SDimitry Andricdefm : scalar_math_patterns<any_fdiv, "DIVSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
28170b57cec5SDimitry Andric
// Double-precision instantiations: same four operators on v2f64 with f64
// elements in FR64, re-inserted with X86Movsd; non-AVX patterns guarded by
// UseSSE2.
2818480093f4SDimitry Andricdefm : scalar_math_patterns<any_fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
2819480093f4SDimitry Andricdefm : scalar_math_patterns<any_fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
2820480093f4SDimitry Andricdefm : scalar_math_patterns<any_fmul, "MULSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
2821480093f4SDimitry Andricdefm : scalar_math_patterns<any_fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
28220b57cec5SDimitry Andric
28230b57cec5SDimitry Andric/// Unop Arithmetic
28240b57cec5SDimitry Andric/// In addition, we also have a special variant of the scalar form here to
28250b57cec5SDimitry Andric/// represent the associated intrinsic operation.  This form is unlike the
28260b57cec5SDimitry Andric/// plain scalar form, in that it takes an entire vector (instead of a
28270b57cec5SDimitry Andric/// scalar) and leaves the top elements undefined.
28280b57cec5SDimitry Andric///
28290b57cec5SDimitry Andric/// And, we have a special variant form for a full-vector intrinsic form.
28300b57cec5SDimitry Andric
28310b57cec5SDimitry Andric/// sse_fp_unop_s - SSE1 unops in scalar form
28320b57cec5SDimitry Andric/// For the non-AVX defs, we need $src1 to be tied to $dst because
28330b57cec5SDimitry Andric/// the HW instructions are 2 operand / destructive.
// Parameters:
//   opc, OpcodeStr - opcode byte and full mnemonic (used as-is, no suffix).
//   RC             - scalar register class for the r / m forms.
//   x86memop       - memory operand for the scalar m form.
//   intmemop       - memory operand for the vector-typed m_Int form.
//   OpNode         - operator matched by the r / m instruction patterns.
//   d              - execution domain.
//   sched          - scheduling info; .Folded / .ReadAfterFold for loads.
//   target         - predicate required by the r / m forms.
28340b57cec5SDimitry Andricmulticlass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
2835349cc55cSDimitry Andric                          X86MemOperand x86memop, Operand intmemop,
2836349cc55cSDimitry Andric                          SDPatternOperator OpNode, Domain d,
28370b57cec5SDimitry Andric                          X86FoldableSchedWrite sched, Predicate target> {
  // Codegen-only scalar forms with a single source operand. The load-folding
  // m form additionally requires OptForSize.
28380b57cec5SDimitry Andric  let isCodeGenOnly = 1, hasSideEffects = 0 in {
28390b57cec5SDimitry Andric  def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1),
28400b57cec5SDimitry Andric              !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
28410b57cec5SDimitry Andric            [(set RC:$dst, (OpNode RC:$src1))], d>, Sched<[sched]>,
28420b57cec5SDimitry Andric            Requires<[target]>;
28430b57cec5SDimitry Andric  let mayLoad = 1 in
28440b57cec5SDimitry Andric  def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1),
28450b57cec5SDimitry Andric            !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
28460b57cec5SDimitry Andric            [(set RC:$dst, (OpNode (load addr:$src1)))], d>,
28470b57cec5SDimitry Andric            Sched<[sched.Folded]>,
28480b57cec5SDimitry Andric            Requires<[target, OptForSize]>;
28490b57cec5SDimitry Andric  }
28500b57cec5SDimitry Andric
  // Vector-typed (VR128) forms: $src1 is tied to $dst and does not appear in
  // the assembly string, which prints only $src2 and $dst. They carry no
  // selection pattern of their own (empty pattern list).
28510b57cec5SDimitry Andric  let hasSideEffects = 0, Constraints = "$src1 = $dst", ExeDomain = d in {
28520b57cec5SDimitry Andric  def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
28530b57cec5SDimitry Andric                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>,
28540b57cec5SDimitry Andric                Sched<[sched]>;
28550b57cec5SDimitry Andric  let mayLoad = 1 in
28560b57cec5SDimitry Andric  def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, intmemop:$src2),
28570b57cec5SDimitry Andric                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>,
28580b57cec5SDimitry Andric                Sched<[sched.Folded, sched.ReadAfterFold]>;
28590b57cec5SDimitry Andric  }
28600b57cec5SDimitry Andric
28610b57cec5SDimitry Andric}
28620b57cec5SDimitry Andric
// sse_fp_unop_s_intr - selection patterns mapping a scalar unary intrinsic
// onto the NAME#r_Int / NAME#m_Int instructions.
//   vt        - vector type used for the IMPLICIT_DEF pass-through operand.
//   mem_frags - load fragments matched for the folded-memory pattern.
//   Intr      - intrinsic being selected.
//   target    - predicate guarding both patterns.
2863349cc55cSDimitry Andricmulticlass sse_fp_unop_s_intr<ValueType vt, PatFrags mem_frags,
2864349cc55cSDimitry Andric                              Intrinsic Intr, Predicate target> {
28650b57cec5SDimitry Andric  let Predicates = [target] in {
28660b57cec5SDimitry Andric  // These are unary operations, but they are modeled as having 2 source operands
28670b57cec5SDimitry Andric  // because the high elements of the destination are unchanged in SSE.
  // The same register is passed for both source operands.
28680b57cec5SDimitry Andric  def : Pat<(Intr VR128:$src),
28690b57cec5SDimitry Andric            (!cast<Instruction>(NAME#r_Int) VR128:$src, VR128:$src)>;
28700b57cec5SDimitry Andric  }
28710b57cec5SDimitry Andric  // We don't want to fold scalar loads into these instructions unless
28720b57cec5SDimitry Andric  // optimizing for size. This is because the folded instruction will have a
28730b57cec5SDimitry Andric  // partial register update, while the unfolded sequence will not, e.g.
28740b57cec5SDimitry Andric  // movss mem, %xmm0
28750b57cec5SDimitry Andric  // rcpss %xmm0, %xmm0
28760b57cec5SDimitry Andric  // which has a clobber before the rcp, vs.
28770b57cec5SDimitry Andric  // rcpss mem, %xmm0
  // When folding, the first (pass-through) operand is an IMPLICIT_DEF of vt.
28780b57cec5SDimitry Andric  let Predicates = [target, OptForSize] in {
28795ffd83dbSDimitry Andric    def : Pat<(Intr (mem_frags addr:$src2)),
28800b57cec5SDimitry Andric               (!cast<Instruction>(NAME#m_Int)
28810b57cec5SDimitry Andric                      (vt (IMPLICIT_DEF)), addr:$src2)>;
28820b57cec5SDimitry Andric  }
28830b57cec5SDimitry Andric}
28840b57cec5SDimitry Andric
// avx_fp_unop_s_intr - selection patterns mapping a scalar unary intrinsic
// onto NAME#r_Int / NAME#m_Int. The two patterns have the same shape as those
// in sse_fp_unop_s_intr.
//   vt        - vector type used for the IMPLICIT_DEF first operand.
//   mem_frags - load fragments matched for the folded-memory pattern.
//   Intr      - intrinsic being selected.
//   target    - predicate guarding both patterns.
2885349cc55cSDimitry Andricmulticlass avx_fp_unop_s_intr<ValueType vt, PatFrags mem_frags,
28860b57cec5SDimitry Andric                              Intrinsic Intr, Predicate target> {
  // Register form: the same register is passed for both source operands.
28870b57cec5SDimitry Andric  let Predicates = [target] in {
28880b57cec5SDimitry Andric   def : Pat<(Intr VR128:$src),
28890b57cec5SDimitry Andric             (!cast<Instruction>(NAME#r_Int) VR128:$src,
28900b57cec5SDimitry Andric                                 VR128:$src)>;
28910b57cec5SDimitry Andric  }
  // Load folding is only enabled together with OptForSize; the first operand
  // is then an IMPLICIT_DEF of vt.
28920b57cec5SDimitry Andric  let Predicates = [target, OptForSize] in {
28935ffd83dbSDimitry Andric    def : Pat<(Intr (mem_frags addr:$src2)),
28940b57cec5SDimitry Andric              (!cast<Instruction>(NAME#m_Int)
28950b57cec5SDimitry Andric                    (vt (IMPLICIT_DEF)), addr:$src2)>;
28960b57cec5SDimitry Andric  }
28970b57cec5SDimitry Andric}
28980b57cec5SDimitry Andric
// avx_fp_unop_s - three-operand (dst, src1, src2) scalar unary-op definitions
// plus the patterns that select them.
//   opc, OpcodeStr - opcode byte and full mnemonic (used as-is, no suffix).
//   RC, ScalarVT   - scalar register class and its value type.
//   x86memop       - memory operand for the scalar m form.
//   intmemop       - memory operand for the vector-typed m_Int form.
//   OpNode         - operator matched by the trailing patterns.
//   d              - execution domain.
//   sched          - scheduling info; .Folded / .ReadAfterFold for loads.
//   target         - predicate guarding the trailing patterns.
28990b57cec5SDimitry Andricmulticlass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
29000b57cec5SDimitry Andric                          ValueType ScalarVT, X86MemOperand x86memop,
2901fe6060f1SDimitry Andric                          Operand intmemop, SDPatternOperator OpNode, Domain d,
29020b57cec5SDimitry Andric                          X86FoldableSchedWrite sched, Predicate target> {
  // Codegen-only scalar forms. They have no inline pattern; selection goes
  // through the Pat records at the end of this multiclass.
29030b57cec5SDimitry Andric  let isCodeGenOnly = 1, hasSideEffects = 0 in {
29040b57cec5SDimitry Andric  def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
29050b57cec5SDimitry Andric            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
29060b57cec5SDimitry Andric            [], d>, Sched<[sched]>;
29070b57cec5SDimitry Andric  let mayLoad = 1 in
29080b57cec5SDimitry Andric  def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
29090b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
29100b57cec5SDimitry Andric            [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>;
29110b57cec5SDimitry Andric  }
  // Vector-typed (VR128) forms. Unlike sse_fp_unop_s, $src1 is not tied to
  // $dst and all three operands are printed. No pattern is attached here.
29120b57cec5SDimitry Andric  let hasSideEffects = 0, ExeDomain = d in {
29130b57cec5SDimitry Andric  def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
29140b57cec5SDimitry Andric                (ins VR128:$src1, VR128:$src2),
29150b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
29160b57cec5SDimitry Andric             []>, Sched<[sched]>;
29170b57cec5SDimitry Andric  let mayLoad = 1 in
29180b57cec5SDimitry Andric  def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
29190b57cec5SDimitry Andric                (ins VR128:$src1, intmemop:$src2),
29200b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
29210b57cec5SDimitry Andric             []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
29220b57cec5SDimitry Andric  }
29230b57cec5SDimitry Andric
29240b57cec5SDimitry Andric  // We don't want to fold scalar loads into these instructions unless
29250b57cec5SDimitry Andric  // optimizing for size. This is because the folded instruction will have a
29260b57cec5SDimitry Andric  // partial register update, while the unfolded sequence will not, e.g.
29270b57cec5SDimitry Andric  // vmovss mem, %xmm0
29280b57cec5SDimitry Andric  // vrcpss %xmm0, %xmm0, %xmm0
29290b57cec5SDimitry Andric  // which has a clobber before the rcp, vs.
29300b57cec5SDimitry Andric  // vrcpss mem, %xmm0, %xmm0
29310b57cec5SDimitry Andric  // TODO: In theory, we could fold the load, and avoid the stall caused by
29320b57cec5SDimitry Andric  // the partial register store, either in BreakFalseDeps or with smarter RA.
  // Register pattern: the first source operand is an IMPLICIT_DEF of
  // ScalarVT, the real input goes in the second.
29330b57cec5SDimitry Andric  let Predicates = [target] in {
29340b57cec5SDimitry Andric   def : Pat<(OpNode RC:$src),  (!cast<Instruction>(NAME#r)
29350b57cec5SDimitry Andric                                (ScalarVT (IMPLICIT_DEF)), RC:$src)>;
29360b57cec5SDimitry Andric  }
  // Load-folding pattern, only together with OptForSize.
29370b57cec5SDimitry Andric  let Predicates = [target, OptForSize] in {
29380b57cec5SDimitry Andric    def : Pat<(ScalarVT (OpNode (load addr:$src))),
29390b57cec5SDimitry Andric              (!cast<Instruction>(NAME#m) (ScalarVT (IMPLICIT_DEF)),
29400b57cec5SDimitry Andric            addr:$src)>;
29410b57cec5SDimitry Andric  }
29420b57cec5SDimitry Andric}
29430b57cec5SDimitry Andric
/// sse1_fp_unop_p - SSE1 unops in packed form.
///
/// Instantiates the packed single-precision ("ps") register and memory forms
/// of a unary operation:
///   V#NAME#PSr  / V#NAME#PSm   - VEX-encoded 128-bit forms
///   V#NAME#PSYr / V#NAME#PSYm  - VEX-encoded 256-bit forms (VEX_L)
///   PSr / PSm                  - non-VEX 128-bit forms
///
///   opc       - opcode byte shared by every form.
///   OpcodeStr - mnemonic stem; "ps" is appended, and "v" is prepended for
///               the VEX forms.
///   OpNode    - pattern operator matched by every form.
///   sched     - per-width scheduling classes; .XMM / .YMM for the register
///               forms and the corresponding .Folded entry for memory forms.
///   prds      - predicates applied to the VEX-encoded forms only.
multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                          X86SchedWriteWidths sched, list<Predicate> prds> {
// VEX-encoded forms; these are the only defs guarded by `prds`.
let Predicates = prds in {
  def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       !strconcat("v", OpcodeStr,
                                  "ps\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>,
                       VEX, Sched<[sched.XMM]>, WIG;
  def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       !strconcat("v", OpcodeStr,
                                  "ps\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))]>,
                       VEX, Sched<[sched.XMM.Folded]>, WIG;
  def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                        !strconcat("v", OpcodeStr,
                                   "ps\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>,
                        VEX, VEX_L, Sched<[sched.YMM]>, WIG;
  def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                        !strconcat("v", OpcodeStr,
                                   "ps\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (OpNode (loadv8f32 addr:$src)))]>,
                        VEX, VEX_L, Sched<[sched.YMM.Folded]>, WIG;
}

  // Non-VEX 128-bit forms. No predicate is attached inside this multiclass.
  def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>,
                Sched<[sched.XMM]>;
  // The non-VEX memory form matches memopv4f32 where the VEX form matches
  // loadv4f32.
  // NOTE(review): presumably because the legacy encoding has stricter
  // alignment rules for folded loads - confirm against the memop fragment.
  def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>,
                Sched<[sched.XMM.Folded]>;
}
29790b57cec5SDimitry Andric
/// sse2_fp_unop_p - SSE2 unops in vector forms.
///
/// Instantiates the packed double-precision ("pd") register and memory forms
/// of a unary operation:
///   V#NAME#PDr  / V#NAME#PDm   - VEX-encoded 128-bit forms
///   V#NAME#PDYr / V#NAME#PDYm  - VEX-encoded 256-bit forms (VEX_L)
///   PDr / PDm                  - non-VEX 128-bit forms
///
///   opc       - opcode byte shared by every form.
///   OpcodeStr - mnemonic stem; "pd" is appended, and "v" is prepended for
///               the VEX forms.
///   OpNode    - pattern operator matched by every form.
///   sched     - per-width scheduling classes; .XMM / .YMM for the register
///               forms and the corresponding .Folded entry for memory forms.
multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
                          SDPatternOperator OpNode, X86SchedWriteWidths sched> {
// VEX-encoded forms; the predicate list is fixed here, not a parameter.
let Predicates = [HasAVX, NoVLX] in {
  def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       !strconcat("v", OpcodeStr,
                                  "pd\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>,
                       VEX, Sched<[sched.XMM]>, WIG;
  def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       !strconcat("v", OpcodeStr,
                                  "pd\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))]>,
                       VEX, Sched<[sched.XMM.Folded]>, WIG;
  def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                        !strconcat("v", OpcodeStr,
                                   "pd\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>,
                        VEX, VEX_L, Sched<[sched.YMM]>, WIG;
  def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                        !strconcat("v", OpcodeStr,
                                   "pd\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (OpNode (loadv4f64 addr:$src)))]>,
                        VEX, VEX_L, Sched<[sched.YMM.Folded]>, WIG;
}

  // Non-VEX 128-bit forms. No predicate is attached inside this multiclass.
  def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>,
                Sched<[sched.XMM]>;
  // The non-VEX memory form matches memopv2f64 where the VEX form matches
  // loadv2f64.
  def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>,
                Sched<[sched.XMM.Folded]>;
}
30150b57cec5SDimitry Andric
/// sse1_fp_unop_s_intr - Intrinsic-based scalar single-precision unop
/// instantiations.
///
/// Expands sse_fp_unop_s_intr under the name SS and avx_fp_unop_s_intr under
/// the name V#NAME#SS. Both receive the intrinsic record whose name is built
/// as "int_x86_sse_" # OpcodeStr # "_ss".
///
///   OpcodeStr - mnemonic stem used to build the intrinsic name.
///   AVXTarget - predicate forwarded to the AVX expansion; the SSE expansion
///               always uses UseSSE1.
multiclass sse1_fp_unop_s_intr<string OpcodeStr, Predicate AVXTarget> {
  defm SS        :  sse_fp_unop_s_intr<v4f32, sse_load_f32,
                      !cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss),
                      UseSSE1>, XS;
  defm V#NAME#SS  : avx_fp_unop_s_intr<v4f32, sse_load_f32,
                      !cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss),
                      AVXTarget>,
                      XS, VEX_4V, VEX_LIG, WIG;
}
30250b57cec5SDimitry Andric
/// sse1_fp_unop_s - Scalar single-precision unop instantiations.
///
/// Expands sse_fp_unop_s under the name SS (mnemonic OpcodeStr # "ss") and
/// avx_fp_unop_s under the name V#NAME#SS (mnemonic "v" # OpcodeStr # "ss").
/// Both use FR32, f32mem, ssmem, the SSEPackedSingle domain and sched.Scl.
///
///   opc       - opcode byte shared by both expansions.
///   OpcodeStr - mnemonic stem.
///   OpNode    - pattern operator forwarded to both expansions.
///   sched     - per-width scheduling classes; only .Scl is used here.
///   AVXTarget - predicate forwarded to the AVX expansion; the SSE expansion
///               always uses UseSSE1.
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                          X86SchedWriteWidths sched, Predicate AVXTarget> {
  defm SS        :  sse_fp_unop_s<opc, OpcodeStr#ss, FR32, f32mem,
                      ssmem, OpNode, SSEPackedSingle, sched.Scl, UseSSE1>, XS;
  defm V#NAME#SS  : avx_fp_unop_s<opc, "v"#OpcodeStr#ss, FR32, f32,
                      f32mem, ssmem, OpNode, SSEPackedSingle, sched.Scl, AVXTarget>,
                       XS, VEX_4V, VEX_LIG, WIG;
}
30340b57cec5SDimitry Andric
/// sse2_fp_unop_s - Scalar double-precision unop instantiations.
///
/// Expands sse_fp_unop_s under the name SD (mnemonic OpcodeStr # "sd") and
/// avx_fp_unop_s under the name V#NAME#SD (mnemonic "v" # OpcodeStr # "sd").
/// Both use FR64, f64mem, sdmem, the SSEPackedDouble domain and sched.Scl.
///
///   opc       - opcode byte shared by both expansions.
///   OpcodeStr - mnemonic stem.
///   OpNode    - pattern operator forwarded to both expansions.
///   sched     - per-width scheduling classes; only .Scl is used here.
///   AVXTarget - predicate forwarded to the AVX expansion; the SSE expansion
///               always uses UseSSE2.
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                          X86SchedWriteWidths sched, Predicate AVXTarget> {
  defm SD         : sse_fp_unop_s<opc, OpcodeStr#sd, FR64, f64mem,
                         sdmem, OpNode, SSEPackedDouble, sched.Scl, UseSSE2>, XD;
  defm V#NAME#SD  : avx_fp_unop_s<opc, "v"#OpcodeStr#sd, FR64, f64,
                         f64mem, sdmem, OpNode, SSEPackedDouble, sched.Scl, AVXTarget>,
                         XD, VEX_4V, VEX_LIG, WIG;
}
30430b57cec5SDimitry Andric
// Square root.
// Opcode 0x51 in all four flavours (scalar/packed x single/double), matched
// through any_fsqrt. The double-precision expansions use SchedWriteFSqrt64;
// the single-precision ones use SchedWriteFSqrt. SIMD_EXC is applied to the
// whole defm.
defm SQRT  : sse1_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, UseAVX>,
             sse1_fp_unop_p<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>,
             sse2_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt64, UseAVX>,
             sse2_fp_unop_p<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt64>, SIMD_EXC;
30490b57cec5SDimitry Andric
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
// Each defm combines the scalar, scalar-intrinsic and packed single-precision
// expansions. Unlike SQRT above, the AVX predicate passed here is HasAVX
// (and [HasAVX] for the packed forms), and no SIMD_EXC is attached.
defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>,
             sse1_fp_unop_s_intr<"rsqrt", HasAVX>,
             sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, [HasAVX]>;
defm RCP   : sse1_fp_unop_s<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>,
             sse1_fp_unop_s_intr<"rcp", HasAVX>,
             sse1_fp_unop_p<0x53, "rcp", X86frcp, SchedWriteFRcp, [HasAVX]>;

// There is no f64 version of the reciprocal approximation instructions.
30600b57cec5SDimitry Andric
/// scalar_unary_math_patterns - Select the *r_Int instruction forms for a
/// scalar unary op applied to element 0 of a vector and merged back via Move.
///
/// The matched shape is
///   (Move $dst, (scalar_to_vector (OpNode (extractelt $src, 0))))
/// and it is rewritten to the r_Int instruction taking ($dst, $src).
///
///   OpNode        - scalar operation applied to the extracted element.
///   OpcPrefix     - instruction name stem; "r_Int" is appended, and "V" is
///                   prepended for the AVX pattern.
///   Move          - node that merges the scalar result into $dst.
///   VT            - vector type of $dst and $src.
///   BasePredicate - predicate guarding the non-AVX pattern; the AVX pattern
///                   is always guarded by UseAVX.
multiclass scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix, SDNode Move,
                                      ValueType VT, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    def : Pat<(VT (Move VT:$dst, (scalar_to_vector
                                  (OpNode (extractelt VT:$src, 0))))),
              (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src)>;
  }

  // Repeat for AVX versions of the instructions.
  let Predicates = [UseAVX] in {
    def : Pat<(VT (Move VT:$dst, (scalar_to_vector
                                  (OpNode (extractelt VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
  }
}
30760b57cec5SDimitry Andric
// Scalar square-root merge patterns: SQRTSS on v4f32 via X86Movss and
// SQRTSD on v2f64 via X86Movsd.
defm : scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32, UseSSE1>;
defm : scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64, UseSSE2>;
30790b57cec5SDimitry Andric
/// scalar_unary_math_intr_patterns - Select the *r_Int instruction forms for
/// a vector intrinsic whose result is merged into another vector via Move.
///
/// The matched shape is (Move $dst, (Intr $src)) and it is rewritten to the
/// r_Int instruction taking ($dst, $src).
///
///   Intr          - intrinsic applied to $src.
///   OpcPrefix     - instruction name stem; "r_Int" is appended, and "V" is
///                   prepended for the AVX pattern.
///   Move          - node that merges the intrinsic result into $dst.
///   VT            - vector type of $dst and $src.
///   BasePredicate - predicate guarding the non-AVX pattern; the AVX pattern
///                   is always guarded by HasAVX.
multiclass scalar_unary_math_intr_patterns<Intrinsic Intr, string OpcPrefix,
                                           SDNode Move, ValueType VT,
                                           Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
              (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src)>;
  }

  // Repeat for AVX versions of the instructions.
  let Predicates = [HasAVX] in {
    def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
              (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
  }
}
30940b57cec5SDimitry Andric
// Intrinsic merge patterns for the scalar reciprocal and reciprocal
// square-root approximations, both on v4f32 via X86Movss.
defm : scalar_unary_math_intr_patterns<int_x86_sse_rcp_ss, "RCPSS", X86Movss,
                                       v4f32, UseSSE1>;
defm : scalar_unary_math_intr_patterns<int_x86_sse_rsqrt_ss, "RSQRTSS", X86Movss,
                                       v4f32, UseSSE1>;
30990b57cec5SDimitry Andric
31000b57cec5SDimitry Andric
31010b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
31020b57cec5SDimitry Andric// SSE 1 & 2 - Non-temporal stores
31030b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
31040b57cec5SDimitry Andric
// Everything in this scope is a non-temporal store. AddedComplexity is raised
// for all of the defs and patterns it contains.
let AddedComplexity = 400 in { // Prefer non-temporal versions

// VEX-encoded vector non-temporal stores.
let Predicates = [HasAVX, NoVLX] in {
// 128-bit floating-point stores.
let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
                     (ins f128mem:$dst, VR128:$src),
                     "movntps\t{$src, $dst|$dst, $src}",
                     [(alignednontemporalstore (v4f32 VR128:$src),
                                               addr:$dst)]>, VEX, WIG;
def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
                     (ins f128mem:$dst, VR128:$src),
                     "movntpd\t{$src, $dst|$dst, $src}",
                     [(alignednontemporalstore (v2f64 VR128:$src),
                                               addr:$dst)]>, VEX, WIG;
} // SchedRW

// 256-bit floating-point stores.
let SchedRW = [SchedWriteFMoveLSNT.YMM.MR] in {
def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
                     (ins f256mem:$dst, VR256:$src),
                     "movntps\t{$src, $dst|$dst, $src}",
                     [(alignednontemporalstore (v8f32 VR256:$src),
                                               addr:$dst)]>, VEX, VEX_L, WIG;
def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
                     (ins f256mem:$dst, VR256:$src),
                     "movntpd\t{$src, $dst|$dst, $src}",
                     [(alignednontemporalstore (v4f64 VR256:$src),
                                               addr:$dst)]>, VEX, VEX_L, WIG;
} // SchedRW

// Integer-domain stores. Each def carries its own Sched<> instead of an
// enclosing SchedRW.
let ExeDomain = SSEPackedInt in {
def VMOVNTDQmr    : VPDI<0xE7, MRMDestMem, (outs),
                         (ins i128mem:$dst, VR128:$src),
                         "movntdq\t{$src, $dst|$dst, $src}",
                         [(alignednontemporalstore (v2i64 VR128:$src),
                                                   addr:$dst)]>, VEX, WIG,
                         Sched<[SchedWriteVecMoveLSNT.XMM.MR]>;
def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
                    (ins i256mem:$dst, VR256:$src),
                    "movntdq\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v4i64 VR256:$src),
                                              addr:$dst)]>, VEX, VEX_L, WIG,
                    Sched<[SchedWriteVecMoveLSNT.YMM.MR]>;
} // ExeDomain
} // Predicates

// Non-VEX 128-bit floating-point stores.
let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movntps\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movntpd\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
} // SchedRW

// Non-VEX 128-bit integer-domain store.
// NOTE(review): the destination operand is f128mem here but i128mem on
// VMOVNTDQmr above - confirm whether the difference is intentional.
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLSNT.XMM.MR] in
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movntdq\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>;

// General-purpose-register non-temporal stores (32-bit and 64-bit), both
// requiring HasSSE2.
let SchedRW = [WriteStoreNT] in {
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                 "movnti{l}\t{$src, $dst|$dst, $src}",
                 [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
               PS, Requires<[HasSSE2]>;
def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
                     "movnti{q}\t{$src, $dst|$dst, $src}",
                     [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
                  PS, Requires<[HasSSE2]>;
} // SchedRW = [WriteStoreNT]

// The VMOVNTDQ defs above only match v2i64 / v4i64. Map the remaining
// 128-bit and 256-bit integer and f16 vector types onto the same
// instructions.
let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(alignednontemporalstore (v8i32 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignednontemporalstore (v16f16 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v8f16 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
}

// Same type coverage for the non-VEX MOVNTDQmr, which itself only matches
// v2i64.
let Predicates = [UseSSE2] in {
  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v8f16 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
}

} // AddedComplexity
32070b57cec5SDimitry Andric
32080b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
32090b57cec5SDimitry Andric// SSE 1 & 2 - Prefetch and memory fence
32100b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
32110b57cec5SDimitry Andric
// Prefetch intrinsic.
// All four hints share opcode 0x18 and differ only in the ModRM form
// (MRM0m..MRM3m). Each pattern matches a prefetch node whose third operand is
// a fixed constant (3 = T0, 2 = T1, 1 = T2, 0 = NTA) and whose fourth operand
// is the constant 1; the second operand may be any immediate.
// NOTE(review): the third and fourth operands are presumably the locality
// and cache-type arguments of the generic prefetch node - confirm.
let Predicates = [HasSSEPrefetch], SchedRW = [WriteLoad] in {
def PREFETCHT0   : I<0x18, MRM1m, (outs), (ins i8mem:$src),
    "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>, TB;
def PREFETCHT1   : I<0x18, MRM2m, (outs), (ins i8mem:$src),
    "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>, TB;
def PREFETCHT2   : I<0x18, MRM3m, (outs), (ins i8mem:$src),
    "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>, TB;
def PREFETCHNTA  : I<0x18, MRM0m, (outs), (ins i8mem:$src),
    "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>, TB;
}
32230b57cec5SDimitry Andric
// FIXME: How should the flush instruction be modeled?
// For now it is scheduled as a load (WriteLoad).
let SchedRW = [WriteLoad] in {
// Flush cache: selected from the int_x86_sse2_clflush intrinsic and gated on
// HasCLFLUSH.
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
               "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
               PS, Requires<[HasCLFLUSH]>;
}
32310b57cec5SDimitry Andric
let SchedRW = [WriteNop] in {
// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
// For that reason no Requires<> predicate is attached to the def.
def PAUSE : I<0x90, RawFrm, (outs), (ins),
              "pause", [(int_x86_sse2_pause)]>, OBXS;
}
32380b57cec5SDimitry Andric
let SchedRW = [WriteFence] in {
// Load, store, and memory fence
// All three share opcode 0xAE and are distinguished by the ModRM form
// (MRM7X / MRM5X / MRM6X). Each is selected from its own intrinsic.
// TODO: As with mfence, we may want to ease the availability of sfence/lfence
// to include any 64-bit target.
def SFENCE : I<0xAE, MRM7X, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
               PS, Requires<[HasSSE1]>;
def LFENCE : I<0xAE, MRM5X, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>,
               PS, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM6X, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>,
               PS, Requires<[HasMFence]>;
} // SchedRW
32500b57cec5SDimitry Andric
// Select the target-specific X86MFence node to the MFENCE instruction.
def : Pat<(X86MFence), (MFENCE)>;
32520b57cec5SDimitry Andric
32530b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Load/Store MXCSR register
32550b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
32560b57cec5SDimitry Andric
// VEX-encoded MXCSR load and store. Both share opcode 0xAE and are
// distinguished by the ModRM form (MRM2m = load, MRM3m = store). The load
// defines MXCSR and the store uses it; both are marked as having side
// effects.
let mayLoad=1, hasSideEffects=1, Defs=[MXCSR] in
def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
               "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
               VEX, Sched<[WriteLDMXCSR]>, WIG;
let mayStore=1, hasSideEffects=1, Uses=[MXCSR] in
def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
               "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
               VEX, Sched<[WriteSTMXCSR]>, WIG;
32650b57cec5SDimitry Andric
// Non-VEX MXCSR load and store. They use the same opcode, ModRM forms,
// intrinsics and register Defs/Uses as the VEX definitions, with the PS
// prefix class in place of VEX.
let mayLoad=1, hasSideEffects=1, Defs=[MXCSR] in
def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src),
              "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
              PS, Sched<[WriteLDMXCSR]>;
let mayStore=1, hasSideEffects=1, Uses=[MXCSR] in
def STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst),
              "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
              PS, Sched<[WriteSTMXCSR]>;
32740b57cec5SDimitry Andric
32750b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
32760b57cec5SDimitry Andric// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
32770b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
32780b57cec5SDimitry Andric
32790b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in { // SSE integer instructions
32800b57cec5SDimitry Andric
// VEX-encoded register-to-register integer moves (opcode 0x6F, MRMSrcReg).
// These carry no selection pattern.
let hasSideEffects = 0 in {
def VMOVDQArr  : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, WIG;
def VMOVDQUrr  : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "movdqu\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, WIG;
def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, WIG;
def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      "movdqu\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, WIG;
}
32950b57cec5SDimitry Andric
// For Disassembler
// Alternate register-to-register encodings using the store opcode 0x7F with
// MRMDestReg. They are marked isCodeGenOnly with ForceDisassemble set.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def VMOVDQArr_REV  : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                          "movdqa\t{$src, $dst|$dst, $src}", []>,
                          Sched<[SchedWriteVecMoveLS.XMM.RR]>,
                          VEX, WIG;
def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                          "movdqa\t{$src, $dst|$dst, $src}", []>,
                          Sched<[SchedWriteVecMoveLS.YMM.RR]>,
                          VEX, VEX_L, WIG;
def VMOVDQUrr_REV  : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                          "movdqu\t{$src, $dst|$dst, $src}", []>,
                          Sched<[SchedWriteVecMoveLS.XMM.RR]>,
                          VEX, WIG;
def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                          "movdqu\t{$src, $dst|$dst, $src}", []>,
                          Sched<[SchedWriteVecMoveLS.YMM.RR]>,
                          VEX, VEX_L, WIG;
}
33150b57cec5SDimitry Andric
// VEX-encoded integer vector loads (opcode 0x6F, MRMSrcMem). Only the 128-bit
// forms carry a selection pattern (v2i64, aligned and unaligned); the 256-bit
// forms have an empty pattern list.
// The unaligned forms are built on the plain I<> class with an explicit XS
// prefix and spell the full "vmovdqu" mnemonic; the aligned forms use VPDI
// with "movdqa".
// NOTE(review): presumably VPDI prepends the "v" itself - confirm against the
// class definition.
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
    hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in {
def VMOVDQArm  : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                      "movdqa\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (alignedloadv2i64 addr:$src))]>,
                      Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, WIG;
def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                      VEX, VEX_L, WIG;
def VMOVDQUrm  : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv2i64 addr:$src))]>,
                   Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                   XS, VEX, WIG;
def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}", []>,
                   Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                   XS, VEX, VEX_L, WIG;
}
33360b57cec5SDimitry Andric
// VEX-encoded integer vector stores (opcode 0x7F, MRMDestMem). Only the
// 128-bit forms carry a selection pattern (v2i64, aligned and unaligned); the
// 256-bit forms have an empty pattern list.
let mayStore = 1, hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in {
def VMOVDQAmr  : VPDI<0x7F, MRMDestMem, (outs),
                      (ins i128mem:$dst, VR128:$src),
                      "movdqa\t{$src, $dst|$dst, $src}",
                      [(alignedstore (v2i64 VR128:$src), addr:$dst)]>,
                      Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, WIG;
def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
                      (ins i256mem:$dst, VR256:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.YMM.MR]>, VEX, VEX_L, WIG;
def VMOVDQUmr  : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}",
                   [(store (v2i64 VR128:$src), addr:$dst)]>,
                   Sched<[SchedWriteVecMoveLS.XMM.MR]>, XS, VEX, WIG;
def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}",[]>,
                   Sched<[SchedWriteVecMoveLS.YMM.MR]>, XS, VEX, VEX_L, WIG;
}
33550b57cec5SDimitry Andric
// Non-VEX register-to-register integer moves, all scheduled as
// SchedWriteVecMoveLS.XMM.RR. None of them carries a selection pattern.
let SchedRW = [SchedWriteVecMoveLS.XMM.RR] in {
// Primary encodings: opcode 0x6F, MRMSrcReg.
let hasSideEffects = 0 in {
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "movdqa\t{$src, $dst|$dst, $src}", []>;

def MOVDQUrr :   I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "movdqu\t{$src, $dst|$dst, $src}", []>,
                   XS, Requires<[UseSSE2]>;
}

// For Disassembler
// Alternate encodings using the store opcode 0x7F with MRMDestReg.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                       "movdqa\t{$src, $dst|$dst, $src}", []>;

def MOVDQUrr_REV :   I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                       "movdqu\t{$src, $dst|$dst, $src}", []>,
                       XS, Requires<[UseSSE2]>;
}
} // SchedRW
33760b57cec5SDimitry Andric
// Legacy-encoded 128-bit loads (opcode 0x6F, MRMSrcMem). Both records are
// foldable, rematerializable loads without side effects and use the XMM
// register <- memory move schedule class.
let SchedRW = [SchedWriteVecMoveLS.XMM.RM], hasSideEffects = 0,
    isReMaterializable = 1, mayLoad = 1, canFoldAsLoad = 1 in {
  // Pattern intentionally left empty; the disabled pattern was:
  //   (set VR128:$dst, (alignedloadv2i64 addr:$src))
  def MOVDQArm
      : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
            "movdqa\t{$src, $dst|$dst, $src}", []>;

  // Pattern intentionally left empty; the disabled pattern was:
  //   (set VR128:$dst, (loadv2i64 addr:$src))
  def MOVDQUrm
      : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
          "movdqu\t{$src, $dst|$dst, $src}", []>,
        XS, Requires<[UseSSE2]>;
}
33870b57cec5SDimitry Andric
// Legacy-encoded 128-bit stores (opcode 0x7F, MRMDestMem). Both records are
// flagged mayStore with no other side effects and use the XMM
// memory <- register move schedule class.
let SchedRW = [SchedWriteVecMoveLS.XMM.MR], hasSideEffects = 0,
    mayStore = 1 in {
  // Pattern intentionally left empty; the disabled pattern was:
  //   (alignedstore (v2i64 VR128:$src), addr:$dst)
  def MOVDQAmr
      : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
            "movdqa\t{$src, $dst|$dst, $src}", []>;

  // Pattern intentionally left empty; the disabled pattern was:
  //   (store (v2i64 VR128:$src), addr:$dst)
  def MOVDQUmr
      : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
          "movdqu\t{$src, $dst|$dst, $src}", []>,
        XS, Requires<[UseSSE2]>;
}
33980b57cec5SDimitry Andric
33990b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
34000b57cec5SDimitry Andric
// GAS-compatible ".s" spellings of the VEX moves: each alias maps onto the
// corresponding *_REV (reversed-encoding) register-register record.
// NOTE(review): the trailing 0 is presumably InstAlias's "Emit" argument,
// making these aliases parse-only -- confirm against Target.td.
def : InstAlias<
    "vmovdqa.s\t{$src, $dst|$dst, $src}",
    (VMOVDQArr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<
    "vmovdqa.s\t{$src, $dst|$dst, $src}",
    (VMOVDQAYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<
    "vmovdqu.s\t{$src, $dst|$dst, $src}",
    (VMOVDQUrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<
    "vmovdqu.s\t{$src, $dst|$dst, $src}",
    (VMOVDQUYrr_REV VR256:$dst, VR256:$src), 0>;
34100b57cec5SDimitry Andric
// GAS-compatible ".s" spellings of the legacy-encoded moves: each alias maps
// onto the corresponding *_REV (reversed-encoding) register-register record.
def : InstAlias<
    "movdqa.s\t{$src, $dst|$dst, $src}",
    (MOVDQArr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<
    "movdqu.s\t{$src, $dst|$dst, $src}",
    (MOVDQUrr_REV VR128:$dst, VR128:$src), 0>;
34160b57cec5SDimitry Andric
// Additional load/store selection patterns for the 128-bit element types
// other than v2i64 (v4i32, v8i16, v8f16, v16i8). Aligned accesses map to the
// VMOVDQA records, all other accesses to the VMOVDQU records. The loops are
// kept in the same order as the hand-written list they replace, so the
// anonymous pattern records are created in the same sequence.
let Predicates = [HasAVX, NoVLX] in {
  // Aligned loads.
  foreach AlignedLd = [alignedloadv4i32, alignedloadv8i16,
                       alignedloadv8f16, alignedloadv16i8] in
    def : Pat<(AlignedLd addr:$src), (VMOVDQArm addr:$src)>;

  // Plain loads.
  foreach PlainLd = [loadv4i32, loadv8i16, loadv8f16, loadv16i8] in
    def : Pat<(PlainLd addr:$src), (VMOVDQUrm addr:$src)>;

  // Aligned stores.
  foreach EltVT = [v4i32, v8i16, v8f16, v16i8] in
    def : Pat<(alignedstore (EltVT VR128:$src), addr:$dst),
              (VMOVDQAmr addr:$dst, VR128:$src)>;

  // Plain stores.
  foreach EltVT = [v4i32, v8i16, v8f16, v16i8] in
    def : Pat<(store (EltVT VR128:$src), addr:$dst),
              (VMOVDQUmr addr:$dst, VR128:$src)>;
}
34530b57cec5SDimitry Andric
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//
34570b57cec5SDimitry Andric
let ExeDomain = SSEPackedInt in { // SSE integer instructions

/// PDI_binop_rm2 - Simple SSE2 binary operator whose source and destination
/// value types differ (SrcVT operands, DstVT result).
///
/// Produces two records:
///   rr - register/register form (MRMSrcReg), marked commutable.
///   rm - register/memory form (MRMSrcMem); the second operand is read
///        through memop_frag from an x86memop operand.
/// Is2Addr picks the two-operand asm string ("$src2, $dst") when set and the
/// three-operand one ("$src2, $src1, $dst") otherwise.
multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         ValueType DstVT, ValueType SrcVT, RegisterClass RC,
                         PatFrag memop_frag, X86MemOperand x86memop,
                         X86FoldableSchedWrite sched, bit Is2Addr = 1> {
  // Operand part of the asm string, shared by both forms.
  defvar AsmOperands =
      !if(Is2Addr,
          "\t{$src2, $dst|$dst, $src2}",
          "\t{$src2, $src1, $dst|$dst, $src1, $src2}");

  let isCommutable = 1 in {
    def rr
        : PDI<opc, MRMSrcReg,
              (outs RC:$dst), (ins RC:$src1, RC:$src2),
              OpcodeStr # AsmOperands,
              [(set RC:$dst,
                    (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
          Sched<[sched]>;
  }

  def rm
      : PDI<opc, MRMSrcMem,
            (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
            OpcodeStr # AsmOperands,
            [(set RC:$dst,
                  (DstVT (OpNode (SrcVT RC:$src1),
                                 (memop_frag addr:$src2))))]>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // ExeDomain = SSEPackedInt
34830b57cec5SDimitry Andric
// Packed integer arithmetic built from PDI_binop_all.
// NOTE(review): PDI_binop_all is defined outside this section; the seventh
// argument is presumably the "commutable" flag (1 for add/mul/min/max/avg,
// 0 for the subtracts) and the eighth the extra predicate applied to the
// VEX-encoded variants -- confirm against its definition.

// Wrapping adds.
defm PADDB
    : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8,
                    SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDW
    : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16,
                    SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDD
    : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
                    SchedWriteVecALU, 1, NoVLX>;
defm PADDQ
    : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
                    SchedWriteVecALU, 1, NoVLX>;

// Saturating adds (signed, then unsigned).
defm PADDSB
    : PDI_binop_all<0xEC, "paddsb", saddsat, v16i8, v32i8,
                    SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDSW
    : PDI_binop_all<0xED, "paddsw", saddsat, v8i16, v16i16,
                    SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDUSB
    : PDI_binop_all<0xDC, "paddusb", uaddsat, v16i8, v32i8,
                    SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDUSW
    : PDI_binop_all<0xDD, "paddusw", uaddsat, v8i16, v16i16,
                    SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;

// 16-bit multiplies: low half, unsigned high half, signed high half.
defm PMULLW
    : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
                    SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
defm PMULHUW
    : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16,
                    SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
defm PMULHW
    : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16,
                    SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;

// Wrapping subtracts.
defm PSUBB
    : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
                    SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBW
    : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
                    SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBD
    : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
                    SchedWriteVecALU, 0, NoVLX>;
defm PSUBQ
    : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
                    SchedWriteVecALU, 0, NoVLX>;

// Saturating subtracts (signed, then unsigned).
defm PSUBSB
    : PDI_binop_all<0xE8, "psubsb", ssubsat, v16i8, v32i8,
                    SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBSW
    : PDI_binop_all<0xE9, "psubsw", ssubsat, v8i16, v16i16,
                    SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBUSB
    : PDI_binop_all<0xD8, "psubusb", usubsat, v16i8, v32i8,
                    SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBUSW
    : PDI_binop_all<0xD9, "psubusw", usubsat, v8i16, v16i16,
                    SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;

// Minimum / maximum.
defm PMINUB
    : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8,
                    SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMINSW
    : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16,
                    SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMAXUB
    : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8,
                    SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMAXSW
    : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16,
                    SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;

// Unsigned averages (avgceilu node).
defm PAVGB
    : PDI_binop_all<0xE0, "pavgb", avgceilu, v16i8, v32i8,
                    SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PAVGW
    : PDI_binop_all<0xE3, "pavgw", avgceilu, v8i16, v16i16,
                    SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;

// pmuludq, selected from the X86pmuludq node on 64-bit element vectors.
defm PMULUDQ
    : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64,
                    SchedWriteVecIMul, 1, NoVLX>;
35360b57cec5SDimitry Andric
// pmaddwd and psadbw: binary operators whose result type differs from the
// operand type, instantiated from PDI_binop_rm2 in three flavours each --
// VEX 128-bit (HasAVX), VEX 256-bit (HasAVX2), and the legacy two-address
// form where $src1 is tied to $dst.

// pmaddwd: v8i16 (v16i16) operands, v4i32 (v8i32) result.
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  defm VPMADDWD
      : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
                      load, i128mem, SchedWriteVecIMul.XMM, 0>,
        VEX_4V, WIG;
}

let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  defm VPMADDWDY
      : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v8i32, v16i16, VR256,
                      load, i256mem, SchedWriteVecIMul.YMM, 0>,
        VEX_4V, VEX_L, WIG;
}

let Constraints = "$src1 = $dst" in {
  defm PMADDWD
      : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
                      memop, i128mem, SchedWriteVecIMul.XMM>;
}

// psadbw: v16i8 (v32i8) operands, v2i64 (v4i64) result.
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  defm VPSADBW
      : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8, VR128,
                      load, i128mem, SchedWritePSADBW.XMM, 0>,
        VEX_4V, WIG;
}

let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  defm VPSADBWY
      : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8, VR256,
                      load, i256mem, SchedWritePSADBW.YMM, 0>,
        VEX_4V, VEX_L, WIG;
}

let Constraints = "$src1 = $dst" in {
  defm PSADBW
      : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128,
                      memop, i128mem, SchedWritePSADBW.XMM>;
}
35610b57cec5SDimitry Andric
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//
35650b57cec5SDimitry Andric
/// PDI_binop_rmi - Binary operator with register, memory and 8-bit immediate
/// forms of the second operand.
///
/// Produces three records:
///   rr - opcode opc, MRMSrcReg; $src2 is a VR128 register of type SrcVT.
///   rm - opcode opc, MRMSrcMem; $src2 is an i128mem operand read through
///        ld_frag as SrcVT.
///   ri - opcode opc2 with format ImmForm; $src2 is a u8imm and the pattern
///        uses OpNode2 instead of OpNode.
/// The second source is always 128-bit in the rr/rm forms, whatever RC is.
/// Is2Addr picks the two-operand asm string when set and the three-operand
/// one otherwise.
multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
                         string OpcodeStr, SDNode OpNode,
                         SDNode OpNode2, RegisterClass RC,
                         X86FoldableSchedWrite sched,
                         X86FoldableSchedWrite schedImm,
                         ValueType DstVT, ValueType SrcVT,
                         PatFrag ld_frag, bit Is2Addr = 1> {
  // Full asm string, identical for all three forms.
  defvar AsmText =
      OpcodeStr # !if(Is2Addr,
                      "\t{$src2, $dst|$dst, $src2}",
                      "\t{$src2, $src1, $dst|$dst, $src1, $src2}");

  def rr
      : PDI<opc, MRMSrcReg,
            (outs RC:$dst), (ins RC:$src1, VR128:$src2),
            AsmText,
            [(set RC:$dst,
                  (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))]>,
        Sched<[sched]>;

  def rm
      : PDI<opc, MRMSrcMem,
            (outs RC:$dst), (ins RC:$src1, i128mem:$src2),
            AsmText,
            [(set RC:$dst,
                  (DstVT (OpNode RC:$src1,
                                 (SrcVT (ld_frag addr:$src2)))))]>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;

  def ri
      : PDIi8<opc2, ImmForm,
              (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
              AsmText,
              [(set RC:$dst,
                    (DstVT (OpNode2 RC:$src1, (i8 timm:$src2))))]>,
        Sched<[schedImm]>;
}
35970b57cec5SDimitry Andric
/// PDI_binop_rmi_all - Instantiates PDI_binop_rmi three times for one
/// operation:
///   V<NAME>  - VEX 128-bit, three-operand, under [HasAVX, prd].
///   V<NAME>Y - VEX 256-bit, three-operand, under [HasAVX2, prd].
///   <NAME>   - legacy 128-bit, two-address form with $src1 tied to $dst.
/// The VEX variants use "v" + OpcodeStr as mnemonic and `load` as the memory
/// fragment; the legacy variant uses OpcodeStr and `memop`.
multiclass PDI_binop_rmi_all<bits<8> opc, bits<8> opc2, Format ImmForm,
                             string OpcodeStr, SDNode OpNode,
                             SDNode OpNode2, ValueType DstVT128,
                             ValueType DstVT256, ValueType SrcVT,
                             X86SchedWriteWidths sched,
                             X86SchedWriteWidths schedImm, Predicate prd> {
  let Predicates = [HasAVX, prd] in {
    defm V#NAME
        : PDI_binop_rmi<opc, opc2, ImmForm, "v" # OpcodeStr,
                        OpNode, OpNode2, VR128, sched.XMM, schedImm.XMM,
                        DstVT128, SrcVT, load, 0>,
          VEX_4V, WIG;
  }

  let Predicates = [HasAVX2, prd] in {
    defm V#NAME#Y
        : PDI_binop_rmi<opc, opc2, ImmForm, "v" # OpcodeStr,
                        OpNode, OpNode2, VR256, sched.YMM, schedImm.YMM,
                        DstVT256, SrcVT, load, 0>,
          VEX_4V, VEX_L, WIG;
  }

  let Constraints = "$src1 = $dst" in {
    defm NAME
        : PDI_binop_rmi<opc, opc2, ImmForm, OpcodeStr, OpNode, OpNode2,
                        VR128, sched.XMM, schedImm.XMM, DstVT128, SrcVT,
                        memop>;
  }
}
36180b57cec5SDimitry Andric
/// PDI_binop_ri - Binary operator that only exists in register/immediate
/// form. Produces a single `ri` record: $src1 is an RC register, $src2 a
/// u8imm, and the result is (VT (OpNode $src1, imm)).
/// Is2Addr picks the two-operand asm string when set and the three-operand
/// one otherwise.
multiclass PDI_binop_ri<bits<8> opc, Format ImmForm, string OpcodeStr,
                        SDNode OpNode, RegisterClass RC, ValueType VT,
                        X86FoldableSchedWrite sched, bit Is2Addr = 1> {
  def ri
      : PDIi8<opc, ImmForm,
              (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
              OpcodeStr # !if(Is2Addr,
                              "\t{$src2, $dst|$dst, $src2}",
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set RC:$dst, (VT (OpNode RC:$src1, (i8 timm:$src2))))]>,
        Sched<[sched]>;
}
36290b57cec5SDimitry Andric
/// PDI_binop_ri_all - Instantiates PDI_binop_ri three times for one
/// immediate-only operation, always on byte vectors:
///   V<NAME>  - VEX 128-bit (v16i8), under [HasAVX, NoVLX_Or_NoBWI].
///   V<NAME>Y - VEX 256-bit (v32i8), under [HasAVX2, NoVLX_Or_NoBWI].
///   <NAME>   - legacy 128-bit (v16i8), two-address with $src1 tied to $dst.
multiclass PDI_binop_ri_all<bits<8> opc, Format ImmForm, string OpcodeStr,
                            SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
    defm V#NAME
        : PDI_binop_ri<opc, ImmForm, "v" # OpcodeStr, OpNode,
                       VR128, v16i8, sched.XMM, 0>,
          VEX_4V, WIG;
  }

  let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
    defm V#NAME#Y
        : PDI_binop_ri<opc, ImmForm, "v" # OpcodeStr, OpNode,
                       VR256, v32i8, sched.YMM, 0>,
          VEX_4V, VEX_L, WIG;
  }

  let Constraints = "$src1 = $dst" in {
    defm NAME
        : PDI_binop_ri<opc, ImmForm, OpcodeStr, OpNode, VR128, v16i8,
                       sched.XMM>;
  }
}
36430b57cec5SDimitry Andric
// Packed shifts. The element-wise shifts come from PDI_binop_rmi_all: the
// first opcode is used by the register/memory-count forms, the second opcode
// plus the MRMnr format by the immediate-count form. The whole-register byte
// shifts (pslldq/psrldq) only have an immediate form.
let ExeDomain = SSEPackedInt in {
  // Shift left logical.
  defm PSLLW
      : PDI_binop_rmi_all<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,
                          v8i16, v16i16, v8i16, SchedWriteVecShift,
                          SchedWriteVecShiftImm, NoVLX_Or_NoBWI>;
  defm PSLLD
      : PDI_binop_rmi_all<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli,
                          v4i32, v8i32, v4i32, SchedWriteVecShift,
                          SchedWriteVecShiftImm, NoVLX>;
  defm PSLLQ
      : PDI_binop_rmi_all<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli,
                          v2i64, v4i64, v2i64, SchedWriteVecShift,
                          SchedWriteVecShiftImm, NoVLX>;

  // Shift right logical.
  defm PSRLW
      : PDI_binop_rmi_all<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli,
                          v8i16, v16i16, v8i16, SchedWriteVecShift,
                          SchedWriteVecShiftImm, NoVLX_Or_NoBWI>;
  defm PSRLD
      : PDI_binop_rmi_all<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli,
                          v4i32, v8i32, v4i32, SchedWriteVecShift,
                          SchedWriteVecShiftImm, NoVLX>;
  defm PSRLQ
      : PDI_binop_rmi_all<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli,
                          v2i64, v4i64, v2i64, SchedWriteVecShift,
                          SchedWriteVecShiftImm, NoVLX>;

  // Shift right arithmetic (no 64-bit element form is defined here).
  defm PSRAW
      : PDI_binop_rmi_all<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai,
                          v8i16, v16i16, v8i16, SchedWriteVecShift,
                          SchedWriteVecShiftImm, NoVLX_Or_NoBWI>;
  defm PSRAD
      : PDI_binop_rmi_all<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
                          v4i32, v8i32, v4i32, SchedWriteVecShift,
                          SchedWriteVecShiftImm, NoVLX>;

  // Whole-register byte shifts, immediate count only.
  defm PSLLDQ
      : PDI_binop_ri_all<0x73, MRM7r, "pslldq", X86vshldq,
                         SchedWriteShuffle>;
  defm PSRLDQ
      : PDI_binop_ri_all<0x73, MRM3r, "psrldq", X86vshrdq,
                         SchedWriteShuffle>;
} // ExeDomain = SSEPackedInt
36770b57cec5SDimitry Andric
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Comparison Instructions
//===---------------------------------------------------------------------===//
36810b57cec5SDimitry Andric
// Packed integer compares built from PDI_binop_all. All of them pass
// TruePredicate as the extra predicate.
// NOTE(review): the seventh argument is presumably the "commutable" flag
// (1 for the equality compares, 0 for greater-than) -- confirm against the
// PDI_binop_all definition.

// Compare for equality.
defm PCMPEQB
    : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8,
                    SchedWriteVecALU, 1, TruePredicate>;
defm PCMPEQW
    : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16,
                    SchedWriteVecALU, 1, TruePredicate>;
defm PCMPEQD
    : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32,
                    SchedWriteVecALU, 1, TruePredicate>;

// Compare for greater-than.
defm PCMPGTB
    : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8,
                    SchedWriteVecALU, 0, TruePredicate>;
defm PCMPGTW
    : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16,
                    SchedWriteVecALU, 0, TruePredicate>;
defm PCMPGTD
    : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
                    SchedWriteVecALU, 0, TruePredicate>;
36940b57cec5SDimitry Andric
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Shuffle Instructions
//===---------------------------------------------------------------------===//
36980b57cec5SDimitry Andric
let ExeDomain = SSEPackedInt in {

/// sse2_pshuffle - Immediate-controlled shuffle (opcode 0x70) in register
/// (MRMSrcReg) and memory (MRMSrcMem) source forms.
///
/// Produces six records:
///   V<NAME>ri / V<NAME>mi   - VEX 128-bit, under [HasAVX, prd].
///   V<NAME>Yri / V<NAME>Ymi - VEX 256-bit, under [HasAVX2, prd].
///   ri / mi                 - legacy 128-bit, under [UseSSE2].
/// The VEX variants use "v" + OpcodeStr as mnemonic and read memory through
/// `load`; the legacy variant uses OpcodeStr and `memop`. The mandatory
/// prefix (PD/XS/XD) is supplied by the instantiating defm.
multiclass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256,
                         SDNode OpNode, X86SchedWriteWidths sched,
                         Predicate prd> {
  // Operand part of the asm string; the same for every form.
  defvar AsmOperands = "\t{$src2, $src1, $dst|$dst, $src1, $src2}";
  defvar VexAsm = "v" # OpcodeStr # AsmOperands;
  defvar LegacyAsm = OpcodeStr # AsmOperands;

  let Predicates = [HasAVX, prd] in {
    def V#NAME#ri
        : Ii8<0x70, MRMSrcReg,
              (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
              VexAsm,
              [(set VR128:$dst,
                    (vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>,
          VEX, Sched<[sched.XMM]>, WIG;

    def V#NAME#mi
        : Ii8<0x70, MRMSrcMem,
              (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2),
              VexAsm,
              [(set VR128:$dst,
                    (vt128 (OpNode (load addr:$src1), (i8 timm:$src2))))]>,
          VEX, Sched<[sched.XMM.Folded]>, WIG;
  }

  let Predicates = [HasAVX2, prd] in {
    def V#NAME#Yri
        : Ii8<0x70, MRMSrcReg,
              (outs VR256:$dst), (ins VR256:$src1, u8imm:$src2),
              VexAsm,
              [(set VR256:$dst,
                    (vt256 (OpNode VR256:$src1, (i8 timm:$src2))))]>,
          VEX, VEX_L, Sched<[sched.YMM]>, WIG;

    def V#NAME#Ymi
        : Ii8<0x70, MRMSrcMem,
              (outs VR256:$dst), (ins i256mem:$src1, u8imm:$src2),
              VexAsm,
              [(set VR256:$dst,
                    (vt256 (OpNode (load addr:$src1), (i8 timm:$src2))))]>,
          VEX, VEX_L, Sched<[sched.YMM.Folded]>, WIG;
  }

  let Predicates = [UseSSE2] in {
    def ri
        : Ii8<0x70, MRMSrcReg,
              (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
              LegacyAsm,
              [(set VR128:$dst,
                    (vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>,
          Sched<[sched.XMM]>;

    def mi
        : Ii8<0x70, MRMSrcMem,
              (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2),
              LegacyAsm,
              [(set VR128:$dst,
                    (vt128 (OpNode (memop addr:$src1), (i8 timm:$src2))))]>,
          Sched<[sched.XMM.Folded]>;
  }
}
} // ExeDomain = SSEPackedInt
37580b57cec5SDimitry Andric
// The three immediate shuffles share opcode 0x70 (see sse2_pshuffle) and are
// told apart by their prefix class: PD for pshufd, XS for pshufhw and XD for
// pshuflw.
defm PSHUFD
    : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd,
                    SchedWriteShuffle, NoVLX>,
      PD;
defm PSHUFHW
    : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw,
                    SchedWriteShuffle, NoVLX_Or_NoBWI>,
      XS;
defm PSHUFLW
    : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw,
                    SchedWriteShuffle, NoVLX_Or_NoBWI>,
      XD;
37650b57cec5SDimitry Andric
37660b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
37670b57cec5SDimitry Andric// Packed Integer Pack Instructions (SSE & AVX)
37680b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
37690b57cec5SDimitry Andric
37700b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in {
/// sse2_pack - rr and rm forms of a two-source pack instruction based on PDI.
///   opc       - opcode byte.
///   OpcodeStr - mnemonic placed in front of the operand string.
///   OutVT     - value type of the result.
///   ArgVT     - value type applied to $src1 in the patterns.
///   OpNode    - selection node matched by both patterns.
///   RC        - register class of $dst, $src1 and (rr only) $src2.
///   x86memop  - memory operand type of $src2 in the rm form.
///   sched     - scheduling write; rm uses its Folded and ReadAfterFold.
///   ld_frag   - load fragment wrapped around the memory source.
///   Is2Addr   - when set (default) the assembly string prints only $src2 and
///               $dst; otherwise it prints $src2, $src1 and $dst.
multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
                     ValueType ArgVT, SDNode OpNode, RegisterClass RC,
                     X86MemOperand x86memop, X86FoldableSchedWrite sched,
                     PatFrag ld_frag, bit Is2Addr = 1> {
  // Register-register form.
  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
               !if(Is2Addr,
                   OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                   OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set RC:$dst, (OutVT (OpNode (ArgVT RC:$src1), RC:$src2)))]>,
           Sched<[sched]>;

  // Register-memory form: $src2 is loaded through ld_frag.
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86memop:$src2),
               !if(Is2Addr,
                   OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                   OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set RC:$dst,
                     (OutVT (OpNode (ArgVT RC:$src1),
                                    (ld_frag addr:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
37950b57cec5SDimitry Andric
/// sse4_pack - Same shape as sse2_pack, but both definitions derive from
/// SS48I instead of PDI.
///   opc       - opcode byte.
///   OpcodeStr - mnemonic placed in front of the operand string.
///   OutVT     - value type of the result.
///   ArgVT     - value type applied to $src1 in the patterns.
///   OpNode    - selection node matched by both patterns.
///   RC        - register class of $dst, $src1 and (rr only) $src2.
///   x86memop  - memory operand type of $src2 in the rm form.
///   sched     - scheduling write; rm uses its Folded and ReadAfterFold.
///   ld_frag   - load fragment wrapped around the memory source.
///   Is2Addr   - when set (default) the assembly string prints only $src2 and
///               $dst; otherwise it prints $src2, $src1 and $dst.
multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
                     ValueType ArgVT, SDNode OpNode, RegisterClass RC,
                     X86MemOperand x86memop, X86FoldableSchedWrite sched,
                     PatFrag ld_frag, bit Is2Addr = 1> {
  // Register-register form.
  def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
                 !if(Is2Addr,
                     OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                     OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set RC:$dst,
                       (OutVT (OpNode (ArgVT RC:$src1), RC:$src2)))]>,
           Sched<[sched]>;

  // Register-memory form: $src2 is loaded through ld_frag.
  def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
                 (ins RC:$src1, x86memop:$src2),
                 !if(Is2Addr,
                     OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                     OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set RC:$dst,
                       (OutVT (OpNode (ArgVT RC:$src1),
                                      (ld_frag addr:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
38200b57cec5SDimitry Andric
// 128-bit VEX packs on VR128/i128mem. Is2Addr is passed as 0, so the
// three-operand assembly string is used, and `load` is the memory fragment.
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  // X86Packss: v8i16 -> v16i8 and v4i32 -> v8i16.
  defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss,
                             VR128, i128mem, SchedWriteShuffle.XMM, load, 0>,
                   VEX_4V, WIG;
  defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss,
                             VR128, i128mem, SchedWriteShuffle.XMM, load, 0>,
                   VEX_4V, WIG;

  // X86Packus: same type pairs; the dword variant goes through sse4_pack.
  defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus,
                             VR128, i128mem, SchedWriteShuffle.XMM, load, 0>,
                   VEX_4V, WIG;
  defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus,
                             VR128, i128mem, SchedWriteShuffle.XMM, load, 0>,
                   VEX_4V, WIG;
}
38360b57cec5SDimitry Andric
// 256-bit VEX packs on VR256/i256mem; these add VEX_L and use the YMM
// scheduling entry. Is2Addr is 0, so all three operands are printed.
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  // X86Packss: v16i16 -> v32i8 and v8i32 -> v16i16.
  defm VPACKSSWBY : sse2_pack<0x63, "vpacksswb", v32i8, v16i16, X86Packss,
                              VR256, i256mem, SchedWriteShuffle.YMM, load, 0>,
                    VEX_4V, VEX_L, WIG;
  defm VPACKSSDWY : sse2_pack<0x6B, "vpackssdw", v16i16, v8i32, X86Packss,
                              VR256, i256mem, SchedWriteShuffle.YMM, load, 0>,
                    VEX_4V, VEX_L, WIG;

  // X86Packus: same type pairs; the dword variant goes through sse4_pack.
  defm VPACKUSWBY : sse2_pack<0x67, "vpackuswb", v32i8, v16i16, X86Packus,
                              VR256, i256mem, SchedWriteShuffle.YMM, load, 0>,
                    VEX_4V, VEX_L, WIG;
  defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus,
                              VR256, i256mem, SchedWriteShuffle.YMM, load, 0>,
                    VEX_4V, VEX_L, WIG;
}
38520b57cec5SDimitry Andric
// Non-VEX packs. $src1 is tied to $dst, Is2Addr keeps its default of 1 (the
// two-operand assembly string), and memory sources use the memop fragment.
let Constraints = "$src1 = $dst" in {
  defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss,
                            VR128, i128mem, SchedWriteShuffle.XMM, memop>;
  defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss,
                            VR128, i128mem, SchedWriteShuffle.XMM, memop>;

  defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus,
                            VR128, i128mem, SchedWriteShuffle.XMM, memop>;

  // The dword variant is built from sse4_pack rather than sse2_pack.
  defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus,
                            VR128, i128mem, SchedWriteShuffle.XMM, memop>;
}
38650b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
38660b57cec5SDimitry Andric
38670b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
38680b57cec5SDimitry Andric// SSE2 - Packed Integer Unpack Instructions
38690b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
38700b57cec5SDimitry Andric
38710b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in {
/// sse2_unpack - rr and rm forms of a two-source unpack instruction on PDI.
///   opc       - opcode byte.
///   OpcodeStr - mnemonic placed in front of the operand string.
///   vt        - value type of the result.
///   OpNode    - selection node matched by both patterns.
///   RC        - register class of $dst, $src1 and (rr only) $src2.
///   x86memop  - memory operand type of $src2 in the rm form.
///   sched     - scheduling write; rm uses its Folded and ReadAfterFold.
///   ld_frag   - load fragment wrapped around the memory source.
///   Is2Addr   - when set (default) the assembly string prints only $src2 and
///               $dst; otherwise it prints $src2, $src1 and $dst.
multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
                       SDNode OpNode, RegisterClass RC, X86MemOperand x86memop,
                       X86FoldableSchedWrite sched, PatFrag ld_frag,
                       bit Is2Addr = 1> {
  // Register-register form.
  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
               !if(Is2Addr,
                   OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                   OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
           Sched<[sched]>;

  // Register-memory form: $src2 is loaded through ld_frag.
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86memop:$src2),
               !if(Is2Addr,
                   OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                   OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set RC:$dst,
                     (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
38910b57cec5SDimitry Andric
// 128-bit VEX unpacks of v16i8 and v8i16, predicated on NoVLX_Or_NoBWI.
// Is2Addr is 0 (three-operand string); memory sources use `load`.
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  // X86Unpckl
  defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, VR128,
                                i128mem, SchedWriteShuffle.XMM, load, 0>,
                    VEX_4V, WIG;
  defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, VR128,
                                i128mem, SchedWriteShuffle.XMM, load, 0>,
                    VEX_4V, WIG;
  // X86Unpckh
  defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, VR128,
                                i128mem, SchedWriteShuffle.XMM, load, 0>,
                    VEX_4V, WIG;
  defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, VR128,
                                i128mem, SchedWriteShuffle.XMM, load, 0>,
                    VEX_4V, WIG;
}
39060b57cec5SDimitry Andric
// 128-bit VEX unpacks of v4i32 and v2i64; unlike the byte/word group these
// are predicated on NoVLX alone. Is2Addr is 0; memory sources use `load`.
let Predicates = [HasAVX, NoVLX] in {
  // X86Unpckl
  defm VPUNPCKLDQ  : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, VR128,
                                 i128mem, SchedWriteShuffle.XMM, load, 0>,
                     VEX_4V, WIG;
  defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, VR128,
                                 i128mem, SchedWriteShuffle.XMM, load, 0>,
                     VEX_4V, WIG;
  // X86Unpckh
  defm VPUNPCKHDQ  : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, VR128,
                                 i128mem, SchedWriteShuffle.XMM, load, 0>,
                     VEX_4V, WIG;
  defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, VR128,
                                 i128mem, SchedWriteShuffle.XMM, load, 0>,
                     VEX_4V, WIG;
}
39210b57cec5SDimitry Andric
// 256-bit VEX unpacks of v32i8 and v16i16 on VR256/i256mem; these add VEX_L
// and use the YMM scheduling entry. Is2Addr is 0; memory sources use `load`.
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  // X86Unpckl
  defm VPUNPCKLBWY : sse2_unpack<0x60, "vpunpcklbw", v32i8, X86Unpckl, VR256,
                                 i256mem, SchedWriteShuffle.YMM, load, 0>,
                     VEX_4V, VEX_L, WIG;
  defm VPUNPCKLWDY : sse2_unpack<0x61, "vpunpcklwd", v16i16, X86Unpckl, VR256,
                                 i256mem, SchedWriteShuffle.YMM, load, 0>,
                     VEX_4V, VEX_L, WIG;
  // X86Unpckh
  defm VPUNPCKHBWY : sse2_unpack<0x68, "vpunpckhbw", v32i8, X86Unpckh, VR256,
                                 i256mem, SchedWriteShuffle.YMM, load, 0>,
                     VEX_4V, VEX_L, WIG;
  defm VPUNPCKHWDY : sse2_unpack<0x69, "vpunpckhwd", v16i16, X86Unpckh, VR256,
                                 i256mem, SchedWriteShuffle.YMM, load, 0>,
                     VEX_4V, VEX_L, WIG;
}
39360b57cec5SDimitry Andric
// 256-bit VEX unpacks of v8i32 and v4i64, predicated on NoVLX alone.
// Is2Addr is 0; memory sources use `load`.
let Predicates = [HasAVX2, NoVLX] in {
  // X86Unpckl
  defm VPUNPCKLDQY  : sse2_unpack<0x62, "vpunpckldq", v8i32, X86Unpckl, VR256,
                                  i256mem, SchedWriteShuffle.YMM, load, 0>,
                      VEX_4V, VEX_L, WIG;
  defm VPUNPCKLQDQY : sse2_unpack<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, VR256,
                                  i256mem, SchedWriteShuffle.YMM, load, 0>,
                      VEX_4V, VEX_L, WIG;
  // X86Unpckh
  defm VPUNPCKHDQY  : sse2_unpack<0x6A, "vpunpckhdq", v8i32, X86Unpckh, VR256,
                                  i256mem, SchedWriteShuffle.YMM, load, 0>,
                      VEX_4V, VEX_L, WIG;
  defm VPUNPCKHQDQY : sse2_unpack<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, VR256,
                                  i256mem, SchedWriteShuffle.YMM, load, 0>,
                      VEX_4V, VEX_L, WIG;
}
39510b57cec5SDimitry Andric
// Non-VEX unpacks. $src1 is tied to $dst, Is2Addr keeps its default of 1 (the
// two-operand assembly string), and memory sources use the memop fragment.
let Constraints = "$src1 = $dst" in {
  // X86Unpckl
  defm PUNPCKLBW  : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl,
                                VR128, i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKLWD  : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl,
                                VR128, i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKLDQ  : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl,
                                VR128, i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl,
                                VR128, i128mem, SchedWriteShuffle.XMM, memop>;

  // X86Unpckh
  defm PUNPCKHBW  : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh,
                                VR128, i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKHWD  : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh,
                                VR128, i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKHDQ  : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh,
                                VR128, i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh,
                                VR128, i128mem, SchedWriteShuffle.XMM, memop>;
}
39710b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
39720b57cec5SDimitry Andric
39730b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
39740b57cec5SDimitry Andric// SSE2 - Packed Integer Extract and Insert
39750b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
39760b57cec5SDimitry Andric
39770b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in {
/// sse2_pinsrw - rr and rm definitions for opcode 0xC4, matching X86pinsrw
/// with a VR128 in $src1, the inserted value in $src2 and an 8-bit immediate
/// in $src3.
///   Is2Addr - when set (default) the "pinsrw" string is used, which prints
///             $src3, $src2 and $dst; otherwise the "vpinsrw" string is used,
///             which additionally prints $src1.
multiclass sse2_pinsrw<bit Is2Addr = 1> {
  // Inserted value comes from a GR32orGR64 register.
  def rr : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
               (ins VR128:$src1, GR32orGR64:$src2, u8imm:$src3),
               !if(Is2Addr,
                   "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                   "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
               [(set VR128:$dst,
                     (X86pinsrw VR128:$src1, GR32orGR64:$src2,
                                timm:$src3))]>,
           Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;

  // Inserted value comes from an i16mem operand, matched via extloadi16.
  def rm : Ii8<0xC4, MRMSrcMem, (outs VR128:$dst),
               (ins VR128:$src1, i16mem:$src2, u8imm:$src3),
               !if(Is2Addr,
                   "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                   "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
               [(set VR128:$dst,
                     (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
                                timm:$src3))]>,
           Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
39990b57cec5SDimitry Andric
// Extract
// Both definitions use opcode 0xC5 and match X86pextrw on a v8i16 source with
// an immediate in $src2, writing a GR32orGR64 destination.

// VEX form, predicated on HasAVX and NoBWI.
let Predicates = [HasAVX, NoBWI] in
def VPEXTRWrr : Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                    (ins VR128:$src1, u8imm:$src2),
                    "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set GR32orGR64:$dst,
                          (X86pextrw (v8i16 VR128:$src1), timm:$src2))]>,
                PD, VEX, WIG, Sched<[WriteVecExtract]>;

// Non-VEX form, built on PDIi8.
def PEXTRWrr : PDIi8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                     (ins VR128:$src1, u8imm:$src2),
                     "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set GR32orGR64:$dst,
                           (X86pextrw (v8i16 VR128:$src1), timm:$src2))]>,
               Sched<[WriteVecExtract]>;
40140b57cec5SDimitry Andric
// Insert
// VEX form: Is2Addr = 0 selects the "vpinsrw" assembly string.
let Predicates = [HasAVX, NoBWI] in
defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V, WIG;

// Non-VEX form: default Is2Addr, with $src1 tied to $dst.
let Constraints = "$src1 = $dst", Predicates = [UseSSE2] in
defm PINSRW : sse2_pinsrw, PD;
40210b57cec5SDimitry Andric
40220b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
40230b57cec5SDimitry Andric
// Always select FP16 instructions if available.
// These SSE2 patterns therefore carry AddedComplexity = -10. They move f16
// values through PINSRW/PEXTRW with immediate 0.
let Predicates = [UseSSE2], AddedComplexity = -10 in {
  // f16 load -> PINSRWrm into an IMPLICIT_DEF v8i16, then copy to FR16.
  def : Pat<(f16 (load addr:$src)),
            (COPY_TO_REGCLASS
              (PINSRWrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0),
              FR16)>;

  // f16 store -> PEXTRWrr, take sub_16bit of the result, store with MOV16mr.
  def : Pat<(store f16:$src, addr:$dst),
            (MOV16mr addr:$dst,
              (EXTRACT_SUBREG
                (PEXTRWrr (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0),
                sub_16bit))>;

  // f16 -> i16 bitcast: PEXTRWrr and take sub_16bit of the result.
  def : Pat<(i16 (bitconvert f16:$src)),
            (EXTRACT_SUBREG
              (PEXTRWrr (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0),
              sub_16bit)>;

  // i16 -> f16 bitcast: widen GR16 with INSERT_SUBREG, then PINSRWrr.
  def : Pat<(f16 (bitconvert i16:$src)),
            (COPY_TO_REGCLASS
              (PINSRWrr (v8i16 (IMPLICIT_DEF)),
                        (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit),
                        0),
              FR16)>;
}
403181ad6265SDimitry Andric
// VEX counterparts of the f16 load and bitcast patterns, built on
// VPINSRW/VPEXTRW with immediate 0. No f16 store pattern is defined in this
// group.
let Predicates = [HasAVX, NoBWI] in {
  // f16 load -> VPINSRWrm into an IMPLICIT_DEF v8i16, then copy to FR16.
  def : Pat<(f16 (load addr:$src)),
            (COPY_TO_REGCLASS
              (VPINSRWrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0),
              FR16)>;

  // f16 -> i16 bitcast: VPEXTRWrr and take sub_16bit of the result.
  def : Pat<(i16 (bitconvert f16:$src)),
            (EXTRACT_SUBREG
              (VPEXTRWrr (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0),
              sub_16bit)>;

  // i16 -> f16 bitcast: widen GR16 with INSERT_SUBREG, then VPINSRWrr.
  def : Pat<(f16 (bitconvert i16:$src)),
            (COPY_TO_REGCLASS
              (VPINSRWrr (v8i16 (IMPLICIT_DEF)),
                         (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit),
                         0),
              FR16)>;
}
403781ad6265SDimitry Andric
40380b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
40390b57cec5SDimitry Andric// SSE2 - Packed Mask Creation
40400b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
40410b57cec5SDimitry Andric
40420b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in {
40430b57cec5SDimitry Andric
// All three definitions use opcode 0xD7 and match X86movmsk on a byte vector,
// producing a GR32orGR64 result.

// VPDI-based form with a 128-bit (v16i8) source.
def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
                       (ins VR128:$src),
                       "pmovmskb\t{$src, $dst|$dst, $src}",
                       [(set GR32orGR64:$dst,
                             (X86movmsk (v16i8 VR128:$src)))]>,
                  Sched<[WriteVecMOVMSK]>, VEX, WIG;

// VPDI-based form with a 256-bit (v32i8) source; requires HasAVX2 and adds
// VEX_L.
let Predicates = [HasAVX2] in {
def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
                        (ins VR256:$src),
                        "pmovmskb\t{$src, $dst|$dst, $src}",
                        [(set GR32orGR64:$dst,
                              (X86movmsk (v32i8 VR256:$src)))]>,
                   Sched<[WriteVecMOVMSKY]>, VEX, VEX_L, WIG;
}

// PDI-based form with a 128-bit (v16i8) source.
def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
                     (ins VR128:$src),
                     "pmovmskb\t{$src, $dst|$dst, $src}",
                     [(set GR32orGR64:$dst,
                           (X86movmsk (v16i8 VR128:$src)))]>,
                 Sched<[WriteVecMOVMSK]>;
40620b57cec5SDimitry Andric
40630b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
40640b57cec5SDimitry Andric
40650b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
40660b57cec5SDimitry Andric// SSE2 - Conditional Store
40670b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
40680b57cec5SDimitry Andric
40690b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in {
// As VEX does not have separate instruction contexts for address size
// overrides, VMASKMOVDQU and VMASKMOVDQU64 would have a decode conflict.
// Prefer VMASKMOVDQU64.

// 64-bit-mode form: the intrinsic pattern takes RDI as its third operand.
let Predicates = [HasAVX,In64BitMode], Uses = [RDI] in
def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
                         (ins VR128:$src, VR128:$mask),
                         "maskmovdqu\t{$mask, $src|$src, $mask}",
                         [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask,
                                                    RDI)]>,
                    VEX, WIG;

// EDI form, marked isAsmParserOnly.
let Predicates = [HasAVX], Uses = [EDI], isAsmParserOnly = 1 in
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
                       (ins VR128:$src, VR128:$mask),
                       "maskmovdqu\t{$mask, $src|$src, $mask}",
                       [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask,
                                                  EDI)]>,
                  VEX, WIG;
408506c3fb27SDimitry Andric
// Non-VEX forms of the same opcode (0xF7), built on PDI.

// 64-bit-mode form: the intrinsic pattern takes RDI as its third operand.
let Predicates = [UseSSE2,In64BitMode], Uses = [RDI] in
def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs),
                       (ins VR128:$src, VR128:$mask),
                       "maskmovdqu\t{$mask, $src|$src, $mask}",
                       [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask,
                                                  RDI)]>;

// EDI form.
let Predicates = [UseSSE2], Uses = [EDI] in
def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs),
                     (ins VR128:$src, VR128:$mask),
                     "maskmovdqu\t{$mask, $src|$src, $mask}",
                     [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask,
                                                EDI)]>;
40940b57cec5SDimitry Andric
40950b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
40960b57cec5SDimitry Andric
40970b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
40980b57cec5SDimitry Andric// SSE2 - Move Doubleword/Quadword
40990b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
41000b57cec5SDimitry Andric
41010b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
41020b57cec5SDimitry Andric// Move Int Doubleword to Packed Double Int
41030b57cec5SDimitry Andric//
41040b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in {
// VEX forms, all on opcode 0x6E.

// GR32 -> VR128 through scalar_to_vector (v4i32).
def VMOVDI2PDIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst),
                        (ins GR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                              (v4i32 (scalar_to_vector GR32:$src)))]>,
                   VEX, Sched<[WriteVecMoveFromGpr]>;

// 32-bit load -> VR128 through scalar_to_vector (v4i32).
def VMOVDI2PDIrm : VS2I<0x6E, MRMSrcMem, (outs VR128:$dst),
                        (ins i32mem:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                              (v4i32 (scalar_to_vector
                                       (loadi32 addr:$src))))]>,
                   VEX, Sched<[WriteVecLoad]>;

// GR64 -> VR128 through scalar_to_vector (v2i64).
def VMOV64toPQIrr : VRS2I<0x6E, MRMSrcReg, (outs VR128:$dst),
                          (ins GR64:$src),
                          "movq\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst,
                                (v2i64 (scalar_to_vector GR64:$src)))]>,
                    VEX, Sched<[WriteVecMoveFromGpr]>;

// i64mem form with no selection pattern; flagged isCodeGenOnly but with
// ForceDisassemble set, and marked as a side-effect-free load.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    mayLoad = 1 in
def VMOV64toPQIrm : VRS2I<0x6E, MRMSrcMem, (outs VR128:$dst),
                          (ins i64mem:$src),
                          "movq\t{$src, $dst|$dst, $src}", []>,
                    VEX, Sched<[WriteVecLoad]>;

// GR64 -> FR64 bitconvert; code-generation only.
let isCodeGenOnly = 1 in
def VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst),
                         (ins GR64:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                         [(set FR64:$dst, (bitconvert GR64:$src))]>,
                   VEX, Sched<[WriteVecMoveFromGpr]>;
41290b57cec5SDimitry Andric
// Non-VEX forms, all on opcode 0x6E.

// GR32 -> VR128 through scalar_to_vector (v4i32).
def MOVDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst),
                      (ins GR32:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                            (v4i32 (scalar_to_vector GR32:$src)))]>,
                  Sched<[WriteVecMoveFromGpr]>;

// 32-bit load -> VR128 through scalar_to_vector (v4i32).
def MOVDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst),
                      (ins i32mem:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                            (v4i32 (scalar_to_vector
                                     (loadi32 addr:$src))))]>,
                  Sched<[WriteVecLoad]>;

// GR64 -> VR128 through scalar_to_vector (v2i64).
def MOV64toPQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst),
                        (ins GR64:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                              (v2i64 (scalar_to_vector GR64:$src)))]>,
                   Sched<[WriteVecMoveFromGpr]>;

// i64mem form with no selection pattern; flagged isCodeGenOnly but with
// ForceDisassemble set, and marked as a side-effect-free load.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    mayLoad = 1 in
def MOV64toPQIrm : RS2I<0x6E, MRMSrcMem, (outs VR128:$dst),
                        (ins i64mem:$src),
                        "movq\t{$src, $dst|$dst, $src}", []>,
                   Sched<[WriteVecLoad]>;

// GR64 -> FR64 bitconvert; code-generation only.
let isCodeGenOnly = 1 in
def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst),
                       (ins GR64:$src),
                       "movq\t{$src, $dst|$dst, $src}",
                       [(set FR64:$dst, (bitconvert GR64:$src))]>,
                  Sched<[WriteVecMoveFromGpr]>;
41540b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
41550b57cec5SDimitry Andric
41560b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
41570b57cec5SDimitry Andric// Move Int Doubleword to Single Scalar
41580b57cec5SDimitry Andric//
// GR32 -> FR32 bitconvert on opcode 0x6E, in a VEX (VS2I) and a non-VEX (S2I)
// variant. Both are code-generation only.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
  def VMOVDI2SSrr : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst),
                         (ins GR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set FR32:$dst, (bitconvert GR32:$src))]>,
                    VEX, Sched<[WriteVecMoveFromGpr]>;

  def MOVDI2SSrr : S2I<0x6E, MRMSrcReg, (outs FR32:$dst),
                       (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert GR32:$src))]>,
                   Sched<[WriteVecMoveFromGpr]>;

} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
41710b57cec5SDimitry Andric
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int first element to Doubleword Int
//
// Opcode 0x7E movd: element 0 of a v4i32 in VR128 is either copied to a GR32
// (rr) or stored to an i32mem location (mr). VEX forms come first, legacy
// forms second.
let ExeDomain = SSEPackedInt in {
  def VMOVPDI2DIrr
      : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
             "movd\t{$src, $dst|$dst, $src}",
             [(set GR32:$dst,
                   (extractelt (v4i32 VR128:$src), (iPTR 0)))]>,
        VEX, Sched<[WriteVecMoveToGpr]>;

  def VMOVPDI2DImr
      : VS2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
             "movd\t{$src, $dst|$dst, $src}",
             [(store (i32 (extractelt (v4i32 VR128:$src), (iPTR 0))),
                     addr:$dst)]>,
        VEX, Sched<[WriteVecStore]>;

  def MOVPDI2DIrr
      : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
            "movd\t{$src, $dst|$dst, $src}",
            [(set GR32:$dst,
                  (extractelt (v4i32 VR128:$src), (iPTR 0)))]>,
        Sched<[WriteVecMoveToGpr]>;

  def MOVPDI2DImr
      : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
            "movd\t{$src, $dst|$dst, $src}",
            [(store (i32 (extractelt (v4i32 VR128:$src), (iPTR 0))),
                    addr:$dst)]>,
        Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt
41980b57cec5SDimitry Andric
//===---------------------------------------------------------------------===//
// Move Packed Quadword Int first element to Quadword Int
//
// Opcode 0x7E movq: element 0 of a v2i64 in VR128 is copied to a GR64 (rr).
// The mr records carry no pattern; they are isCodeGenOnly with
// ForceDisassemble set and have their store flags given explicitly.
let ExeDomain = SSEPackedInt in {
  let SchedRW = [WriteVecMoveToGpr] in {
    def VMOVPQIto64rr
        : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
                "movq\t{$src, $dst|$dst, $src}",
                [(set GR64:$dst,
                      (extractelt (v2i64 VR128:$src), (iPTR 0)))]>,
          VEX;

    def MOVPQIto64rr
        : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
               "movq\t{$src, $dst|$dst, $src}",
               [(set GR64:$dst,
                     (extractelt (v2i64 VR128:$src), (iPTR 0)))]>;
  } // SchedRW

  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
      mayStore = 1 in {
    def VMOVPQIto64mr
        : VRS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                "movq\t{$src, $dst|$dst, $src}", []>,
          VEX, Sched<[WriteVecStore]>;

    def MOVPQIto64mr
        : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
               "movq\t{$src, $dst|$dst, $src}", []>,
          Sched<[WriteVecStore]>;
  }
} // ExeDomain = SSEPackedInt
42260b57cec5SDimitry Andric
//===---------------------------------------------------------------------===//
// Bitcast FR64 <-> GR64
//
// The two records here cover the FR64 -> GR64 direction (opcode 0x7E movq),
// in VEX and legacy encodings.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
  def VMOVSDto64rr
      : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
              "movq\t{$src, $dst|$dst, $src}",
              [(set GR64:$dst, (bitconvert FR64:$src))]>,
        VEX, Sched<[WriteVecMoveToGpr]>;

  def MOVSDto64rr
      : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
             "movq\t{$src, $dst|$dst, $src}",
             [(set GR64:$dst, (bitconvert FR64:$src))]>,
        Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
42410b57cec5SDimitry Andric
//===---------------------------------------------------------------------===//
// Move Scalar Single to Double Int
//
// FR32 -> GR32 bitcast through opcode 0x7E movd, in VEX and legacy encodings.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
  def VMOVSS2DIrr
      : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
             "movd\t{$src, $dst|$dst, $src}",
             [(set GR32:$dst, (bitconvert FR32:$src))]>,
        VEX, Sched<[WriteVecMoveToGpr]>;

  def MOVSS2DIrr
      : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
            "movd\t{$src, $dst|$dst, $src}",
            [(set GR32:$dst, (bitconvert FR32:$src))]>,
        Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
42550b57cec5SDimitry Andric
// Selection patterns that map scalar-to-vector and zero-extending loads onto
// the VEX movd/movq records.
let Predicates = [UseAVX] in {
  // An any-extended GR8 is placed into an undefined i32 via INSERT_SUBREG
  // before being moved into the vector register.
  def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
            (VMOVDI2PDIrr
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit)))>;

  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIrr GR64:$src)>;

  // The AVX 128-bit movd/movq forms zero the high 128-bit part, and therefore
  // also the high part of a 256-bit register.
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIrm addr:$src)>;

  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i64 0),
                           (v4i32 (VMOVDI2PDIrm addr:$src)),
                           sub_xmm)>;
}
42730b57cec5SDimitry Andric
// Legacy-encoded counterparts of the zero-extending movd/movq patterns.
let Predicates = [UseSSE2] in {
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (MOVDI2PDIrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (MOV64toPQIrr GR64:$src)>;

  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (MOVDI2PDIrm addr:$src)>;
}
42830b57cec5SDimitry Andric
// Before LLVM had an MC layer, clang printed "movd" where "movq" was meant
// because of a MacOS parsing limitation. These aliases keep that old assembly
// parseable.
def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
                (MOV64toPQIrr VR128:$dst, GR64:$src),
                0>;
def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
                (MOVPQIto64rr GR64:$dst, VR128:$src),
                0>;

// "vmovd" is accepted as well, but "vmovq" is what gets printed: AVX needs no
// such compatibility.
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIrr VR128:$dst, GR64:$src),
                0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64rr GR64:$dst, VR128:$src),
                0>;
42960b57cec5SDimitry Andric
//===---------------------------------------------------------------------===//
// SSE2 - Move Quadword
//===---------------------------------------------------------------------===//

//===---------------------------------------------------------------------===//
// Move Quadword Int to Packed Quadword Int
//
// Opcode 0x7E with the XS prefix: an i64 load becomes element 0 of a v2i64
// through scalar_to_vector.

let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLoad] in {
  def VMOVQI2PQIrm
      : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
          "vmovq\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst,
                (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
        XS, VEX, Requires<[UseAVX]>, WIG;

  // SSE2 instruction with XS Prefix
  def MOVQI2PQIrm
      : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
          "movq\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst,
                (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
        XS, Requires<[UseSSE2]>;
} // ExeDomain, SchedRW
43170b57cec5SDimitry Andric
//===---------------------------------------------------------------------===//
// Move Packed Quadword Int to Quadword Int
//
// Opcode 0xD6 movq store: element 0 of a v2i64 in VR128 is written to an
// i64mem location.
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecStore] in {
  def VMOVPQI2QImr
      : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
             "movq\t{$src, $dst|$dst, $src}",
             [(store (i64 (extractelt (v2i64 VR128:$src), (iPTR 0))),
                     addr:$dst)]>,
        VEX, WIG;

  def MOVPQI2QImr
      : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
            "movq\t{$src, $dst|$dst, $src}",
            [(store (i64 (extractelt (v2i64 VR128:$src), (iPTR 0))),
                    addr:$dst)]>;
} // ExeDomain, SchedRW
43320b57cec5SDimitry Andric
// For disassembler only: register-to-register form of the 0xD6 movq.
// There is no selection pattern, so hasSideEffects is stated explicitly.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    SchedRW = [SchedWriteVecLogic.XMM] in {
  def VMOVPQI2QIrr
      : VS2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
             "movq\t{$src, $dst|$dst, $src}", []>,
        VEX, WIG;

  def MOVPQI2QIrr
      : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
            "movq\t{$src, $dst|$dst, $src}", []>;
}
43410b57cec5SDimitry Andric
// ".s" mnemonic spellings that map to the 0xD6 register-to-register records.
def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIrr VR128:$dst, VR128:$src),
                0>;
def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
                (MOVPQI2QIrr VR128:$dst, VR128:$src),
                0>;
43460b57cec5SDimitry Andric
// VEX movq selection: zero-extending 64-bit loads (128- and 256-bit results)
// and the 64-bit extract-and-store node.
let Predicates = [UseAVX] in {
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIrm addr:$src)>;

  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0),
                           (v2i64 (VMOVQI2PQIrm addr:$src)),
                           sub_xmm)>;

  def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
            (VMOVPQI2QImr addr:$dst, VR128:$src)>;
}
43560b57cec5SDimitry Andric
// Legacy-encoded movq selection for the zero-extending 64-bit load and the
// 64-bit extract-and-store node.
let Predicates = [UseSSE2] in {
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (MOVQI2PQIrm addr:$src)>;

  def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
            (MOVPQI2QImr addr:$dst, VR128:$src)>;
}
43630b57cec5SDimitry Andric
//===---------------------------------------------------------------------===//
// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
// the IA32 document: movq xmm1, xmm2 does clear the high bits.
//
// Both records select X86vzmovl on v2i64 (opcode 0x7E, XS prefix).
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
  def VMOVZPQILo2PQIrr
      : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
          "vmovq\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
        XS, VEX, Requires<[UseAVX]>, WIG;

  def MOVZPQILo2PQIrr
      : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
          "movq\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
        XS, Requires<[UseSSE2]>;
} // ExeDomain, SchedRW
43780b57cec5SDimitry Andric
// X86vzmovl on v2f64 reuses the integer movq xmm, xmm records.
let Predicates = [UseAVX] in {
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
            (VMOVZPQILo2PQIrr VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
            (MOVZPQILo2PQIrr VR128:$src)>;
}
43870b57cec5SDimitry Andric
// 256-bit X86vzmovl: take the low xmm subregister, run it through
// VMOVZPQILo2PQIrr, and rebuild the ymm value with SUBREG_TO_REG.
let Predicates = [UseAVX] in {
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
            (SUBREG_TO_REG
              (i32 0),
              (v2f64 (VMOVZPQILo2PQIrr
                       (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)))),
              sub_xmm)>;

  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
            (SUBREG_TO_REG
              (i32 0),
              (v2i64 (VMOVZPQILo2PQIrr
                       (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)))),
              sub_xmm)>;
}
44000b57cec5SDimitry Andric
//===---------------------------------------------------------------------===//
// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

/// sse3_replicate_sfp - register (rr) and memory (rm) forms of a one-source
/// replicate instruction.
///   op        - opcode byte.
///   OpNode    - SelectionDAG node matched by both forms.
///   OpcodeStr - mnemonic placed in front of the operand string.
///   vt        - result value type used in the rr pattern.
///   RC        - register class of the source and destination.
///   mem_frag  - load fragment matched by the rm pattern.
///   x86memop  - memory operand type of the rm form.
///   sched     - scheduling class; the rm form uses sched.Folded.
multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
                              ValueType vt, RegisterClass RC, PatFrag mem_frag,
                              X86MemOperand x86memop,
                              X86FoldableSchedWrite sched> {
  def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                [(set RC:$dst, (vt (OpNode RC:$src)))]>,
           Sched<[sched]>;

  def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>,
           Sched<[sched.Folded]>;
}
44170b57cec5SDimitry Andric
// VEX-encoded replicate instructions: 128-bit forms on VR128 and 256-bit (Y)
// forms on VR256 with VEX_L.
let Predicates = [HasAVX, NoVLX] in {
  defm VMOVSHDUP
      : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", v4f32, VR128,
                           loadv4f32, f128mem, SchedWriteFShuffle.XMM>,
        VEX, WIG;

  defm VMOVSLDUP
      : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", v4f32, VR128,
                           loadv4f32, f128mem, SchedWriteFShuffle.XMM>,
        VEX, WIG;

  defm VMOVSHDUPY
      : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", v8f32, VR256,
                           loadv8f32, f256mem, SchedWriteFShuffle.YMM>,
        VEX, VEX_L, WIG;

  defm VMOVSLDUPY
      : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", v8f32, VR256,
                           loadv8f32, f256mem, SchedWriteFShuffle.YMM>,
        VEX, VEX_L, WIG;
}
// Legacy-encoded replicate instructions; the memory forms match memopv4f32.
defm MOVSHDUP
    : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
                         memopv4f32, f128mem, SchedWriteFShuffle.XMM>;

defm MOVSLDUP
    : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
                         memopv4f32, f128mem, SchedWriteFShuffle.XMM>;
44360b57cec5SDimitry Andric
// Integer-typed (v4i32 / v8i32) X86Movshdup and X86Movsldup nodes select the
// same VEX records as the floating-point types.
let Predicates = [HasAVX, NoVLX] in {
  // 128-bit
  def : Pat<(v4i32 (X86Movshdup VR128:$src)), (VMOVSHDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movshdup (load addr:$src))), (VMOVSHDUPrm addr:$src)>;
  def : Pat<(v4i32 (X86Movsldup VR128:$src)), (VMOVSLDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movsldup (load addr:$src))), (VMOVSLDUPrm addr:$src)>;

  // 256-bit
  def : Pat<(v8i32 (X86Movshdup VR256:$src)), (VMOVSHDUPYrr VR256:$src)>;
  def : Pat<(v8i32 (X86Movshdup (load addr:$src))), (VMOVSHDUPYrm addr:$src)>;
  def : Pat<(v8i32 (X86Movsldup VR256:$src)), (VMOVSLDUPYrr VR256:$src)>;
  def : Pat<(v8i32 (X86Movsldup (load addr:$src))), (VMOVSLDUPYrm addr:$src)>;
}
44550b57cec5SDimitry Andric
// Integer-typed v4i32 X86Movshdup and X86Movsldup nodes select the legacy
// records; memory operands are matched with memop.
let Predicates = [UseSSE3] in {
  def : Pat<(v4i32 (X86Movshdup VR128:$src)), (MOVSHDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movshdup (memop addr:$src))), (MOVSHDUPrm addr:$src)>;
  def : Pat<(v4i32 (X86Movsldup VR128:$src)), (MOVSLDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movsldup (memop addr:$src))), (MOVSLDUPrm addr:$src)>;
}
44660b57cec5SDimitry Andric
//===---------------------------------------------------------------------===//
// SSE3 - Replicate Double FP - MOVDDUP
//===---------------------------------------------------------------------===//

/// sse3_replicate_dfp - 128-bit X86Movddup (opcode 0x12) in register and
/// memory forms. The memory form takes an f64mem operand and matches a scalar
/// f64 load wrapped in scalar_to_vector.
///   OpcodeStr - mnemonic placed in front of the operand string.
///   sched     - scheduling widths; the XMM entry is used.
multiclass sse3_replicate_dfp<string OpcodeStr, X86SchedWriteWidths sched> {
  def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v2f64 (X86Movddup VR128:$src)))]>,
           Sched<[sched.XMM]>;

  def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst,
                      (v2f64 (X86Movddup
                               (scalar_to_vector (loadf64 addr:$src)))))]>,
           Sched<[sched.XMM.Folded]>;
}
44830b57cec5SDimitry Andric
// FIXME: Merge with above classes when there are patterns for the ymm version
/// sse3_replicate_dfp_y - 256-bit X86Movddup (opcode 0x12) in register and
/// memory forms. The memory form matches a full loadv4f64.
///   OpcodeStr - mnemonic placed in front of the operand string.
///   sched     - scheduling widths; the YMM entry is used.
multiclass sse3_replicate_dfp_y<string OpcodeStr, X86SchedWriteWidths sched> {
  def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>,
           Sched<[sched.YMM]>;

  def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                [(set VR256:$dst,
                      (v4f64 (X86Movddup (loadv4f64 addr:$src))))]>,
           Sched<[sched.YMM.Folded]>;
}
44960b57cec5SDimitry Andric
// VEX-encoded MOVDDUP: the 128-bit form, and the 256-bit form with VEX_L.
let Predicates = [HasAVX, NoVLX] in {
  defm VMOVDDUP
      : sse3_replicate_dfp<"vmovddup", SchedWriteFShuffle>,
        VEX, WIG;

  defm VMOVDDUPY
      : sse3_replicate_dfp_y<"vmovddup", SchedWriteFShuffle>,
        VEX, VEX_L, WIG;
}
45030b57cec5SDimitry Andric
// Legacy-encoded MOVDDUP (128-bit only).
defm MOVDDUP
    : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>;
45050b57cec5SDimitry Andric
45060b57cec5SDimitry Andric
// X86Movddup of a zero-extending 64-bit load selects the memory form of
// VMOVDDUP. The predicate list comes from the enclosing let alone: a
// top-level let is applied after parent classes, so a per-pattern Requires<>
// here would be overridden and would only misstate the gating.
let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
            (VMOVDDUPrm addr:$src)>;
}
45110b57cec5SDimitry Andric
// X86Movddup of a zero-extending 64-bit load selects the memory form of the
// legacy MOVDDUP.
let Predicates = [UseSSE3] in {
  def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
            (MOVDDUPrm addr:$src)>;
}
45160b57cec5SDimitry Andric
//===---------------------------------------------------------------------===//
// SSE3 - Move Unaligned Integer
//===---------------------------------------------------------------------===//

// VEX-encoded LDDQU (opcode 0xF0), selected from the ldu_dq intrinsics for
// 128-bit and 256-bit operands.
let Predicates = [HasAVX] in {
  def VLDDQUrm
      : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
             "vlddqu\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
        Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, WIG;

  def VLDDQUYrm
      : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
             "vlddqu\t{$src, $dst|$dst, $src}",
             [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>,
        Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, VEX_L, WIG;
} // Predicates
45310b57cec5SDimitry Andric
// Legacy-encoded LDDQU (opcode 0xF0), selected from int_x86_sse3_ldu_dq.
def LDDQUrm
    : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
           "lddqu\t{$src, $dst|$dst, $src}",
           [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
      Sched<[SchedWriteVecMoveLS.XMM.RM]>;
45360b57cec5SDimitry Andric
//===---------------------------------------------------------------------===//
// SSE3 - Arithmetic
//===---------------------------------------------------------------------===//

/// sse3_addsub - register and memory forms of X86Addsub (opcode 0xD0).
///   OpcodeStr - mnemonic placed in front of the operand string.
///   vt        - result value type.
///   RC        - register class of the destination and register sources.
///   x86memop  - memory operand type of the rm form.
///   sched     - scheduling class; rm uses sched.Folded and
///               sched.ReadAfterFold.
///   ld_frag   - load fragment matched for the memory operand.
///   Is2Addr   - when set, the asm string names only $src2 and $dst;
///               otherwise it names $src2, $src1 and $dst.
/// Both forms read MXCSR and are marked mayRaiseFPException.
multiclass sse3_addsub<string OpcodeStr, ValueType vt, RegisterClass RC,
                       X86MemOperand x86memop, X86FoldableSchedWrite sched,
                       PatFrag ld_frag, bit Is2Addr = 1> {
  let Uses = [MXCSR], mayRaiseFPException = 1 in {
    def rr
        : I<0xD0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
            !if(Is2Addr,
                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                !strconcat(OpcodeStr,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
            [(set RC:$dst, (vt (X86Addsub RC:$src1, RC:$src2)))]>,
          Sched<[sched]>;

    def rm
        : I<0xD0, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
            !if(Is2Addr,
                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                !strconcat(OpcodeStr,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
            [(set RC:$dst,
                  (vt (X86Addsub RC:$src1, (ld_frag addr:$src2))))]>,
          Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Uses, mayRaiseFPException
}
45610b57cec5SDimitry Andric
// VEX-encoded ADDSUB. Is2Addr is passed as 0, so the asm string carries three
// operands. PS forms use the XD prefix, PD forms the PD prefix.
let Predicates = [HasAVX] in {
  let ExeDomain = SSEPackedSingle in {
    defm VADDSUBPS
        : sse3_addsub<"vaddsubps", v4f32, VR128, f128mem,
                      SchedWriteFAddSizes.PS.XMM, loadv4f32, 0>,
          XD, VEX_4V, WIG;

    defm VADDSUBPSY
        : sse3_addsub<"vaddsubps", v8f32, VR256, f256mem,
                      SchedWriteFAddSizes.PS.YMM, loadv8f32, 0>,
          XD, VEX_4V, VEX_L, WIG;
  }

  let ExeDomain = SSEPackedDouble in {
    defm VADDSUBPD
        : sse3_addsub<"vaddsubpd", v2f64, VR128, f128mem,
                      SchedWriteFAddSizes.PD.XMM, loadv2f64, 0>,
          PD, VEX_4V, WIG;

    defm VADDSUBPDY
        : sse3_addsub<"vaddsubpd", v4f64, VR256, f256mem,
                      SchedWriteFAddSizes.PD.YMM, loadv4f64, 0>,
          PD, VEX_4V, VEX_L, WIG;
  }
}
// Legacy-encoded ADDSUB: $src1 is tied to $dst and Is2Addr keeps its default,
// so the asm string carries two operands.
let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
  let ExeDomain = SSEPackedSingle in {
    defm ADDSUBPS
        : sse3_addsub<"addsubps", v4f32, VR128, f128mem,
                      SchedWriteFAddSizes.PS.XMM, memopv4f32>,
          XD;
  }

  let ExeDomain = SSEPackedDouble in {
    defm ADDSUBPD
        : sse3_addsub<"addsubpd", v2f64, VR128, f128mem,
                      SchedWriteFAddSizes.PD.XMM, memopv2f64>,
          PD;
  }
}
45880b57cec5SDimitry Andric
//===---------------------------------------------------------------------===//
// SSE3 Instructions
//===---------------------------------------------------------------------===//

// Horizontal ops
/// S3D_Int - register and memory forms of a two-source operation built on the
/// S3DI instruction class.
///   o         - opcode byte.
///   OpcodeStr - mnemonic placed in front of the operand string.
///   vt        - result value type.
///   RC        - register class of the destination and register sources.
///   x86memop  - memory operand type of the rm form.
///   OpNode    - SelectionDAG node matched by both forms.
///   sched     - scheduling class; rm uses sched.Folded and
///               sched.ReadAfterFold.
///   ld_frag   - load fragment matched for the memory operand.
///   Is2Addr   - when set, the asm string names only $src2 and $dst;
///               otherwise it names $src2, $src1 and $dst.
/// Both forms read MXCSR and are marked mayRaiseFPException.
multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
                   X86MemOperand x86memop, SDNode OpNode,
                   X86FoldableSchedWrite sched, PatFrag ld_frag,
                   bit Is2Addr = 1> {
  let Uses = [MXCSR], mayRaiseFPException = 1 in {
    def rr
        : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
               !if(Is2Addr,
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
               [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
          Sched<[sched]>;

    def rm
        : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
               !if(Is2Addr,
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
               [(set RC:$dst,
                     (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
          Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Uses, mayRaiseFPException
}
/// S3_Int - Same shape as S3D_Int, but the rr/rm records derive from the S3I
/// instruction class instead of S3DI. In this file section it is instantiated
/// for the packed-double horizontal add/sub instructions.
///   o         - opcode byte.
///   OpcodeStr - mnemonic placed in front of the operand list.
///   vt        - value type of the pattern result.
///   RC        - register class of $dst, $src1 and (for rr) $src2.
///   x86memop  - memory operand type of $src2 in the rm form.
///   OpNode    - DAG node matched by the selection pattern.
///   sched     - scheduling class; rm uses sched.Folded and sched.ReadAfterFold.
///   ld_frag   - load fragment applied to addr:$src2 in the rm pattern.
///   Is2Addr   - 1 (default): two-operand asm string that omits $src1;
///               0: three-operand asm string that prints $src1 as well.
46140b57cec5SDimitry Andricmulticlass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
46150b57cec5SDimitry Andric                  X86MemOperand x86memop, SDNode OpNode,
46160b57cec5SDimitry Andric                  X86FoldableSchedWrite sched, PatFrag ld_frag,
46170b57cec5SDimitry Andric                  bit Is2Addr = 1> {
// Both records are marked as reading MXCSR and as able to raise FP exceptions.
4618480093f4SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register-register form.
46190b57cec5SDimitry Andric  def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
46200b57cec5SDimitry Andric       !if(Is2Addr,
46210b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
46220b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
46230b57cec5SDimitry Andric      [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
46240b57cec5SDimitry Andric        Sched<[sched]>;
46250b57cec5SDimitry Andric
  // Register-memory form: the second source is loaded through ld_frag.
46260b57cec5SDimitry Andric  def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
46270b57cec5SDimitry Andric       !if(Is2Addr,
46280b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
46290b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
46300b57cec5SDimitry Andric      [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
46310b57cec5SDimitry Andric        Sched<[sched.Folded, sched.ReadAfterFold]>;
46320b57cec5SDimitry Andric}
4633480093f4SDimitry Andric}
46340b57cec5SDimitry Andric
// VEX-encoded horizontal FP add/sub, available with HasAVX. Every instantiation
// passes Is2Addr = 0, so the three-operand asm string is used and no
// $src1 = $dst constraint is applied. The 256-bit variants (suffix Y) use
// VR256/f256mem, the WriteFHAddY scheduling class and add VEX_L.
46350b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
  // Packed-single forms (S3D_Int): opcode 0x7C = hadd, 0x7D = hsub.
46360b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in {
46370b57cec5SDimitry Andric    defm VHADDPS  : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
463806c3fb27SDimitry Andric                            X86fhadd, WriteFHAdd, loadv4f32, 0>, VEX_4V, WIG;
46390b57cec5SDimitry Andric    defm VHSUBPS  : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
464006c3fb27SDimitry Andric                            X86fhsub, WriteFHAdd, loadv4f32, 0>, VEX_4V, WIG;
46410b57cec5SDimitry Andric    defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
464206c3fb27SDimitry Andric                            X86fhadd, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, WIG;
46430b57cec5SDimitry Andric    defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
464406c3fb27SDimitry Andric                            X86fhsub, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, WIG;
46450b57cec5SDimitry Andric  }
  // Packed-double forms (S3_Int): same opcodes, different instruction class.
46460b57cec5SDimitry Andric  let ExeDomain = SSEPackedDouble in {
46470b57cec5SDimitry Andric    defm VHADDPD  : S3_Int<0x7C, "vhaddpd", v2f64, VR128, f128mem,
464806c3fb27SDimitry Andric                           X86fhadd, WriteFHAdd, loadv2f64, 0>, VEX_4V, WIG;
46490b57cec5SDimitry Andric    defm VHSUBPD  : S3_Int<0x7D, "vhsubpd", v2f64, VR128, f128mem,
465006c3fb27SDimitry Andric                           X86fhsub, WriteFHAdd, loadv2f64, 0>, VEX_4V, WIG;
46510b57cec5SDimitry Andric    defm VHADDPDY : S3_Int<0x7C, "vhaddpd", v4f64, VR256, f256mem,
465206c3fb27SDimitry Andric                           X86fhadd, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, WIG;
46530b57cec5SDimitry Andric    defm VHSUBPDY : S3_Int<0x7D, "vhsubpd", v4f64, VR256, f256mem,
465406c3fb27SDimitry Andric                           X86fhsub, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, WIG;
46550b57cec5SDimitry Andric  }
46560b57cec5SDimitry Andric}
46570b57cec5SDimitry Andric
// Legacy (non-VEX) horizontal FP add/sub. Is2Addr keeps its default of 1, so
// the two-operand asm string is used, and $src1 is tied to $dst. Memory
// operands are matched with the memopv* fragments instead of loadv*.
// NOTE(review): no Predicates are set here; presumably the S3DI/S3I classes
// (defined elsewhere) supply the feature requirement - confirm.
46580b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
46590b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in {
46600b57cec5SDimitry Andric    defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd,
46610b57cec5SDimitry Andric                          WriteFHAdd, memopv4f32>;
46620b57cec5SDimitry Andric    defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub,
46630b57cec5SDimitry Andric                          WriteFHAdd, memopv4f32>;
46640b57cec5SDimitry Andric  }
46650b57cec5SDimitry Andric  let ExeDomain = SSEPackedDouble in {
46660b57cec5SDimitry Andric    defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd,
46670b57cec5SDimitry Andric                         WriteFHAdd, memopv2f64>;
46680b57cec5SDimitry Andric    defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub,
46690b57cec5SDimitry Andric                         WriteFHAdd, memopv2f64>;
46700b57cec5SDimitry Andric  }
46710b57cec5SDimitry Andric}
46720b57cec5SDimitry Andric
46730b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
46740b57cec5SDimitry Andric// SSSE3 - Packed Absolute Instructions
46750b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
46760b57cec5SDimitry Andric
46770b57cec5SDimitry Andric/// SS3I_unop_rm - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
/// Emits 128-bit register (rr) and memory (rm) records on VR128 / i128mem.
///   opc       - opcode byte.
///   OpcodeStr - mnemonic placed in front of the operand list.
///   vt        - value type of the pattern result.
///   OpNode    - unary DAG node matched by the pattern.
///   sched     - per-width scheduling info; only the XMM entry is used here.
///   ld_frag   - load fragment applied to addr:$src in the rm pattern.
46780b57cec5SDimitry Andricmulticlass SS3I_unop_rm<bits<8> opc, string OpcodeStr, ValueType vt,
46790b57cec5SDimitry Andric                        SDNode OpNode, X86SchedWriteWidths sched, PatFrag ld_frag> {
46800b57cec5SDimitry Andric  def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
46810b57cec5SDimitry Andric                 (ins VR128:$src),
46820b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
46830b57cec5SDimitry Andric                 [(set VR128:$dst, (vt (OpNode VR128:$src)))]>,
46840b57cec5SDimitry Andric                 Sched<[sched.XMM]>;
46850b57cec5SDimitry Andric
  // Memory form: the single source is loaded through ld_frag.
46860b57cec5SDimitry Andric  def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
46870b57cec5SDimitry Andric                 (ins i128mem:$src),
46880b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
46890b57cec5SDimitry Andric                 [(set VR128:$dst,
46900b57cec5SDimitry Andric                   (vt (OpNode (ld_frag addr:$src))))]>,
46910b57cec5SDimitry Andric                 Sched<[sched.XMM.Folded]>;
46920b57cec5SDimitry Andric}
46930b57cec5SDimitry Andric
46940b57cec5SDimitry Andric/// SS3I_unop_rm_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
/// 256-bit counterpart of SS3I_unop_rm: emits Yrr/Yrm records on VR256 /
/// i256mem and uses the YMM entry of sched. Unlike SS3I_unop_rm there is no
/// ld_frag parameter; the memory pattern always uses the plain `load` fragment.
46950b57cec5SDimitry Andricmulticlass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt,
46960b57cec5SDimitry Andric                          SDNode OpNode, X86SchedWriteWidths sched> {
46970b57cec5SDimitry Andric  def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
46980b57cec5SDimitry Andric                  (ins VR256:$src),
46990b57cec5SDimitry Andric                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
47000b57cec5SDimitry Andric                  [(set VR256:$dst, (vt (OpNode VR256:$src)))]>,
47010b57cec5SDimitry Andric                  Sched<[sched.YMM]>;
47020b57cec5SDimitry Andric
47030b57cec5SDimitry Andric  def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
47040b57cec5SDimitry Andric                  (ins i256mem:$src),
47050b57cec5SDimitry Andric                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
47060b57cec5SDimitry Andric                  [(set VR256:$dst,
47070b57cec5SDimitry Andric                    (vt (OpNode (load addr:$src))))]>,
47080b57cec5SDimitry Andric                  Sched<[sched.YMM.Folded]>;
47090b57cec5SDimitry Andric}
47100b57cec5SDimitry Andric
// VEX-encoded packed absolute value (opcodes 0x1C/0x1D/0x1E for b/w/d), all
// selecting the generic `abs` node. The byte/word forms are guarded by
// NoVLX_Or_NoBWI and the dword form by NoVLX. The 128-bit defms require HasAVX;
// the 256-bit ones require HasAVX2 and add VEX_L.
// The 128-bit and 256-bit defms share a name (e.g. VPABSB); the resulting
// records do not collide because the multiclasses append rr/rm and Yrr/Yrm.
47110b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
47120b57cec5SDimitry Andric  defm VPABSB  : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, SchedWriteVecALU,
471306c3fb27SDimitry Andric                              load>, VEX, WIG;
47140b57cec5SDimitry Andric  defm VPABSW  : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, SchedWriteVecALU,
471506c3fb27SDimitry Andric                              load>, VEX, WIG;
47160b57cec5SDimitry Andric}
47170b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
47180b57cec5SDimitry Andric  defm VPABSD  : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, SchedWriteVecALU,
471906c3fb27SDimitry Andric                              load>, VEX, WIG;
47200b57cec5SDimitry Andric}
// 256-bit forms.
47210b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
47220b57cec5SDimitry Andric  defm VPABSB  : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs, SchedWriteVecALU>,
472306c3fb27SDimitry Andric                                VEX, VEX_L, WIG;
47240b57cec5SDimitry Andric  defm VPABSW  : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs, SchedWriteVecALU>,
472506c3fb27SDimitry Andric                                VEX, VEX_L, WIG;
47260b57cec5SDimitry Andric}
47270b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
47280b57cec5SDimitry Andric  defm VPABSD  : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs, SchedWriteVecALU>,
472906c3fb27SDimitry Andric                                VEX, VEX_L, WIG;
47300b57cec5SDimitry Andric}
47310b57cec5SDimitry Andric
// Legacy (non-VEX) packed absolute value: same opcodes and `abs` node as the
// VEX forms, but memory operands are matched with the `memop` fragment.
// NOTE(review): no Predicates are set here; presumably the SS38I class
// (defined elsewhere) supplies the feature requirement - confirm.
47320b57cec5SDimitry Andricdefm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, SchedWriteVecALU,
47330b57cec5SDimitry Andric                          memop>;
47340b57cec5SDimitry Andricdefm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, SchedWriteVecALU,
47350b57cec5SDimitry Andric                          memop>;
47360b57cec5SDimitry Andricdefm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, SchedWriteVecALU,
47370b57cec5SDimitry Andric                          memop>;
47380b57cec5SDimitry Andric
47390b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
47400b57cec5SDimitry Andric// SSSE3 - Packed Binary Operator Instructions
47410b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
47420b57cec5SDimitry Andric
47430b57cec5SDimitry Andric/// SS3I_binop_rm - Simple SSSE3 bin op
/// Emits rr and rm records for a binary op selected through an SDNode.
///   opc        - opcode byte.
///   OpcodeStr  - mnemonic placed in front of the operand list.
///   OpNode     - DAG node matched by the pattern.
///   DstVT      - value type of the pattern result.
///   OpVT       - value type applied to the first source operand; it may differ
///                from DstVT (e.g. the pmaddubsw instantiations use v16i8
///                sources with a v8i16 result).
///   RC         - register class of $dst, $src1 and (for rr) $src2.
///   memop_frag - load fragment applied to addr:$src2 in the rm pattern.
///   x86memop   - memory operand type of $src2 in the rm form.
///   sched      - scheduling class; rm uses sched.Folded and sched.ReadAfterFold.
///   Is2Addr    - 1 (default): two-operand asm string that omits $src1;
///                0: three-operand asm string.
47440b57cec5SDimitry Andricmulticlass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
47450b57cec5SDimitry Andric                         ValueType DstVT, ValueType OpVT, RegisterClass RC,
47460b57cec5SDimitry Andric                         PatFrag memop_frag, X86MemOperand x86memop,
47470b57cec5SDimitry Andric                         X86FoldableSchedWrite sched, bit Is2Addr = 1> {
  // Only the rr record is marked commutable here; instantiation sites for
  // non-commutative operations wrap the defm in `let isCommutable = 0`.
47480b57cec5SDimitry Andric  let isCommutable = 1 in
47490b57cec5SDimitry Andric  def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst),
47500b57cec5SDimitry Andric       (ins RC:$src1, RC:$src2),
47510b57cec5SDimitry Andric       !if(Is2Addr,
47520b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
47530b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
47540b57cec5SDimitry Andric       [(set RC:$dst, (DstVT (OpNode (OpVT RC:$src1), RC:$src2)))]>,
47550b57cec5SDimitry Andric       Sched<[sched]>;
47560b57cec5SDimitry Andric  def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
47570b57cec5SDimitry Andric       (ins RC:$src1, x86memop:$src2),
47580b57cec5SDimitry Andric       !if(Is2Addr,
47590b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
47600b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
47610b57cec5SDimitry Andric       [(set RC:$dst,
47620b57cec5SDimitry Andric         (DstVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))]>,
47630b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
47640b57cec5SDimitry Andric}
47650b57cec5SDimitry Andric
47660b57cec5SDimitry Andric/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
/// Intrinsic-based variant of SS3I_binop_rm, fixed to VR128 / i128mem: the
/// patterns match the intrinsic IntId128 directly instead of an SDNode.
///   opc       - opcode byte.
///   OpcodeStr - mnemonic placed in front of the operand list.
///   IntId128  - 128-bit intrinsic matched by both patterns.
///   sched     - scheduling class; rm uses sched.Folded and sched.ReadAfterFold.
///   ld_frag   - load fragment applied to addr:$src2 in the rm pattern.
///   Is2Addr   - 1 (default): two-operand asm string; 0: three-operand string.
47670b57cec5SDimitry Andricmulticlass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
47680b57cec5SDimitry Andric                             Intrinsic IntId128, X86FoldableSchedWrite sched,
47690b57cec5SDimitry Andric                             PatFrag ld_frag, bit Is2Addr = 1> {
  // Only the rr record is marked commutable; the instantiation sites in this
  // file wrap their defms in `let isCommutable = 0`.
47700b57cec5SDimitry Andric  let isCommutable = 1 in
47710b57cec5SDimitry Andric  def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
47720b57cec5SDimitry Andric       (ins VR128:$src1, VR128:$src2),
47730b57cec5SDimitry Andric       !if(Is2Addr,
47740b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
47750b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
47760b57cec5SDimitry Andric       [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
47770b57cec5SDimitry Andric       Sched<[sched]>;
47780b57cec5SDimitry Andric  def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
47790b57cec5SDimitry Andric       (ins VR128:$src1, i128mem:$src2),
47800b57cec5SDimitry Andric       !if(Is2Addr,
47810b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
47820b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
47830b57cec5SDimitry Andric       [(set VR128:$dst,
47840b57cec5SDimitry Andric         (IntId128 VR128:$src1, (ld_frag addr:$src2)))]>,
47850b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
47860b57cec5SDimitry Andric}
47870b57cec5SDimitry Andric
/// SS3I_binop_rm_int_y - 256-bit counterpart of SS3I_binop_rm_int.
/// Emits Yrr/Yrm records on VR256 / i256mem matching the intrinsic IntId256.
/// There is no Is2Addr or ld_frag parameter: the asm string is always the
/// three-operand form and the memory pattern always uses the `load` fragment.
47880b57cec5SDimitry Andricmulticlass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
47890b57cec5SDimitry Andric                               Intrinsic IntId256,
47900b57cec5SDimitry Andric                               X86FoldableSchedWrite sched> {
47910b57cec5SDimitry Andric  let isCommutable = 1 in
47920b57cec5SDimitry Andric  def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
47930b57cec5SDimitry Andric       (ins VR256:$src1, VR256:$src2),
47940b57cec5SDimitry Andric       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
47950b57cec5SDimitry Andric       [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
47960b57cec5SDimitry Andric       Sched<[sched]>;
47970b57cec5SDimitry Andric  def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
47980b57cec5SDimitry Andric       (ins VR256:$src1, i256mem:$src2),
47990b57cec5SDimitry Andric       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
48000b57cec5SDimitry Andric       [(set VR256:$dst,
48010b57cec5SDimitry Andric         (IntId256 VR256:$src1, (load addr:$src2)))]>,
48020b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
48030b57cec5SDimitry Andric}
48040b57cec5SDimitry Andric
// 128-bit VEX forms that are guarded by NoVLX_Or_NoBWI in addition to HasAVX.
// ImmT = NoImm marks these records as having no immediate operand. All use the
// three-operand asm string (Is2Addr = 0).
48050b57cec5SDimitry Andriclet ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
// vpshufb and vpmaddubsw are explicitly non-commutable.
48060b57cec5SDimitry Andriclet isCommutable = 0 in {
48070b57cec5SDimitry Andric  defm VPSHUFB    : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8,
48080b57cec5SDimitry Andric                                  VR128, load, i128mem,
480906c3fb27SDimitry Andric                                  SchedWriteVarShuffle.XMM, 0>, VEX_4V, WIG;
48100b57cec5SDimitry Andric  defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16,
48110b57cec5SDimitry Andric                                  v16i8, VR128, load, i128mem,
481206c3fb27SDimitry Andric                                  SchedWriteVecIMul.XMM, 0>, VEX_4V, WIG;
48130b57cec5SDimitry Andric}
// vpmulhrsw sits outside the isCommutable = 0 scope, so its rr form keeps the
// isCommutable = 1 set inside SS3I_binop_rm.
48140b57cec5SDimitry Andricdefm VPMULHRSW    : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, v8i16,
48150b57cec5SDimitry Andric                                  VR128, load, i128mem,
481606c3fb27SDimitry Andric                                  SchedWriteVecIMul.XMM, 0>, VEX_4V, WIG;
48170b57cec5SDimitry Andric}
48180b57cec5SDimitry Andric
// 128-bit VEX integer horizontal add/sub and sign instructions, requiring only
// HasAVX. All are non-commutable and use the three-operand asm string.
// phadd/phsub w/d are selected through the X86hadd/X86hsub nodes; psign* and
// the saturating phaddsw/phsubsw are selected through their SSSE3 intrinsics.
48190b57cec5SDimitry Andriclet ImmT = NoImm, Predicates = [HasAVX] in {
48200b57cec5SDimitry Andriclet isCommutable = 0 in {
48210b57cec5SDimitry Andric  defm VPHADDW    : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128,
48220b57cec5SDimitry Andric                                  load, i128mem,
482306c3fb27SDimitry Andric                                  SchedWritePHAdd.XMM, 0>, VEX_4V, WIG;
48240b57cec5SDimitry Andric  defm VPHADDD    : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128,
48250b57cec5SDimitry Andric                                  load, i128mem,
482606c3fb27SDimitry Andric                                  SchedWritePHAdd.XMM, 0>, VEX_4V, WIG;
48270b57cec5SDimitry Andric  defm VPHSUBW    : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128,
48280b57cec5SDimitry Andric                                  load, i128mem,
482906c3fb27SDimitry Andric                                  SchedWritePHAdd.XMM, 0>, VEX_4V, WIG;
48300b57cec5SDimitry Andric  defm VPHSUBD    : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
48310b57cec5SDimitry Andric                                  load, i128mem,
483206c3fb27SDimitry Andric                                  SchedWritePHAdd.XMM, 0>, VEX_4V, WIG;
48330b57cec5SDimitry Andric  defm VPSIGNB    : SS3I_binop_rm_int<0x08, "vpsignb",
48340b57cec5SDimitry Andric                                      int_x86_ssse3_psign_b_128,
483506c3fb27SDimitry Andric                                      SchedWriteVecALU.XMM, load, 0>, VEX_4V, WIG;
48360b57cec5SDimitry Andric  defm VPSIGNW    : SS3I_binop_rm_int<0x09, "vpsignw",
48370b57cec5SDimitry Andric                                      int_x86_ssse3_psign_w_128,
483806c3fb27SDimitry Andric                                      SchedWriteVecALU.XMM, load, 0>, VEX_4V, WIG;
48390b57cec5SDimitry Andric  defm VPSIGND    : SS3I_binop_rm_int<0x0A, "vpsignd",
48400b57cec5SDimitry Andric                                      int_x86_ssse3_psign_d_128,
484106c3fb27SDimitry Andric                                      SchedWriteVecALU.XMM, load, 0>, VEX_4V, WIG;
48420b57cec5SDimitry Andric  defm VPHADDSW   : SS3I_binop_rm_int<0x03, "vphaddsw",
48430b57cec5SDimitry Andric                                      int_x86_ssse3_phadd_sw_128,
484406c3fb27SDimitry Andric                                      SchedWritePHAdd.XMM, load, 0>, VEX_4V, WIG;
48450b57cec5SDimitry Andric  defm VPHSUBSW   : SS3I_binop_rm_int<0x07, "vphsubsw",
48460b57cec5SDimitry Andric                                      int_x86_ssse3_phsub_sw_128,
484706c3fb27SDimitry Andric                                      SchedWritePHAdd.XMM, load, 0>, VEX_4V, WIG;
48480b57cec5SDimitry Andric}
48490b57cec5SDimitry Andric}
48500b57cec5SDimitry Andric
// 256-bit (VEX_L) counterparts of VPSHUFB / VPMADDUBSW / VPMULHRSW, requiring
// HasAVX2 and guarded by NoVLX_Or_NoBWI. They use VR256/i256mem and the YMM
// scheduling entries; the record names carry a Y suffix.
48510b57cec5SDimitry Andriclet ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
48520b57cec5SDimitry Andriclet isCommutable = 0 in {
48530b57cec5SDimitry Andric  defm VPSHUFBY   : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8,
48540b57cec5SDimitry Andric                                  VR256, load, i256mem,
485506c3fb27SDimitry Andric                                  SchedWriteVarShuffle.YMM, 0>, VEX_4V, VEX_L, WIG;
48560b57cec5SDimitry Andric  defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16,
48570b57cec5SDimitry Andric                                   v32i8, VR256, load, i256mem,
485806c3fb27SDimitry Andric                                   SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, WIG;
48590b57cec5SDimitry Andric}
// Outside the isCommutable = 0 scope, like the 128-bit VPMULHRSW.
48600b57cec5SDimitry Andricdefm VPMULHRSWY   : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16,
48610b57cec5SDimitry Andric                                  VR256, load, i256mem,
486206c3fb27SDimitry Andric                                  SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, WIG;
48630b57cec5SDimitry Andric}
48640b57cec5SDimitry Andric
// 256-bit (VEX_L) integer horizontal add/sub and sign instructions, requiring
// HasAVX2. The SDNode-based defms carry a Y suffix in the defm name; the
// intrinsic-based ones reuse the 128-bit defm names (VPSIGNB, ...) because
// SS3I_binop_rm_int_y itself emits Yrr/Yrm records.
48650b57cec5SDimitry Andriclet ImmT = NoImm, Predicates = [HasAVX2] in {
48660b57cec5SDimitry Andriclet isCommutable = 0 in {
48670b57cec5SDimitry Andric  defm VPHADDWY   : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16,
48680b57cec5SDimitry Andric                                  VR256, load, i256mem,
486906c3fb27SDimitry Andric                                  SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, WIG;
48700b57cec5SDimitry Andric  defm VPHADDDY   : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256,
48710b57cec5SDimitry Andric                                  load, i256mem,
487206c3fb27SDimitry Andric                                  SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, WIG;
48730b57cec5SDimitry Andric  defm VPHSUBWY   : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16,
48740b57cec5SDimitry Andric                                  VR256, load, i256mem,
487506c3fb27SDimitry Andric                                  SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, WIG;
48760b57cec5SDimitry Andric  defm VPHSUBDY   : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
48770b57cec5SDimitry Andric                                  load, i256mem,
487806c3fb27SDimitry Andric                                  SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, WIG;
48790b57cec5SDimitry Andric  defm VPSIGNB   : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
488006c3fb27SDimitry Andric                                       SchedWriteVecALU.YMM>, VEX_4V, VEX_L, WIG;
48810b57cec5SDimitry Andric  defm VPSIGNW   : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w,
488206c3fb27SDimitry Andric                                       SchedWriteVecALU.YMM>, VEX_4V, VEX_L, WIG;
48830b57cec5SDimitry Andric  defm VPSIGND   : SS3I_binop_rm_int_y<0x0A, "vpsignd", int_x86_avx2_psign_d,
488406c3fb27SDimitry Andric                                       SchedWriteVecALU.YMM>, VEX_4V, VEX_L, WIG;
48850b57cec5SDimitry Andric  defm VPHADDSW  : SS3I_binop_rm_int_y<0x03, "vphaddsw",
48860b57cec5SDimitry Andric                                       int_x86_avx2_phadd_sw,
488706c3fb27SDimitry Andric                                       SchedWritePHAdd.YMM>, VEX_4V, VEX_L, WIG;
48880b57cec5SDimitry Andric  defm VPHSUBSW  : SS3I_binop_rm_int_y<0x07, "vphsubsw",
48890b57cec5SDimitry Andric                                       int_x86_avx2_phsub_sw,
489006c3fb27SDimitry Andric                                       SchedWritePHAdd.YMM>, VEX_4V, VEX_L, WIG;
48910b57cec5SDimitry Andric}
48920b57cec5SDimitry Andric}
48930b57cec5SDimitry Andric
// Legacy (non-VEX) SSSE3 binary ops. Is2Addr keeps its default of 1 and $src1
// is tied to $dst; memory operands are matched with the `memop` fragment.
// NOTE(review): no Predicates are set here; presumably the SS38I class
// (defined elsewhere) supplies the feature requirement - confirm.
48940b57cec5SDimitry Andric// None of these have i8 immediate fields.
48950b57cec5SDimitry Andriclet ImmT = NoImm, Constraints = "$src1 = $dst" in {
48960b57cec5SDimitry Andriclet isCommutable = 0 in {
48970b57cec5SDimitry Andric  defm PHADDW    : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, v8i16, VR128,
48980b57cec5SDimitry Andric                                 memop, i128mem, SchedWritePHAdd.XMM>;
48990b57cec5SDimitry Andric  defm PHADDD    : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, v4i32, VR128,
49000b57cec5SDimitry Andric                                 memop, i128mem, SchedWritePHAdd.XMM>;
49010b57cec5SDimitry Andric  defm PHSUBW    : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, v8i16, VR128,
49020b57cec5SDimitry Andric                                 memop, i128mem, SchedWritePHAdd.XMM>;
49030b57cec5SDimitry Andric  defm PHSUBD    : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, v4i32, VR128,
49040b57cec5SDimitry Andric                                 memop, i128mem, SchedWritePHAdd.XMM>;
49050b57cec5SDimitry Andric  defm PSIGNB    : SS3I_binop_rm_int<0x08, "psignb", int_x86_ssse3_psign_b_128,
49060b57cec5SDimitry Andric                                     SchedWriteVecALU.XMM, memop>;
49070b57cec5SDimitry Andric  defm PSIGNW    : SS3I_binop_rm_int<0x09, "psignw", int_x86_ssse3_psign_w_128,
49080b57cec5SDimitry Andric                                     SchedWriteVecALU.XMM, memop>;
49090b57cec5SDimitry Andric  defm PSIGND    : SS3I_binop_rm_int<0x0A, "psignd", int_x86_ssse3_psign_d_128,
49100b57cec5SDimitry Andric                                     SchedWriteVecALU.XMM, memop>;
49110b57cec5SDimitry Andric  defm PSHUFB    : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, v16i8, VR128,
49120b57cec5SDimitry Andric                                 memop, i128mem, SchedWriteVarShuffle.XMM>;
49130b57cec5SDimitry Andric  defm PHADDSW   : SS3I_binop_rm_int<0x03, "phaddsw",
49140b57cec5SDimitry Andric                                     int_x86_ssse3_phadd_sw_128,
49150b57cec5SDimitry Andric                                     SchedWritePHAdd.XMM, memop>;
49160b57cec5SDimitry Andric  defm PHSUBSW   : SS3I_binop_rm_int<0x07, "phsubsw",
49170b57cec5SDimitry Andric                                     int_x86_ssse3_phsub_sw_128,
49180b57cec5SDimitry Andric                                     SchedWritePHAdd.XMM, memop>;
49190b57cec5SDimitry Andric  defm PMADDUBSW : SS3I_binop_rm<0x04, "pmaddubsw", X86vpmaddubsw, v8i16,
49200b57cec5SDimitry Andric                                 v16i8, VR128, memop, i128mem,
49210b57cec5SDimitry Andric                                 SchedWriteVecIMul.XMM>;
49220b57cec5SDimitry Andric}
// pmulhrsw sits outside the isCommutable = 0 scope, so its rr form keeps the
// isCommutable = 1 set inside SS3I_binop_rm.
49230b57cec5SDimitry Andricdefm PMULHRSW    : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, v8i16,
49240b57cec5SDimitry Andric                                 VR128, memop, i128mem, SchedWriteVecIMul.XMM>;
49250b57cec5SDimitry Andric}
49260b57cec5SDimitry Andric
49270b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
49280b57cec5SDimitry Andric// SSSE3 - Packed Align Instruction Patterns
49290b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
49300b57cec5SDimitry Andric
/// ssse3_palignr - Packed align right with an 8-bit immediate (opcode 0x0F on
/// the SS3AI class). Emits rri (register source) and rmi (memory source).
///   asm        - mnemonic placed in front of the operand list.
///   VT         - value type of the X86PAlignr pattern result.
///   RC         - register class of $dst, $src1 and (for rri) $src2.
///   memop_frag - load fragment applied to addr:$src2 in the rmi pattern.
///   x86memop   - memory operand type of $src2 in the rmi form.
///   sched      - scheduling class; rmi uses sched.Folded and
///                sched.ReadAfterFold.
///   Is2Addr    - 1 (default): asm string omits $src1; 0: asm string prints
///                $src1 as well.
/// The immediate is the u8imm operand $src3 and is matched as (i8 timm).
49310b57cec5SDimitry Andricmulticlass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
49320b57cec5SDimitry Andric                         PatFrag memop_frag, X86MemOperand x86memop,
49330b57cec5SDimitry Andric                         X86FoldableSchedWrite sched, bit Is2Addr = 1> {
  // Both records are explicitly marked as having no side effects.
49340b57cec5SDimitry Andric  let hasSideEffects = 0 in {
49350b57cec5SDimitry Andric  def rri : SS3AI<0x0F, MRMSrcReg, (outs RC:$dst),
49360b57cec5SDimitry Andric      (ins RC:$src1, RC:$src2, u8imm:$src3),
49370b57cec5SDimitry Andric      !if(Is2Addr,
49380b57cec5SDimitry Andric        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
49390b57cec5SDimitry Andric        !strconcat(asm,
49400b57cec5SDimitry Andric                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
49418bcb0991SDimitry Andric      [(set RC:$dst, (VT (X86PAlignr RC:$src1, RC:$src2, (i8 timm:$src3))))]>,
49420b57cec5SDimitry Andric      Sched<[sched]>;
  // Memory-source form; mayLoad is set explicitly in addition to the pattern.
49430b57cec5SDimitry Andric  let mayLoad = 1 in
49440b57cec5SDimitry Andric  def rmi : SS3AI<0x0F, MRMSrcMem, (outs RC:$dst),
49450b57cec5SDimitry Andric      (ins RC:$src1, x86memop:$src2, u8imm:$src3),
49460b57cec5SDimitry Andric      !if(Is2Addr,
49470b57cec5SDimitry Andric        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
49480b57cec5SDimitry Andric        !strconcat(asm,
49490b57cec5SDimitry Andric                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
49500b57cec5SDimitry Andric      [(set RC:$dst, (VT (X86PAlignr RC:$src1,
49510b57cec5SDimitry Andric                                     (memop_frag addr:$src2),
49528bcb0991SDimitry Andric                                     (i8 timm:$src3))))]>,
49530b57cec5SDimitry Andric      Sched<[sched.Folded, sched.ReadAfterFold]>;
49540b57cec5SDimitry Andric  }
49550b57cec5SDimitry Andric}
49560b57cec5SDimitry Andric
// PALIGNR instantiations:
//   VPALIGNR  - 128-bit VEX form (HasAVX, NoVLX_Or_NoBWI), three-operand asm.
//   VPALIGNRY - 256-bit VEX_L form (HasAVX2, NoVLX_Or_NoBWI), three-operand asm.
//   PALIGNR   - legacy form (UseSSSE3), $src1 tied to $dst, `memop` fragment.
49570b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in
49580b57cec5SDimitry Andric  defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, load, i128mem,
495906c3fb27SDimitry Andric                                SchedWriteShuffle.XMM, 0>, VEX_4V, WIG;
49600b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
49610b57cec5SDimitry Andric  defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, load, i256mem,
496206c3fb27SDimitry Andric                                 SchedWriteShuffle.YMM, 0>, VEX_4V, VEX_L, WIG;
49630b57cec5SDimitry Andriclet Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
49640b57cec5SDimitry Andric  defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memop, i128mem,
49650b57cec5SDimitry Andric                               SchedWriteShuffle.XMM>;
49660b57cec5SDimitry Andric
49670b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
49680b57cec5SDimitry Andric// SSE3 - Thread synchronization
49690b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
49700b57cec5SDimitry Andric
// MONITOR / MWAIT. All three records have empty (outs)/(ins) lists; their
// register inputs are modelled purely through implicit Uses. They share the
// WriteSystem scheduling class and require HasSSE3.
49710b57cec5SDimitry Andriclet SchedRW = [WriteSystem] in {
// MONITOR comes in two records with the same opcode/form (0x01, MRM_C8); they
// differ in the mode predicate and in whether EAX or RAX is listed in Uses.
// Neither has a selection pattern.
49720b57cec5SDimitry Andriclet Uses = [EAX, ECX, EDX] in
49730b57cec5SDimitry Andricdef MONITOR32rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
49740b57cec5SDimitry Andric                     TB, Requires<[HasSSE3, Not64BitMode]>;
49750b57cec5SDimitry Andriclet Uses = [RAX, ECX, EDX] in
49760b57cec5SDimitry Andricdef MONITOR64rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
49770b57cec5SDimitry Andric                     TB, Requires<[HasSSE3, In64BitMode]>;
49780b57cec5SDimitry Andric
// MWAIT (0x01, MRM_C9) is selected directly from int_x86_sse3_mwait with ECX
// and EAX as its arguments; there is a single record for all modes.
49790b57cec5SDimitry Andriclet Uses = [ECX, EAX] in
49800b57cec5SDimitry Andricdef MWAITrr   : I<0x01, MRM_C9, (outs), (ins), "mwait",
49810b57cec5SDimitry Andric                  [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
49820b57cec5SDimitry Andric} // SchedRW
49830b57cec5SDimitry Andric
// Assembler aliases that let mwait/monitor be written with their registers
// spelled out explicitly. Each alias maps to the operand-less record above;
// the register names accepted depend on the mode predicate (32-bit names
// outside 64-bit mode, 64-bit names in 64-bit mode).
49840b57cec5SDimitry Andricdef : InstAlias<"mwait\t{%eax, %ecx|ecx, eax}", (MWAITrr)>, Requires<[Not64BitMode]>;
49850b57cec5SDimitry Andricdef : InstAlias<"mwait\t{%rax, %rcx|rcx, rax}", (MWAITrr)>, Requires<[In64BitMode]>;
49860b57cec5SDimitry Andric
49870b57cec5SDimitry Andricdef : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITOR32rrr)>,
49880b57cec5SDimitry Andric      Requires<[Not64BitMode]>;
49890b57cec5SDimitry Andricdef : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITOR64rrr)>,
49900b57cec5SDimitry Andric      Requires<[In64BitMode]>;
49910b57cec5SDimitry Andric
49920b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
49930b57cec5SDimitry Andric// SSE4.1 - Packed Move with Sign/Zero Extend
49940b57cec5SDimitry Andric// NOTE: Any Extend is promoted to Zero Extend in X86ISelDAGToDAG.cpp
49950b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
49960b57cec5SDimitry Andric
/// SS41I_pmovx_rrrm - Register (rr) and memory (rm) records for one packed
/// sign/zero-extending move, built on the SS48I class.
///   opc       - opcode byte.
///   OpcodeStr - mnemonic placed in front of the operand list.
///   MemOp     - memory operand type of $src in the rm form.
///   OutRC     - register class of $dst.
///   InRC      - register class of $src in the rr form (may be narrower than
///               OutRC, e.g. VR128 source with a VR256 destination).
///   sched     - scheduling class; rm uses sched.Folded.
/// Both records have an empty pattern list.
/// NOTE(review): selection presumably happens through separate Pat
/// definitions elsewhere in the file - confirm.
49970b57cec5SDimitry Andricmulticlass SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
49980b57cec5SDimitry Andric                            RegisterClass OutRC, RegisterClass InRC,
49990b57cec5SDimitry Andric                            X86FoldableSchedWrite sched> {
50000b57cec5SDimitry Andric  def rr : SS48I<opc, MRMSrcReg, (outs OutRC:$dst), (ins InRC:$src),
50010b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
50020b57cec5SDimitry Andric                 Sched<[sched]>;
50030b57cec5SDimitry Andric
50040b57cec5SDimitry Andric  def rm : SS48I<opc, MRMSrcMem, (outs OutRC:$dst), (ins MemOp:$src),
50050b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
50060b57cec5SDimitry Andric                 Sched<[sched.Folded]>;
50070b57cec5SDimitry Andric}
50080b57cec5SDimitry Andric
/// SS41I_pmovx_rm_all - Expands one extend instruction into its three
/// encodings:
///   NAME      - legacy form, VR128 -> VR128, memory operand MemOp.
///   V#NAME    - 128-bit VEX form, predicates [HasAVX, prd].
///   V#NAME#Y  - 256-bit VEX_L form, VR128 -> VR256, memory operand MemYOp,
///               predicates [HasAVX2, prd].
/// The VEX mnemonics are formed by prepending "v" to OpcodeStr. `prd` is the
/// extra predicate supplied by the caller (NoVLX or NoVLX_Or_NoBWI in the
/// instantiations visible in this file).
50090b57cec5SDimitry Andricmulticlass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,
50100b57cec5SDimitry Andric                              X86MemOperand MemOp, X86MemOperand MemYOp,
50110b57cec5SDimitry Andric                              Predicate prd> {
  // NOTE(review): the legacy form is scheduled as SchedWriteShuffle.XMM while
  // both VEX forms below use SchedWriteVecExtend; confirm whether this
  // difference is intentional.
50120b57cec5SDimitry Andric  defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128,
50130b57cec5SDimitry Andric                               SchedWriteShuffle.XMM>;
50140b57cec5SDimitry Andric  let Predicates = [HasAVX, prd] in
50150b57cec5SDimitry Andric    defm V#NAME   : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
5016bdd1243dSDimitry Andric                                     VR128, VR128, SchedWriteVecExtend.XMM>,
501706c3fb27SDimitry Andric                                     VEX, WIG;
50180b57cec5SDimitry Andric  let Predicates = [HasAVX2, prd] in
50190b57cec5SDimitry Andric    defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
5020bdd1243dSDimitry Andric                                     VR256, VR128, SchedWriteVecExtend.YMM>,
502106c3fb27SDimitry Andric                                     VEX, VEX_L, WIG;
50220b57cec5SDimitry Andric}
50230b57cec5SDimitry Andric
/// SS41I_pmovx_rm - Emits the sign-extending and zero-extending families for
/// one element-type pair. OpcodeStr is the type suffix appended to "pmovsx" /
/// "pmovzx". The zero-extend family is placed at opcode `opc + 0x10`.
multiclass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
                          X86MemOperand MemYOp, Predicate prd> {
  // Sign extend: opcode as given.
  defm PMOVSX#NAME : SS41I_pmovx_rm_all<opc, "pmovsx" # OpcodeStr,
                                        MemOp, MemYOp, prd>;
  // Zero extend: opcode offset by 0x10.
  defm PMOVZX#NAME : SS41I_pmovx_rm_all<!add(opc, 0x10), "pmovzx" # OpcodeStr,
                                        MemOp, MemYOp, prd>;
}
50320b57cec5SDimitry Andric
// Instantiate every source/destination element pairing. The two memory
// operands are the widths read by the 128-bit and 256-bit forms respectively.
// Only the byte->word pair takes NoVLX_Or_NoBWI; the rest take NoVLX.

// 64-bit / 128-bit memory source.
defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem, NoVLX_Or_NoBWI>;
defm WD : SS41I_pmovx_rm<0x23, "wd", i64mem, i128mem, NoVLX>;
defm DQ : SS41I_pmovx_rm<0x25, "dq", i64mem, i128mem, NoVLX>;

// 32-bit / 64-bit memory source.
defm BD : SS41I_pmovx_rm<0x21, "bd", i32mem, i64mem, NoVLX>;
defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem, NoVLX>;

// 16-bit / 32-bit memory source.
defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem, NoVLX>;
50410b57cec5SDimitry Andric
// AVX2 patterns.
//
// Selection patterns for the 256-bit (`Y`) packed-extend instructions.
//   OpcPrefix - instruction-name prefix; pasted with <pair>Yrr / <pair>Yrm.
//   ExtTy     - pasted in front of "extloadvi<N>" to name the extending-load
//               PatFrag (instantiated below with "s" and "z").
//   ExtOp     - extend node used where source and result have the same
//               element count (v16i8->v16i16, v8i16->v8i32, v4i32->v4i64).
//   InVecOp   - extend node used where the result has fewer elements than the
//               128-bit source (v16i8->v8i32, v16i8->v4i64, v8i16->v4i64).
// Byte->word patterns are guarded by NoVLX_Or_NoBWI, all others by NoVLX.
multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
                                     SDNode ExtOp, SDNode InVecOp> {
  //===--- Register source ------------------------------------------------===//
  let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
  def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;

  let Predicates = [HasAVX2, NoVLX] in {
    // From bytes.
    def : Pat<(v8i32 (InVecOp (v16i8 VR128:$src))),
              (!cast<I>(OpcPrefix#BDYrr) VR128:$src)>;
    def : Pat<(v4i64 (InVecOp (v16i8 VR128:$src))),
              (!cast<I>(OpcPrefix#BQYrr) VR128:$src)>;

    // From words.
    def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
              (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
    def : Pat<(v4i64 (InVecOp (v8i16 VR128:$src))),
              (!cast<I>(OpcPrefix#WQYrr) VR128:$src)>;

    // From dwords.
    def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
              (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
  }

  //===--- Memory source: extending loads and full-vector loads -----------===//
  let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
    def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
              (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;

    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
  }

  let Predicates = [HasAVX2, NoVLX] in {
    // Extending vector loads.
    def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
              (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
    def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
              (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;

    def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
              (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
    def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
              (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;

    def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
              (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;

    // Extend of a plain vector load, or of a scalar load that was placed in
    // a vector (scalar_to_vector / X86vzload) and bitcast to the source type.
    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;

    def : Pat<(v8i32 (InVecOp (bc_v16i8
                        (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
    def : Pat<(v8i32 (InVecOp (bc_v16i8
                        (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
    def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;

    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;

    def : Pat<(v4i64 (InVecOp (bc_v16i8
                        (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
    // NOTE(review): this X86vzload32 is typed v2i64, whereas the
    // scalar_to_vector form just above and the X86vzload32 patterns in
    // SS41I_pmovx_patterns use v4i32 - confirm v2i64 is intended.
    def : Pat<(v4i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload32 addr:$src))))),
              (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;

    def : Pat<(v4i64 (InVecOp (bc_v8i16
                        (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
    def : Pat<(v4i64 (InVecOp (bc_v8i16
                        (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
    def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
  }
}
51170b57cec5SDimitry Andric
// 256-bit patterns: sign-extending family, then zero-extending family.
defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", sext, sext_invec>;
defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", zext, zext_invec>;
51200b57cec5SDimitry Andric
// SSE4.1/AVX patterns.
//
// Selection patterns for the 128-bit packed-extend instructions.
//   OpcPrefix - instruction-name prefix; pasted with <pair>rr / <pair>rm.
//   ExtTy     - pasted in front of "extloadvi<N>" to name the extending-load
//               PatFrag.
//   ExtOp     - the extend node matched by every non-extload pattern.
// Byte->word patterns are guarded by NoVLX_Or_NoBWI, all others by NoVLX.
multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
                                SDNode ExtOp> {
  //===--- Register source ------------------------------------------------===//
  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
  def : Pat<(v8i16 (ExtOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BWrr) VR128:$src)>;

  let Predicates = [HasAVX, NoVLX] in {
    // From bytes.
    def : Pat<(v4i32 (ExtOp (v16i8 VR128:$src))),
              (!cast<I>(OpcPrefix#BDrr) VR128:$src)>;
    def : Pat<(v2i64 (ExtOp (v16i8 VR128:$src))),
              (!cast<I>(OpcPrefix#BQrr) VR128:$src)>;

    // From words.
    def : Pat<(v4i32 (ExtOp (v8i16 VR128:$src))),
              (!cast<I>(OpcPrefix#WDrr) VR128:$src)>;
    def : Pat<(v2i64 (ExtOp (v8i16 VR128:$src))),
              (!cast<I>(OpcPrefix#WQrr) VR128:$src)>;

    // From dwords.
    def : Pat<(v2i64 (ExtOp (v4i32 VR128:$src))),
              (!cast<I>(OpcPrefix#DQrr) VR128:$src)>;
  }

  //===--- Memory source: extending vector loads --------------------------===//
  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
  def : Pat<(v8i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;

  let Predicates = [HasAVX, NoVLX] in {
    def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
              (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
    def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
              (!cast<I>(OpcPrefix#BQrm) addr:$src)>;

    def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
              (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
    def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
              (!cast<I>(OpcPrefix#WQrm) addr:$src)>;

    def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
              (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  }

  //===--- Memory source: extend of a loaded value ------------------------===//
  // Each group matches the scalar load placed in a vector
  // (scalar_to_vector / X86vzload) and bitcast to the source element type,
  // followed by the plain full-vector load.
  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
    def : Pat<(v8i16 (ExtOp (bc_v16i8
                       (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
    def : Pat<(v8i16 (ExtOp (bc_v16i8
                       (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
    def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
    def : Pat<(v8i16 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  }

  let Predicates = [HasAVX, NoVLX] in {
    // Byte -> dword.
    def : Pat<(v4i32 (ExtOp (bc_v16i8
                       (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
    def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
              (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
    def : Pat<(v4i32 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BDrm) addr:$src)>;

    // Byte -> qword.
    def : Pat<(v2i64 (ExtOp (bc_v16i8
                       (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
    def : Pat<(v2i64 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BQrm) addr:$src)>;

    // Word -> dword.
    def : Pat<(v4i32 (ExtOp (bc_v8i16
                       (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
    def : Pat<(v4i32 (ExtOp (bc_v8i16
                       (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
    def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
    def : Pat<(v4i32 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDrm) addr:$src)>;

    // Word -> qword.
    def : Pat<(v2i64 (ExtOp (bc_v8i16
                       (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
    def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
              (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
    def : Pat<(v2i64 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WQrm) addr:$src)>;

    // Dword -> qword.
    def : Pat<(v2i64 (ExtOp (bc_v4i32
                       (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
    def : Pat<(v2i64 (ExtOp (bc_v4i32
                       (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
    def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
    def : Pat<(v2i64 (ExtOp (loadv4i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  }
}
52090b57cec5SDimitry Andric
// 128-bit patterns for the VEX-encoded instructions.
defm : SS41I_pmovx_patterns<"VPMOVSX", "s", sext_invec>;
defm : SS41I_pmovx_patterns<"VPMOVZX", "z", zext_invec>;

// The same patterns for the legacy-encoded instructions, instantiated under
// an outer UseSSE41 predicate list.
// NOTE(review): presumably this outer `let` replaces the HasAVX-based lists
// written inside the multiclass - confirm against TableGen `let` semantics.
let Predicates = [UseSSE41] in {
  defm : SS41I_pmovx_patterns<"PMOVSX", "s", sext_invec>;
  defm : SS41I_pmovx_patterns<"PMOVZX", "z", zext_invec>;
}
52170b57cec5SDimitry Andric
52180b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
52190b57cec5SDimitry Andric// SSE4.1 - Extract Instructions
52200b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
52210b57cec5SDimitry Andric
/// SS41I_extract8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem
///   rr - X86pextrb of a v16i8 source at immediate index $src2, written to a
///        GR32orGR64 destination.
///   mr - the same extract truncated to i8 and stored to an i8mem operand.
multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                 (ins VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1),
                                         timm:$src2))]>,
                  Sched<[WriteVecExtract]>;
  // Store form: flags are spelled out explicitly alongside the store pattern.
  let hasSideEffects = 0, mayStore = 1 in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i8mem:$dst, VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), timm:$src2))),
                          addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}
52390b57cec5SDimitry Andric
// VEX-encoded byte extract, guarded by [HasAVX, NoBWI].
let Predicates = [HasAVX, NoBWI] in {
  defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX, WIG;
}

// Legacy-encoded byte extract.
defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
52440b57cec5SDimitry Andric
52450b57cec5SDimitry Andric
/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination.
///   rr_REV - pattern-less register-destination form using MRMDestReg; it is
///            marked isCodeGenOnly with ForceDisassemble set.
///   mr     - X86pextrw of a v8i16 source at immediate index $src2,
///            truncated to i16 and stored to an i16mem operand.
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
  // Neither form has side effects beyond what its operands/flags describe.
  let hasSideEffects = 0 in {
    let isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                         (ins VR128:$src1, u8imm:$src2),
                         OpcodeStr #
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         []>,
                 Sched<[WriteVecExtract]>;

    let mayStore = 1 in
    def mr : SS4AIi8<opc, MRMDestMem, (outs),
                     (ins i16mem:$dst, VR128:$src1, u8imm:$src2),
                     OpcodeStr #
                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(store (i16 (trunc (X86pextrw (v8i16 VR128:$src1),
                                                    timm:$src2))),
                             addr:$dst)]>,
             Sched<[WriteVecExtractSt]>;
  }
}
52630b57cec5SDimitry Andric
// VEX-encoded word extract, guarded by [HasAVX, NoBWI].
let Predicates = [HasAVX, NoBWI] in {
  defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX, WIG;
}

// Legacy-encoded word extract.
defm PEXTRW : SS41I_extract16<0x15, "pextrw">;

// Store an f16 value by copying it into VR128 (viewed as v8i16) and using the
// word-extract-to-memory form with index 0.
let Predicates = [UseSSE41] in
  def : Pat<(store f16:$src, addr:$dst),
            (PEXTRWmr addr:$dst,
                      (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;

let Predicates = [HasAVX, NoBWI] in
  def : Pat<(store f16:$src, addr:$dst),
            (VPEXTRWmr addr:$dst,
                       (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
527481ad6265SDimitry Andric
52750b57cec5SDimitry Andric
/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination.
///   rr - extractelt of a v4i32 source at immediate index $src2 into GR32.
///   mr - the same extractelt stored to an i32mem operand.
multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
  // Register destination.
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
                   (ins VR128:$src1, u8imm:$src2),
                   OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   [(set GR32:$dst,
                         (extractelt (v4i32 VR128:$src1), imm:$src2))]>,
           Sched<[WriteVecExtract]>;

  // Memory destination.
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins i32mem:$dst, VR128:$src1, u8imm:$src2),
                   OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
                           addr:$dst)]>,
           Sched<[WriteVecExtractSt]>;
}
52920b57cec5SDimitry Andric
// VEX-encoded dword extract, guarded by [HasAVX, NoDQI].
let Predicates = [HasAVX, NoDQI] in {
  defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;
}

// Legacy-encoded dword extract.
defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
52970b57cec5SDimitry Andric
/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
///   rr - extractelt of a v2i64 source at immediate index $src2 into GR64.
///   mr - the same extractelt stored to an i64mem operand.
multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
                 (ins VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR64:$dst,
                  (extractelt (v2i64 VR128:$src1), imm:$src2))]>,
                  Sched<[WriteVecExtract]>;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i64mem:$dst, VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
                          addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}
53140b57cec5SDimitry Andric
// VEX-encoded qword extract, guarded by [HasAVX, NoDQI]. Shares opcode 0x16
// with the dword extract and additionally carries REX_W.
let Predicates = [HasAVX, NoDQI] in {
  defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, REX_W;
}

// Legacy-encoded qword extract, also with REX_W.
defm PEXTRQ : SS41I_extract64<0x16, "pextrq">, REX_W;
53190b57cec5SDimitry Andric
/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
/// destination. The v4f32 source is bitcast to v4i32 (bc_v4i32) before the
/// element at immediate index $src2 is extracted.
///   rr - result goes to a GR32orGR64 register.
///   mr - result is stored to an f32mem operand.
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
  // Register destination.
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins VR128:$src1, u8imm:$src2),
                   OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   [(set GR32orGR64:$dst,
                         (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
                                     imm:$src2))]>,
           Sched<[WriteVecExtract]>;

  // Memory destination.
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins f32mem:$dst, VR128:$src1, u8imm:$src2),
                   OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
                                       imm:$src2),
                           addr:$dst)]>,
           Sched<[WriteVecExtractSt]>;
}
53370b57cec5SDimitry Andric
// Both encodings are placed in the SSEPackedSingle execution domain.
let ExeDomain = SSEPackedSingle in {
  // VEX-encoded form, guarded by UseAVX.
  let Predicates = [UseAVX] in {
    defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX, WIG;
  }

  // Legacy-encoded form.
  defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
}
53430b57cec5SDimitry Andric
53440b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
53450b57cec5SDimitry Andric// SSE4.1 - Insert Instructions
53460b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
53470b57cec5SDimitry Andric
/// SS41I_insert8 - Byte insert into a VR128 via X86pinsrb at immediate index
/// $src3.
///   rr - inserted value comes from a GR32orGR64 register.
///   rm - inserted value comes from an extloadi8 of an i8mem operand.
/// Is2Addr selects the assembly string: when set, $src1 is omitted from the
/// printed operands; when clear, all three sources are printed.
multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
  // Register source.
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, GR32orGR64:$src2, u8imm:$src3),
      !strconcat(asm,
        !if(Is2Addr,
          "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
          "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86pinsrb VR128:$src1, GR32orGR64:$src2, timm:$src3))]>,
      Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;

  // Memory source.
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i8mem:$src2, u8imm:$src3),
      !strconcat(asm,
        !if(Is2Addr,
          "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
          "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), timm:$src3))]>,
      Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
53680b57cec5SDimitry Andric
// VEX-encoded byte insert (three-operand assembly, Is2Addr = 0), guarded by
// [HasAVX, NoBWI].
let Predicates = [HasAVX, NoBWI] in {
  defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V, WIG;

  // Inserting an any-extended GR8: place the 8-bit register into the low
  // sub-register (sub_8bit) of an undefined i32 and use the register form.
  def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
            (VPINSRBrr VR128:$src1,
                       (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                      GR8:$src2, sub_8bit),
                       timm:$src3)>;
}

// Legacy-encoded byte insert; the destination is tied to $src1.
let Constraints = "$src1 = $dst" in {
  defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
}
53780b57cec5SDimitry Andric
/// SS41I_insert32 - Dword insert into a VR128: a v4i32 insertelt at
/// immediate index $src3.
///   rr - inserted value comes from a GR32 register.
///   rm - inserted value comes from a loadi32 of an i32mem operand.
/// Is2Addr selects the assembly string: when set, $src1 is omitted from the
/// printed operands; when clear, all three sources are printed.
multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
  // Register source.
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, GR32:$src2, u8imm:$src3),
      !strconcat(asm,
        !if(Is2Addr,
          "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
          "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
      Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;

  // Memory source.
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i32mem:$src2, u8imm:$src3),
      !strconcat(asm,
        !if(Is2Addr,
          "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
          "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), imm:$src3)))]>,
      Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
53990b57cec5SDimitry Andric
// VEX-encoded dword insert (three-operand assembly), guarded by
// [HasAVX, NoDQI].
let Predicates = [HasAVX, NoDQI] in {
  defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
}

// Legacy-encoded dword insert; the destination is tied to $src1.
let Constraints = "$src1 = $dst" in {
  defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
}
54040b57cec5SDimitry Andric
/// SS41I_insert64 - Qword insert into a VR128: a v2i64 insertelt at
/// immediate index $src3.
///   rr - inserted value comes from a GR64 register.
///   rm - inserted value comes from a loadi64 of an i64mem operand.
/// Is2Addr selects the assembly string: when set, $src1 is omitted from the
/// printed operands; when clear, all three sources are printed.
multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
  // Register source.
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, GR64:$src2, u8imm:$src3),
      !strconcat(asm,
        !if(Is2Addr,
          "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
          "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
      Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;

  // Memory source.
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i64mem:$src2, u8imm:$src3),
      !strconcat(asm,
        !if(Is2Addr,
          "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
          "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), imm:$src3)))]>,
      Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
54250b57cec5SDimitry Andric
// VEX-encoded qword insert (three-operand assembly), guarded by
// [HasAVX, NoDQI]. Shares opcode 0x22 with the dword insert and additionally
// carries REX_W.
let Predicates = [HasAVX, NoDQI] in {
  defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, REX_W;
}

// Legacy-encoded qword insert; the destination is tied to $src1.
let Constraints = "$src1 = $dst" in {
  defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;
}
54300b57cec5SDimitry Andric
// insertps has a few different modes, there's the first two here below which
// are optimized inserts that won't zero arbitrary elements in the destination
// vector. The next one matches the intrinsic and could zero arbitrary elements
// in the target vector.
// NOTE(review): only two forms are defined in this multiclass, and both match
// X86insertps with a timm control byte - the "modes" wording above may predate
// the current patterns; confirm.
//
//   rr - second source is a VR128 register; marked isCommutable.
//   rm - second source is a loadf32 of an f32mem operand wrapped in
//        scalar_to_vector (v4f32).
// Is2Addr selects the assembly string: when set, $src1 is omitted from the
// printed operands; when clear, all three sources are printed.
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
  // Register source.
  let isCommutable = 1 in
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, VR128:$src2, u8imm:$src3),
      !strconcat(asm,
        !if(Is2Addr,
          "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
          "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86insertps VR128:$src1, VR128:$src2, timm:$src3))]>,
      Sched<[SchedWriteFShuffle.XMM]>;

  // Memory source.
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, f32mem:$src2, u8imm:$src3),
      !strconcat(asm,
        !if(Is2Addr,
          "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
          "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86insertps VR128:$src1,
                     (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                     timm:$src3))]>,
      Sched<[SchedWriteFShuffle.XMM.Folded,
             SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
54580b57cec5SDimitry Andric
// Instantiate SS41I_insertf32 at opcode 0x21 in the SSEPackedSingle domain:
//   VINSERTPS - Is2Addr = 0 (three-operand asm), VEX_4V + WIG, under UseAVX.
//   INSERTPS  - Is2Addr = 1 (two-operand asm), with $src1 tied to $dst.
54590b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in {
54600b57cec5SDimitry Andric  let Predicates = [UseAVX] in
54610b57cec5SDimitry Andric    defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>,
546206c3fb27SDimitry Andric                     VEX_4V, WIG;
54630b57cec5SDimitry Andric  let Constraints = "$src1 = $dst" in
54640b57cec5SDimitry Andric    defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1>;
54650b57cec5SDimitry Andric}
54660b57cec5SDimitry Andric
54670b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
54680b57cec5SDimitry Andric// SSE4.1 - Round Instructions
54690b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
54700b57cec5SDimitry Andric
// sse41_fp_unop_p - packed unary FP operation with an immediate operand.
// Defines a register form (r) and a memory form (m).
//   opc       - opcode byte passed to SS4AIi8.
//   OpcodeStr - mnemonic; the operand list is appended here.
//   x86memop  - memory operand class used by the m form.
//   RC        - register class of the destination (and of the r-form source).
//   VT        - value type the OpNode result is typed as.
//   mem_frag  - load fragment matched by the m form.
//   OpNode    - DAG operator applied to (source, timm:$src2).
//   sched     - scheduling class; the m form uses sched.Folded.
54710b57cec5SDimitry Andricmulticlass sse41_fp_unop_p<bits<8> opc, string OpcodeStr,
54720b57cec5SDimitry Andric                           X86MemOperand x86memop, RegisterClass RC,
5473fe6060f1SDimitry Andric                           ValueType VT, PatFrag mem_frag, SDPatternOperator OpNode,
54740b57cec5SDimitry Andric                           X86FoldableSchedWrite sched> {
54750b57cec5SDimitry Andric  // Both forms list MXCSR in Uses and set mayRaiseFPException.
54760b57cec5SDimitry Andric  // Vector intrinsic operation, reg
5477480093f4SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1 in {
54780b57cec5SDimitry Andric  def r : SS4AIi8<opc, MRMSrcReg,
54790b57cec5SDimitry Andric                  (outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
54800b57cec5SDimitry Andric                  !strconcat(OpcodeStr,
54810b57cec5SDimitry Andric                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
54828bcb0991SDimitry Andric                  [(set RC:$dst, (VT (OpNode RC:$src1, timm:$src2)))]>,
54830b57cec5SDimitry Andric                  Sched<[sched]>;
54840b57cec5SDimitry Andric
54850b57cec5SDimitry Andric  // Vector intrinsic operation, mem
54860b57cec5SDimitry Andric  def m : SS4AIi8<opc, MRMSrcMem,
54870b57cec5SDimitry Andric                  (outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
54880b57cec5SDimitry Andric                  !strconcat(OpcodeStr,
54890b57cec5SDimitry Andric                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
54900b57cec5SDimitry Andric                  [(set RC:$dst,
54918bcb0991SDimitry Andric                        (VT (OpNode (mem_frag addr:$src1), timm:$src2)))]>,
54920b57cec5SDimitry Andric                  Sched<[sched.Folded]>;
54930b57cec5SDimitry Andric} // Uses = [MXCSR], mayRaiseFPException = 1
5494480093f4SDimitry Andric}
54950b57cec5SDimitry Andric
// avx_fp_unop_rm - scalar FR32/FR64 forms with three source operands
// ($src1, $src2, immediate $src3). Defines SSr/SSm (opcss, "ss" suffix) and
// SDr/SDm (opcsd, "sd" suffix). All four are isCodeGenOnly with an empty
// pattern list, so selection has to come from separate Pat<> records (the
// VROUND patterns in this file reference VROUNDSSr/VROUNDSDr and the m forms).
//   opcss/opcsd - opcode bytes for the single/double variants.
//   OpcodeStr   - mnemonic stem; "ss"/"sd" and the operand list are appended.
//   sched       - scheduling class; memory forms use Folded + ReadAfterFold.
54960b57cec5SDimitry Andricmulticlass avx_fp_unop_rm<bits<8> opcss, bits<8> opcsd,
54970b57cec5SDimitry Andric                          string OpcodeStr, X86FoldableSchedWrite sched> {
54988bcb0991SDimitry Andriclet ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in {
54990b57cec5SDimitry Andric  def SSr : SS4AIi8<opcss, MRMSrcReg,
55000b57cec5SDimitry Andric        (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32u8imm:$src3),
55010b57cec5SDimitry Andric        !strconcat(OpcodeStr,
55020b57cec5SDimitry Andric            "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
55030b57cec5SDimitry Andric      []>, Sched<[sched]>;
55040b57cec5SDimitry Andric
  // mayLoad is set by hand; presumably it cannot be inferred from the empty
  // pattern -- confirm.
55050b57cec5SDimitry Andric  let mayLoad = 1 in
55060b57cec5SDimitry Andric  def SSm : SS4AIi8<opcss, MRMSrcMem,
55070b57cec5SDimitry Andric        (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, i32u8imm:$src3),
55080b57cec5SDimitry Andric        !strconcat(OpcodeStr,
55090b57cec5SDimitry Andric             "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
55100b57cec5SDimitry Andric        []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
55110b57cec5SDimitry Andric} // ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1
55120b57cec5SDimitry Andric
55138bcb0991SDimitry Andriclet ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in {
55140b57cec5SDimitry Andric  def SDr : SS4AIi8<opcsd, MRMSrcReg,
55150b57cec5SDimitry Andric        (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32u8imm:$src3),
55160b57cec5SDimitry Andric        !strconcat(OpcodeStr,
55170b57cec5SDimitry Andric              "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
55180b57cec5SDimitry Andric        []>, Sched<[sched]>;
55190b57cec5SDimitry Andric
55200b57cec5SDimitry Andric  let mayLoad = 1 in
55210b57cec5SDimitry Andric  def SDm : SS4AIi8<opcsd, MRMSrcMem,
55220b57cec5SDimitry Andric        (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, i32u8imm:$src3),
55230b57cec5SDimitry Andric        !strconcat(OpcodeStr,
55240b57cec5SDimitry Andric             "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
55250b57cec5SDimitry Andric        []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
55260b57cec5SDimitry Andric} // ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1
55270b57cec5SDimitry Andric}
55280b57cec5SDimitry Andric
// sse41_fp_unop_s - scalar FR32/FR64 forms with one source operand ($src1)
// plus an immediate ($src2). Defines SSr/SSm (opcss, "ss" suffix) and SDr/SDm
// (opcsd, "sd" suffix). All four are isCodeGenOnly with an empty pattern list,
// list MXCSR in Uses and set mayRaiseFPException.
//   opcss/opcsd - opcode bytes for the single/double variants.
//   OpcodeStr   - mnemonic stem; "ss"/"sd" and the operand list are appended.
//   sched       - scheduling class.
55290b57cec5SDimitry Andricmulticlass sse41_fp_unop_s<bits<8> opcss, bits<8> opcsd,
55300b57cec5SDimitry Andric                           string OpcodeStr, X86FoldableSchedWrite sched> {
5531480093f4SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1 in {
55328bcb0991SDimitry Andriclet ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in {
55330b57cec5SDimitry Andric  def SSr : SS4AIi8<opcss, MRMSrcReg,
55340b57cec5SDimitry Andric                    (outs FR32:$dst), (ins FR32:$src1, i32u8imm:$src2),
55350b57cec5SDimitry Andric                    !strconcat(OpcodeStr,
55360b57cec5SDimitry Andric                               "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
55370b57cec5SDimitry Andric                    []>, Sched<[sched]>;
55380b57cec5SDimitry Andric
  // NOTE(review): this form has no register source operand, yet it lists
  // sched.ReadAfterFold (sse41_fp_unop_p's m form lists only sched.Folded) --
  // confirm whether the extra entry is intended.
55390b57cec5SDimitry Andric  let mayLoad = 1 in
55400b57cec5SDimitry Andric  def SSm : SS4AIi8<opcss, MRMSrcMem,
55410b57cec5SDimitry Andric                    (outs FR32:$dst), (ins f32mem:$src1, i32u8imm:$src2),
55420b57cec5SDimitry Andric                    !strconcat(OpcodeStr,
55430b57cec5SDimitry Andric                               "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
55440b57cec5SDimitry Andric                    []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
55450b57cec5SDimitry Andric} // ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1
55460b57cec5SDimitry Andric
55478bcb0991SDimitry Andriclet ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in {
55480b57cec5SDimitry Andric  def SDr : SS4AIi8<opcsd, MRMSrcReg,
55490b57cec5SDimitry Andric                    (outs FR64:$dst), (ins FR64:$src1, i32u8imm:$src2),
55500b57cec5SDimitry Andric                    !strconcat(OpcodeStr,
55510b57cec5SDimitry Andric                               "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
55520b57cec5SDimitry Andric                    []>, Sched<[sched]>;
55530b57cec5SDimitry Andric
55540b57cec5SDimitry Andric  let mayLoad = 1 in
55550b57cec5SDimitry Andric  def SDm : SS4AIi8<opcsd, MRMSrcMem,
55560b57cec5SDimitry Andric                    (outs FR64:$dst), (ins f64mem:$src1, i32u8imm:$src2),
55570b57cec5SDimitry Andric                    !strconcat(OpcodeStr,
55580b57cec5SDimitry Andric                               "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
55590b57cec5SDimitry Andric                    []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
55600b57cec5SDimitry Andric} // ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1
55610b57cec5SDimitry Andric} // Uses = [MXCSR], mayRaiseFPException = 1
5562480093f4SDimitry Andric}
55630b57cec5SDimitry Andric
// sse41_fp_binop_s - VR128 "_Int" scalar forms taking two vector sources and
// an immediate. Defines SSr_Int/SSm_Int (opcss, "ss" suffix) and
// SDr_Int/SDm_Int (opcsd, "sd" suffix), each selected from OpNode. All four
// list MXCSR in Uses and set mayRaiseFPException.
//   opcss/opcsd - opcode bytes for the single/double variants.
//   OpcodeStr   - mnemonic stem; "ss"/"sd" and the operand list are appended.
//   sched       - scheduling class; memory forms use Folded + ReadAfterFold.
//   VT32/VT64   - value types applied to the OpNode result in the register
//                 forms (the memory-form patterns carry no explicit type).
//   OpNode      - DAG node applied to ($src1, $src2, timm:$src3).
//   Is2Addr     - when set, the asm string omits $src1 (two-operand syntax).
55640b57cec5SDimitry Andricmulticlass sse41_fp_binop_s<bits<8> opcss, bits<8> opcsd,
55650b57cec5SDimitry Andric                            string OpcodeStr, X86FoldableSchedWrite sched,
55660b57cec5SDimitry Andric                            ValueType VT32, ValueType VT64,
55670b57cec5SDimitry Andric                            SDNode OpNode, bit Is2Addr = 1> {
5568480093f4SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1 in {
55698bcb0991SDimitry Andriclet ExeDomain = SSEPackedSingle in {
55700b57cec5SDimitry Andric  def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
55710b57cec5SDimitry Andric        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
55720b57cec5SDimitry Andric        !if(Is2Addr,
55730b57cec5SDimitry Andric            !strconcat(OpcodeStr,
55740b57cec5SDimitry Andric                "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
55750b57cec5SDimitry Andric            !strconcat(OpcodeStr,
55760b57cec5SDimitry Andric                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
55778bcb0991SDimitry Andric        [(set VR128:$dst, (VT32 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>,
55780b57cec5SDimitry Andric        Sched<[sched]>;
55790b57cec5SDimitry Andric
  // Memory form: $src2 is an ssmem operand matched through sse_load_f32.
55800b57cec5SDimitry Andric  def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
55810b57cec5SDimitry Andric        (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32u8imm:$src3),
55820b57cec5SDimitry Andric        !if(Is2Addr,
55830b57cec5SDimitry Andric            !strconcat(OpcodeStr,
55840b57cec5SDimitry Andric                "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
55850b57cec5SDimitry Andric            !strconcat(OpcodeStr,
55860b57cec5SDimitry Andric                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
55870b57cec5SDimitry Andric        [(set VR128:$dst,
55885ffd83dbSDimitry Andric             (OpNode VR128:$src1, (sse_load_f32 addr:$src2), timm:$src3))]>,
55890b57cec5SDimitry Andric        Sched<[sched.Folded, sched.ReadAfterFold]>;
55900b57cec5SDimitry Andric} // ExeDomain = SSEPackedSingle
55910b57cec5SDimitry Andric
55928bcb0991SDimitry Andriclet ExeDomain = SSEPackedDouble in {
55930b57cec5SDimitry Andric  def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
55940b57cec5SDimitry Andric        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
55950b57cec5SDimitry Andric        !if(Is2Addr,
55960b57cec5SDimitry Andric            !strconcat(OpcodeStr,
55970b57cec5SDimitry Andric                "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
55980b57cec5SDimitry Andric            !strconcat(OpcodeStr,
55990b57cec5SDimitry Andric                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
56008bcb0991SDimitry Andric        [(set VR128:$dst, (VT64 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>,
56010b57cec5SDimitry Andric        Sched<[sched]>;
56020b57cec5SDimitry Andric
  // Memory form: $src2 is an sdmem operand matched through sse_load_f64.
56030b57cec5SDimitry Andric  def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
56040b57cec5SDimitry Andric        (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32u8imm:$src3),
56050b57cec5SDimitry Andric        !if(Is2Addr,
56060b57cec5SDimitry Andric            !strconcat(OpcodeStr,
56070b57cec5SDimitry Andric                "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
56080b57cec5SDimitry Andric            !strconcat(OpcodeStr,
56090b57cec5SDimitry Andric                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
56100b57cec5SDimitry Andric        [(set VR128:$dst,
56115ffd83dbSDimitry Andric              (OpNode VR128:$src1, (sse_load_f64 addr:$src2), timm:$src3))]>,
56120b57cec5SDimitry Andric        Sched<[sched.Folded, sched.ReadAfterFold]>;
56130b57cec5SDimitry Andric} // ExeDomain = SSEPackedDouble
56140b57cec5SDimitry Andric} // Uses = [MXCSR], mayRaiseFPException = 1
5615480093f4SDimitry Andric}
56160b57cec5SDimitry Andric
56170b57cec5SDimitry Andric// FP round - roundss, roundps, roundsd, roundpd
// Packed VEX forms, predicated on HasAVX + NoVLX. Each defm instantiates
// sse41_fp_unop_p with X86any_VRndScale; the Y variants use VR256/f256mem and
// add VEX_L. The enclosing lets repeat Uses = [MXCSR] and
// mayRaiseFPException = 1, which sse41_fp_unop_p also sets on its own defs.
56180b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
5619480093f4SDimitry Andric  let ExeDomain = SSEPackedSingle, Uses = [MXCSR], mayRaiseFPException = 1 in {
56200b57cec5SDimitry Andric    // Intrinsic form
56210b57cec5SDimitry Andric    defm VROUNDPS  : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
5622480093f4SDimitry Andric                                     loadv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>,
562306c3fb27SDimitry Andric                                   VEX, WIG;
56240b57cec5SDimitry Andric    defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32,
5625480093f4SDimitry Andric                                     loadv8f32, X86any_VRndScale, SchedWriteFRnd.YMM>,
562606c3fb27SDimitry Andric                                   VEX, VEX_L, WIG;
56270b57cec5SDimitry Andric  }
56280b57cec5SDimitry Andric
5629480093f4SDimitry Andric  let ExeDomain = SSEPackedDouble, Uses = [MXCSR], mayRaiseFPException = 1 in {
56300b57cec5SDimitry Andric    defm VROUNDPD  : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
5631480093f4SDimitry Andric                                     loadv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>,
563206c3fb27SDimitry Andric                                   VEX, WIG;
56330b57cec5SDimitry Andric    defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64,
5634480093f4SDimitry Andric                                     loadv4f64, X86any_VRndScale, SchedWriteFRnd.YMM>,
563506c3fb27SDimitry Andric                                   VEX, VEX_L, WIG;
56360b57cec5SDimitry Andric  }
56370b57cec5SDimitry Andric}
// Scalar VEX forms under UseAVX. The two defms share the VROUND prefix and the
// 0x0A/0x0B opcodes: sse41_fp_binop_s contributes the VR128 *_Int records
// (selected from X86RndScales, Is2Addr = 0), while avx_fp_unop_rm contributes
// the pattern-less FR32/FR64 SSr/SSm/SDr/SDm records.
56380b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
56390b57cec5SDimitry Andric  defm VROUND  : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl,
56400b57cec5SDimitry Andric                                  v4f32, v2f64, X86RndScales, 0>,
564106c3fb27SDimitry Andric                                  VEX_4V, VEX_LIG, WIG, SIMD_EXC;
56420b57cec5SDimitry Andric  defm VROUND  : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>,
564306c3fb27SDimitry Andric                                VEX_4V, VEX_LIG, WIG, SIMD_EXC;
56440b57cec5SDimitry Andric}
56450b57cec5SDimitry Andric
// Selection patterns mapping scalar X86any_VRndScale onto the pattern-less
// VROUNDSS/VROUNDSD records. The instruction's first source operand is filled
// with IMPLICIT_DEF; the value being operated on goes in the second source.
56460b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
5647480093f4SDimitry Andric  def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2),
56488bcb0991SDimitry Andric            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>;
5649480093f4SDimitry Andric  def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2),
56508bcb0991SDimitry Andric            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, timm:$src2)>;
56510b57cec5SDimitry Andric}
56520b57cec5SDimitry Andric
// Load-folding variants are only enabled together with OptForSize.
// NOTE(review): the reason for the size-only restriction is not stated
// here -- confirm before widening the predicate.
56530b57cec5SDimitry Andriclet Predicates = [UseAVX, OptForSize] in {
5654480093f4SDimitry Andric  def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2),
56558bcb0991SDimitry Andric            (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
5656480093f4SDimitry Andric  def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2),
56578bcb0991SDimitry Andric            (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
56580b57cec5SDimitry Andric}
56590b57cec5SDimitry Andric
// Non-VEX packed forms: sse41_fp_unop_p with the memop* load fragments (the
// VEX forms above use load*), one defm per execution domain.
56600b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in
56610b57cec5SDimitry Andricdefm ROUNDPS  : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32,
5662480093f4SDimitry Andric                                memopv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>;
56630b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in
56640b57cec5SDimitry Andricdefm ROUNDPD  : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64,
5665480093f4SDimitry Andric                                memopv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>;
56660b57cec5SDimitry Andric
// Non-VEX scalar forms: the pattern-less FR32/FR64 records come from
// sse41_fp_unop_s ...
56670b57cec5SDimitry Andricdefm ROUND  : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl>;
56680b57cec5SDimitry Andric
// ... and the VR128 *_Int records come from sse41_fp_binop_s with its default
// Is2Addr = 1 and $src1 tied to $dst.
56690b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in
56700b57cec5SDimitry Andricdefm ROUND  : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl,
56710b57cec5SDimitry Andric                               v4f32, v2f64, X86RndScales>;
56720b57cec5SDimitry Andric
// Selection patterns mapping scalar X86any_VRndScale onto the pattern-less
// ROUNDSS/ROUNDSD records; unlike the AVX patterns there is no extra
// IMPLICIT_DEF operand because these records take a single source.
56730b57cec5SDimitry Andriclet Predicates = [UseSSE41] in {
5674480093f4SDimitry Andric  def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2),
56758bcb0991SDimitry Andric            (ROUNDSSr FR32:$src1, timm:$src2)>;
5676480093f4SDimitry Andric  def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2),
56778bcb0991SDimitry Andric            (ROUNDSDr FR64:$src1, timm:$src2)>;
56780b57cec5SDimitry Andric}
56790b57cec5SDimitry Andric
// Load-folding variants are only enabled together with OptForSize.
56800b57cec5SDimitry Andriclet Predicates = [UseSSE41, OptForSize] in {
5681480093f4SDimitry Andric  def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2),
56828bcb0991SDimitry Andric            (ROUNDSSm addr:$src1, timm:$src2)>;
5683480093f4SDimitry Andric  def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2),
56848bcb0991SDimitry Andric            (ROUNDSDm addr:$src1, timm:$src2)>;
56850b57cec5SDimitry Andric}
56860b57cec5SDimitry Andric
56870b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
56880b57cec5SDimitry Andric// SSE4.1 - Packed Bit Test
56890b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
56900b57cec5SDimitry Andric
56910b57cec5SDimitry Andric// ptest instruction. X86ISelLowering lowers to this primarily from the
56920b57cec5SDimitry Andric// corresponding Intel intrinsic.
// VEX forms (HasAVX): no register outputs; each record sets EFLAGS from
// X86ptest on its two sources. The Y variants use VR256 and add VEX_L.
// NOTE(review): VPTESTrm takes f128mem while VPTESTYrm takes i256mem --
// confirm whether the mixed operand classes are intentional.
56930b57cec5SDimitry Andriclet Defs = [EFLAGS], Predicates = [HasAVX] in {
56940b57cec5SDimitry Andricdef VPTESTrr  : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
56950b57cec5SDimitry Andric                "vptest\t{$src2, $src1|$src1, $src2}",
56960b57cec5SDimitry Andric                [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
569706c3fb27SDimitry Andric                Sched<[SchedWriteVecTest.XMM]>, VEX, WIG;
56980b57cec5SDimitry Andricdef VPTESTrm  : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
56990b57cec5SDimitry Andric                "vptest\t{$src2, $src1|$src1, $src2}",
57000b57cec5SDimitry Andric                [(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
57010b57cec5SDimitry Andric                Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>,
570206c3fb27SDimitry Andric                VEX, WIG;
57030b57cec5SDimitry Andric
57040b57cec5SDimitry Andricdef VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
57050b57cec5SDimitry Andric                "vptest\t{$src2, $src1|$src1, $src2}",
57060b57cec5SDimitry Andric                [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
570706c3fb27SDimitry Andric                Sched<[SchedWriteVecTest.YMM]>, VEX, VEX_L, WIG;
57080b57cec5SDimitry Andricdef VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
57090b57cec5SDimitry Andric                "vptest\t{$src2, $src1|$src1, $src2}",
57100b57cec5SDimitry Andric                [(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
57110b57cec5SDimitry Andric                Sched<[SchedWriteVecTest.YMM.Folded, SchedWriteVecTest.YMM.ReadAfterFold]>,
571206c3fb27SDimitry Andric                VEX, VEX_L, WIG;
57130b57cec5SDimitry Andric}
57140b57cec5SDimitry Andric
// Non-VEX ptest: same opcode (0x17) and EFLAGS-setting X86ptest patterns as
// the VEX forms, but the memory form matches memopv2i64 instead of loadv2i64.
// No Predicates are set by this let block.
57150b57cec5SDimitry Andriclet Defs = [EFLAGS] in {
57160b57cec5SDimitry Andricdef PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
57170b57cec5SDimitry Andric              "ptest\t{$src2, $src1|$src1, $src2}",
57180b57cec5SDimitry Andric              [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
57190b57cec5SDimitry Andric              Sched<[SchedWriteVecTest.XMM]>;
57200b57cec5SDimitry Andricdef PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
57210b57cec5SDimitry Andric              "ptest\t{$src2, $src1|$src1, $src2}",
57220b57cec5SDimitry Andric              [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
57230b57cec5SDimitry Andric              Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>;
57240b57cec5SDimitry Andric}
57250b57cec5SDimitry Andric
57260b57cec5SDimitry Andric// The bit test instructions below are AVX only
// avx_bittest - defines rr/rm records with no register outputs that set EFLAGS
// from X86testp; both carry VEX.
//   opc       - opcode byte passed to SS48I.
//   OpcodeStr - mnemonic; the two-operand list is appended here.
//   RC        - register class of both sources (rr) / first source (rm).
//   x86memop  - memory operand class of $src2 in the rm form.
//   mem_frag  - load fragment matched by the rm form.
//   vt        - value type applied to $src2 in the rr pattern.
//   sched     - scheduling class; rm uses Folded + ReadAfterFold.
// The records do not set Defs themselves; the instantiation site in this file
// wraps the defms in `let Defs = [EFLAGS]`.
57270b57cec5SDimitry Andricmulticlass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
57280b57cec5SDimitry Andric                       X86MemOperand x86memop, PatFrag mem_frag, ValueType vt,
57290b57cec5SDimitry Andric                       X86FoldableSchedWrite sched> {
57300b57cec5SDimitry Andric  def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
57310b57cec5SDimitry Andric            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
57320b57cec5SDimitry Andric            [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>,
57330b57cec5SDimitry Andric            Sched<[sched]>, VEX;
57340b57cec5SDimitry Andric  def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
57350b57cec5SDimitry Andric            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
57360b57cec5SDimitry Andric            [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
57370b57cec5SDimitry Andric            Sched<[sched.Folded, sched.ReadAfterFold]>, VEX;
57380b57cec5SDimitry Andric}
57390b57cec5SDimitry Andric
// Instantiate avx_bittest for vtestps (0x0E) and vtestpd (0x0F) under HasAVX,
// with EFLAGS listed in Defs. The Y variants use VR256/f256mem and add VEX_L.
57400b57cec5SDimitry Andriclet Defs = [EFLAGS], Predicates = [HasAVX] in {
57410b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in {
57420b57cec5SDimitry Andricdefm VTESTPS  : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32, v4f32,
57430b57cec5SDimitry Andric                            SchedWriteFTest.XMM>;
57440b57cec5SDimitry Andricdefm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32, v8f32,
57450b57cec5SDimitry Andric                            SchedWriteFTest.YMM>, VEX_L;
57460b57cec5SDimitry Andric} // ExeDomain = SSEPackedSingle
57470b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in {
57480b57cec5SDimitry Andricdefm VTESTPD  : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64, v2f64,
57490b57cec5SDimitry Andric                            SchedWriteFTest.XMM>;
57500b57cec5SDimitry Andricdefm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64,
57510b57cec5SDimitry Andric                            SchedWriteFTest.YMM>, VEX_L;
57520b57cec5SDimitry Andric} // ExeDomain = SSEPackedDouble
57530b57cec5SDimitry Andric} // Defs = [EFLAGS], Predicates = [HasAVX]
57540b57cec5SDimitry Andric
57550b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
57560b57cec5SDimitry Andric// SSE4.1 - Misc Instructions
57570b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
57580b57cec5SDimitry Andric
// popcnt for 16/32/64-bit GPRs, register (rr) and memory (rm) sources, all at
// opcode 0xB8 with the XS prefix mixin and predicated on HasPOPCNT. Every
// pattern selects ctpop and marks EFLAGS as implicitly defined. The 16- and
// 32-bit records use class I with OpSize16/OpSize32; the 64-bit records use
// class RI and carry no OpSize mixin.
57590b57cec5SDimitry Andriclet Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
57600b57cec5SDimitry Andric  def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
57610b57cec5SDimitry Andric                     "popcnt{w}\t{$src, $dst|$dst, $src}",
57620b57cec5SDimitry Andric                     [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>,
57630b57cec5SDimitry Andric                     Sched<[WritePOPCNT]>, OpSize16, XS;
57640b57cec5SDimitry Andric  def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
57650b57cec5SDimitry Andric                     "popcnt{w}\t{$src, $dst|$dst, $src}",
57660b57cec5SDimitry Andric                     [(set GR16:$dst, (ctpop (loadi16 addr:$src))),
57670b57cec5SDimitry Andric                      (implicit EFLAGS)]>,
57680b57cec5SDimitry Andric                      Sched<[WritePOPCNT.Folded]>, OpSize16, XS;
57690b57cec5SDimitry Andric
57700b57cec5SDimitry Andric  def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
57710b57cec5SDimitry Andric                     "popcnt{l}\t{$src, $dst|$dst, $src}",
57720b57cec5SDimitry Andric                     [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>,
57730b57cec5SDimitry Andric                     Sched<[WritePOPCNT]>, OpSize32, XS;
57740b57cec5SDimitry Andric
57750b57cec5SDimitry Andric  def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
57760b57cec5SDimitry Andric                     "popcnt{l}\t{$src, $dst|$dst, $src}",
57770b57cec5SDimitry Andric                     [(set GR32:$dst, (ctpop (loadi32 addr:$src))),
57780b57cec5SDimitry Andric                      (implicit EFLAGS)]>,
57790b57cec5SDimitry Andric                      Sched<[WritePOPCNT.Folded]>, OpSize32, XS;
57800b57cec5SDimitry Andric
57810b57cec5SDimitry Andric  def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
57820b57cec5SDimitry Andric                      "popcnt{q}\t{$src, $dst|$dst, $src}",
57830b57cec5SDimitry Andric                      [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>,
57840b57cec5SDimitry Andric                      Sched<[WritePOPCNT]>, XS;
57850b57cec5SDimitry Andric  def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
57860b57cec5SDimitry Andric                      "popcnt{q}\t{$src, $dst|$dst, $src}",
57870b57cec5SDimitry Andric                      [(set GR64:$dst, (ctpop (loadi64 addr:$src))),
57880b57cec5SDimitry Andric                       (implicit EFLAGS)]>,
57890b57cec5SDimitry Andric                       Sched<[WritePOPCNT.Folded]>, XS;
57900b57cec5SDimitry Andric}
57910b57cec5SDimitry Andric
57920b57cec5SDimitry Andric// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
// Defines rr/rm records on VR128; the result of OpNode is typed v8i16 in both.
//   opc       - opcode byte passed to SS48I.
//   OpcodeStr - mnemonic; the two-operand list is appended here.
//   OpNode    - DAG node applied to the source.
//   ld_frag   - load fragment matched by the rm form (i128mem operand).
//   Sched     - scheduling class; rm uses Sched.Folded. Note the parameter has
//               the same name as the Sched<...> mixin class it is passed to.
57930b57cec5SDimitry Andricmulticlass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
57940b57cec5SDimitry Andric                                 SDNode OpNode, PatFrag ld_frag,
57950b57cec5SDimitry Andric                                 X86FoldableSchedWrite Sched> {
57960b57cec5SDimitry Andric  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
57970b57cec5SDimitry Andric                 (ins VR128:$src),
57980b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
57990b57cec5SDimitry Andric                 [(set VR128:$dst, (v8i16 (OpNode (v8i16 VR128:$src))))]>,
58000b57cec5SDimitry Andric                 Sched<[Sched]>;
58010b57cec5SDimitry Andric  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
58020b57cec5SDimitry Andric                  (ins i128mem:$src),
58030b57cec5SDimitry Andric                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
58040b57cec5SDimitry Andric                  [(set VR128:$dst,
58050b57cec5SDimitry Andric                    (v8i16 (OpNode (ld_frag addr:$src))))]>,
58060b57cec5SDimitry Andric                 Sched<[Sched.Folded]>;
58070b57cec5SDimitry Andric}
58080b57cec5SDimitry Andric
58090b57cec5SDimitry Andric// PHMIN has the same profile as PSAD, thus we use the same scheduling
58100b57cec5SDimitry Andric// model, although the naming is misleading.
// NOTE(review): both defms below pass WritePHMINPOS as the scheduling class,
// so the remark above about reusing the PSAD model looks stale -- confirm.
// The brace-less `let Predicates = [HasAVX]` covers only the VPHMINPOSUW defm
// (load fragment, VEX + WIG); PHMINPOSUW uses memop and gets no predicate here.
58110b57cec5SDimitry Andriclet Predicates = [HasAVX] in
58120b57cec5SDimitry Andricdefm VPHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "vphminposuw",
58130b57cec5SDimitry Andric                                         X86phminpos, load,
581406c3fb27SDimitry Andric                                         WritePHMINPOS>, VEX, WIG;
58150b57cec5SDimitry Andricdefm PHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "phminposuw",
58160b57cec5SDimitry Andric                                         X86phminpos, memop,
58170b57cec5SDimitry Andric                                         WritePHMINPOS>;
58180b57cec5SDimitry Andric
58190b57cec5SDimitry Andric/// SS48I_binop_rm - Simple SSE41 binary operator.
/// Defines rr/rm records computing (OpVT (OpNode $src1, $src2)).
///   opc        - opcode byte passed to SS48I.
///   OpcodeStr  - mnemonic; the operand list is appended here.
///   OpNode     - DAG node applied to the two sources.
///   OpVT       - value type of the result.
///   RC         - register class of $dst, $src1 and (rr) $src2.
///   memop_frag - load fragment matched by the rm form.
///   x86memop   - memory operand class of $src2 in the rm form.
///   sched      - scheduling class; rm uses Folded + ReadAfterFold.
///   Is2Addr    - when set, the asm string omits $src1 (two-operand syntax).
58200b57cec5SDimitry Andricmulticlass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
58210b57cec5SDimitry Andric                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
58220b57cec5SDimitry Andric                          X86MemOperand x86memop, X86FoldableSchedWrite sched,
58230b57cec5SDimitry Andric                          bit Is2Addr = 1> {
  // The brace-less `let` marks only the rr record as commutable.
58240b57cec5SDimitry Andric  let isCommutable = 1 in
58250b57cec5SDimitry Andric  def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
58260b57cec5SDimitry Andric       (ins RC:$src1, RC:$src2),
58270b57cec5SDimitry Andric       !if(Is2Addr,
58280b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
58290b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
58300b57cec5SDimitry Andric       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
58310b57cec5SDimitry Andric       Sched<[sched]>;
58320b57cec5SDimitry Andric  def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
58330b57cec5SDimitry Andric       (ins RC:$src1, x86memop:$src2),
58340b57cec5SDimitry Andric       !if(Is2Addr,
58350b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
58360b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
58370b57cec5SDimitry Andric       [(set RC:$dst,
58380b57cec5SDimitry Andric         (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
58390b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
58400b57cec5SDimitry Andric}
58410b57cec5SDimitry Andric
// 128-bit VEX min/max on v4i32 plus vpmuldq on v2i64, predicated on
// HasAVX + NoVLX. All pass Is2Addr = 0 (three-operand asm) and use the plain
// `load` fragment with i128mem.
58420b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
58430b57cec5SDimitry Andric  defm VPMINSD   : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128,
58440b57cec5SDimitry Andric                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
584506c3fb27SDimitry Andric                                  VEX_4V, WIG;
58460b57cec5SDimitry Andric  defm VPMINUD   : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128,
58470b57cec5SDimitry Andric                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
584806c3fb27SDimitry Andric                                  VEX_4V, WIG;
58490b57cec5SDimitry Andric  defm VPMAXSD   : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128,
58500b57cec5SDimitry Andric                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
585106c3fb27SDimitry Andric                                  VEX_4V, WIG;
58520b57cec5SDimitry Andric  defm VPMAXUD   : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128,
58530b57cec5SDimitry Andric                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
585406c3fb27SDimitry Andric                                  VEX_4V, WIG;
58550b57cec5SDimitry Andric  defm VPMULDQ   : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v2i64, VR128,
58560b57cec5SDimitry Andric                                  load, i128mem, SchedWriteVecIMul.XMM, 0>,
585706c3fb27SDimitry Andric                                  VEX_4V, WIG;
58580b57cec5SDimitry Andric}
// 128-bit VEX min/max on v16i8 and v8i16, predicated on
// HasAVX + NoVLX_Or_NoBWI (the dword group above uses NoVLX instead).
58590b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
58600b57cec5SDimitry Andric  defm VPMINSB   : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128,
58610b57cec5SDimitry Andric                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
586206c3fb27SDimitry Andric                                  VEX_4V, WIG;
58630b57cec5SDimitry Andric  defm VPMINUW   : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128,
58640b57cec5SDimitry Andric                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
586506c3fb27SDimitry Andric                                  VEX_4V, WIG;
58660b57cec5SDimitry Andric  defm VPMAXSB   : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128,
58670b57cec5SDimitry Andric                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
586806c3fb27SDimitry Andric                                  VEX_4V, WIG;
58690b57cec5SDimitry Andric  defm VPMAXUW   : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128,
58700b57cec5SDimitry Andric                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
587106c3fb27SDimitry Andric                                  VEX_4V, WIG;
58720b57cec5SDimitry Andric}
58730b57cec5SDimitry Andric
// 256-bit counterparts of the dword min/max and vpmuldq records: same opcodes,
// but VR256/i256mem, the YMM scheduling classes, VEX_L added, and predicated
// on HasAVX2 + NoVLX.
58740b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
58750b57cec5SDimitry Andric  defm VPMINSDY  : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256,
58760b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
587706c3fb27SDimitry Andric                                  VEX_4V, VEX_L, WIG;
58780b57cec5SDimitry Andric  defm VPMINUDY  : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256,
58790b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
588006c3fb27SDimitry Andric                                  VEX_4V, VEX_L, WIG;
58810b57cec5SDimitry Andric  defm VPMAXSDY  : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256,
58820b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
588306c3fb27SDimitry Andric                                  VEX_4V, VEX_L, WIG;
58840b57cec5SDimitry Andric  defm VPMAXUDY  : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256,
58850b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
588606c3fb27SDimitry Andric                                  VEX_4V, VEX_L, WIG;
58870b57cec5SDimitry Andric  defm VPMULDQY  : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v4i64, VR256,
58880b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecIMul.YMM, 0>,
588906c3fb27SDimitry Andric                                  VEX_4V, VEX_L, WIG;
58900b57cec5SDimitry Andric}
// 256-bit (VR256, VEX_L) byte/word min/max gated on HasAVX2 and
// NoVLX_Or_NoBWI: smin/smax on v32i8 and umin/umax on v16i16. Opcodes match
// the 128-bit VPMINSB/VPMINUW/VPMAXSB/VPMAXUW records (0x38/0x3A/0x3C/0x3E).
58910b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
58920b57cec5SDimitry Andric  defm VPMINSBY  : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256,
58930b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
589406c3fb27SDimitry Andric                                  VEX_4V, VEX_L, WIG;
58950b57cec5SDimitry Andric  defm VPMINUWY  : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256,
58960b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
589706c3fb27SDimitry Andric                                  VEX_4V, VEX_L, WIG;
58980b57cec5SDimitry Andric  defm VPMAXSBY  : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256,
58990b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
590006c3fb27SDimitry Andric                                  VEX_4V, VEX_L, WIG;
59010b57cec5SDimitry Andric  defm VPMAXUWY  : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256,
59020b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
590306c3fb27SDimitry Andric                                  VEX_4V, VEX_L, WIG;
59040b57cec5SDimitry Andric}
59050b57cec5SDimitry Andric
// Non-VEX 128-bit forms of the min/max and pmuldq records. The destination is
// tied to the first source ("$src1 = $dst"), the memory fragment is `memop`
// rather than `load`, and the trailing template argument is 1 instead of 0.
// No Predicates list is set at this level.
// NOTE(review): the trailing 1 is presumably SS48I_binop_rm's Is2Addr flag;
// that multiclass is defined outside this chunk - confirm.
59060b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
59070b57cec5SDimitry Andric  defm PMINSB   : SS48I_binop_rm<0x38, "pminsb", smin, v16i8, VR128,
59080b57cec5SDimitry Andric                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
59090b57cec5SDimitry Andric  defm PMINSD   : SS48I_binop_rm<0x39, "pminsd", smin, v4i32, VR128,
59100b57cec5SDimitry Andric                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
59110b57cec5SDimitry Andric  defm PMINUD   : SS48I_binop_rm<0x3B, "pminud", umin, v4i32, VR128,
59120b57cec5SDimitry Andric                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
59130b57cec5SDimitry Andric  defm PMINUW   : SS48I_binop_rm<0x3A, "pminuw", umin, v8i16, VR128,
59140b57cec5SDimitry Andric                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
59150b57cec5SDimitry Andric  defm PMAXSB   : SS48I_binop_rm<0x3C, "pmaxsb", smax, v16i8, VR128,
59160b57cec5SDimitry Andric                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
59170b57cec5SDimitry Andric  defm PMAXSD   : SS48I_binop_rm<0x3D, "pmaxsd", smax, v4i32, VR128,
59180b57cec5SDimitry Andric                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
59190b57cec5SDimitry Andric  defm PMAXUD   : SS48I_binop_rm<0x3F, "pmaxud", umax, v4i32, VR128,
59200b57cec5SDimitry Andric                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
59210b57cec5SDimitry Andric  defm PMAXUW   : SS48I_binop_rm<0x3E, "pmaxuw", umax, v8i16, VR128,
59220b57cec5SDimitry Andric                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
59230b57cec5SDimitry Andric  defm PMULDQ   : SS48I_binop_rm<0x28, "pmuldq", X86pmuldq, v2i64, VR128,
59240b57cec5SDimitry Andric                                 memop, i128mem, SchedWriteVecIMul.XMM, 1>;
59250b57cec5SDimitry Andric}
59260b57cec5SDimitry Andric
// VEX-encoded 128-bit vpmulld (generic `mul` on v4i32) and vpcmpeqq
// (X86pcmpeq on v2i64). Note the differing gates: VPMULLD requires both
// HasAVX and NoVLX, while VPCMPEQQ is gated on HasAVX alone.
59270b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in
59280b57cec5SDimitry Andric  defm VPMULLD  : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
59290b57cec5SDimitry Andric                                 load, i128mem, SchedWritePMULLD.XMM, 0>,
593006c3fb27SDimitry Andric                                 VEX_4V, WIG;
59310b57cec5SDimitry Andriclet Predicates = [HasAVX] in
59320b57cec5SDimitry Andric  defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
59330b57cec5SDimitry Andric                                 load, i128mem, SchedWriteVecALU.XMM, 0>,
593406c3fb27SDimitry Andric                                 VEX_4V, WIG;
59350b57cec5SDimitry Andric
// 256-bit (VR256, VEX_L) vpmulld on v8i32 and vpcmpeqq on v4i64. As with the
// 128-bit records, VPMULLDY is gated on HasAVX2 and NoVLX while VPCMPEQQY is
// gated on HasAVX2 alone.
59360b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in
59370b57cec5SDimitry Andric  defm VPMULLDY  : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
59380b57cec5SDimitry Andric                                  load, i256mem, SchedWritePMULLD.YMM, 0>,
593906c3fb27SDimitry Andric                                  VEX_4V, VEX_L, WIG;
59400b57cec5SDimitry Andriclet Predicates = [HasAVX2] in
59410b57cec5SDimitry Andric  defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
59420b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
594306c3fb27SDimitry Andric                                  VEX_4V, VEX_L, WIG;
59440b57cec5SDimitry Andric
// Non-VEX 128-bit pmulld / pcmpeqq with the destination tied to $src1. They
// share opcodes 0x40 / 0x29 with the VEX records and use the `memop` fragment.
59450b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
59460b57cec5SDimitry Andric  defm PMULLD  : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
59470b57cec5SDimitry Andric                                memop, i128mem, SchedWritePMULLD.XMM, 1>;
59480b57cec5SDimitry Andric  defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
59490b57cec5SDimitry Andric                                memop, i128mem, SchedWriteVecALU.XMM, 1>;
59500b57cec5SDimitry Andric}
59510b57cec5SDimitry Andric
59520b57cec5SDimitry Andric/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
//
// Intrinsic-based variant: the selection pattern matches `IntId` directly.
// Produces two records per instantiation:
//   rri - register/register/imm8 form (MRMSrcReg), scheduled with `sched`.
//   rmi - register/memory/imm8 form (MRMSrcMem), where the second source is
//         `memop_frag addr:$src2`, scheduled with sched.Folded and
//         sched.ReadAfterFold.
// Parameters:
//   opc        - opcode byte passed to SS4AIi8.
//   OpcodeStr  - mnemonic prepended to the operand string.
//   IntId      - intrinsic matched by both patterns.
//   RC         - register class of $dst, $src1 and (for rri) $src2.
//   memop_frag - load fragment used for the memory source.
//   x86memop   - memory operand type of $src2 in rmi.
//   Is2Addr    - selects the two-operand asm string (no $src1 printed) when
//                set, otherwise the three-operand string.
//   sched      - scheduling information.
// Only rri is marked isCommutable here (the brace-less `let` covers a single
// def); several instantiations below wrap the defm in `isCommutable = 0`.
59530b57cec5SDimitry Andricmulticlass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
59540b57cec5SDimitry Andric                 Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
59550b57cec5SDimitry Andric                 X86MemOperand x86memop, bit Is2Addr,
59560b57cec5SDimitry Andric                 X86FoldableSchedWrite sched> {
59570b57cec5SDimitry Andric  let isCommutable = 1 in
59580b57cec5SDimitry Andric  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
59590b57cec5SDimitry Andric        (ins RC:$src1, RC:$src2, u8imm:$src3),
59600b57cec5SDimitry Andric        !if(Is2Addr,
59610b57cec5SDimitry Andric            !strconcat(OpcodeStr,
59620b57cec5SDimitry Andric                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
59630b57cec5SDimitry Andric            !strconcat(OpcodeStr,
59640b57cec5SDimitry Andric                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
59658bcb0991SDimitry Andric        [(set RC:$dst, (IntId RC:$src1, RC:$src2, timm:$src3))]>,
59660b57cec5SDimitry Andric        Sched<[sched]>;
59670b57cec5SDimitry Andric  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
59680b57cec5SDimitry Andric        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
59690b57cec5SDimitry Andric        !if(Is2Addr,
59700b57cec5SDimitry Andric            !strconcat(OpcodeStr,
59710b57cec5SDimitry Andric                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
59720b57cec5SDimitry Andric            !strconcat(OpcodeStr,
59730b57cec5SDimitry Andric                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
59740b57cec5SDimitry Andric        [(set RC:$dst,
59758bcb0991SDimitry Andric          (IntId RC:$src1, (memop_frag addr:$src2), timm:$src3))]>,
59760b57cec5SDimitry Andric        Sched<[sched.Folded, sched.ReadAfterFold]>;
59770b57cec5SDimitry Andric}
59780b57cec5SDimitry Andric
59790b57cec5SDimitry Andric/// SS41I_binop_rmi - SSE 4.1 binary operator with 8-bit immediate
//
// SDNode-based variant of SS41I_binop_rmi_int: the patterns match
// `OpNode` and wrap the result in the explicit value type `OpVT`.
// Produces:
//   rri - register/register/imm8 form (MRMSrcReg), marked isCommutable.
//   rmi - register/memory/imm8 form (MRMSrcMem) using `memop_frag addr:$src2`,
//         scheduled with sched.Folded and sched.ReadAfterFold.
// Is2Addr only selects between the two- and three-operand asm strings here;
// any "$src1 = $dst" constraint has to be supplied by the instantiation site.
59800b57cec5SDimitry Andricmulticlass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
59810b57cec5SDimitry Andric                           ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
59820b57cec5SDimitry Andric                           X86MemOperand x86memop, bit Is2Addr,
59830b57cec5SDimitry Andric                           X86FoldableSchedWrite sched> {
59840b57cec5SDimitry Andric  let isCommutable = 1 in
59850b57cec5SDimitry Andric  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
59860b57cec5SDimitry Andric        (ins RC:$src1, RC:$src2, u8imm:$src3),
59870b57cec5SDimitry Andric        !if(Is2Addr,
59880b57cec5SDimitry Andric            !strconcat(OpcodeStr,
59890b57cec5SDimitry Andric                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
59900b57cec5SDimitry Andric            !strconcat(OpcodeStr,
59910b57cec5SDimitry Andric                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
59928bcb0991SDimitry Andric        [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
59930b57cec5SDimitry Andric        Sched<[sched]>;
59940b57cec5SDimitry Andric  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
59950b57cec5SDimitry Andric        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
59960b57cec5SDimitry Andric        !if(Is2Addr,
59970b57cec5SDimitry Andric            !strconcat(OpcodeStr,
59980b57cec5SDimitry Andric                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
59990b57cec5SDimitry Andric            !strconcat(OpcodeStr,
60000b57cec5SDimitry Andric                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
60010b57cec5SDimitry Andric        [(set RC:$dst,
60028bcb0991SDimitry Andric          (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>,
60030b57cec5SDimitry Andric        Sched<[sched.Folded, sched.ReadAfterFold]>;
60040b57cec5SDimitry Andric}
60050b57cec5SDimitry Andric
// Immediate transform for commuting a blend with a 2-bit mask: keeps only the
// low two bits of the target immediate and inverts them (XOR with 0x03).
60068bcb0991SDimitry Andricdef BlendCommuteImm2 : SDNodeXForm<timm, [{
60070b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue() & 0x03;
60080b57cec5SDimitry Andric  return getI8Imm(Imm ^ 0x03, SDLoc(N));
60090b57cec5SDimitry Andric}]>;
60100b57cec5SDimitry Andric
// Immediate transform for commuting a blend with a 4-bit mask: keeps only the
// low four bits of the target immediate and inverts them (XOR with 0x0f).
60118bcb0991SDimitry Andricdef BlendCommuteImm4 : SDNodeXForm<timm, [{
60120b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue() & 0x0f;
60130b57cec5SDimitry Andric  return getI8Imm(Imm ^ 0x0f, SDLoc(N));
60140b57cec5SDimitry Andric}]>;
60150b57cec5SDimitry Andric
// Immediate transform for commuting a blend with an 8-bit mask: keeps the low
// eight bits of the target immediate and inverts all of them (XOR with 0xff).
60168bcb0991SDimitry Andricdef BlendCommuteImm8 : SDNodeXForm<timm, [{
60170b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue() & 0xff;
60180b57cec5SDimitry Andric  return getI8Imm(Imm ^ 0xff, SDLoc(N));
60190b57cec5SDimitry Andric}]>;
60200b57cec5SDimitry Andric
60210b57cec5SDimitry Andric// Turn a 4-bit blendi immediate to 8-bit for use with pblendw.
// Each set input bit i (i = 0..3) becomes the bit pair 0x3 at position 2*i in
// the result, e.g. 0b0101 -> 0b00110011. Input bits above bit 3 are ignored.
60228bcb0991SDimitry Andricdef BlendScaleImm4 : SDNodeXForm<timm, [{
60230b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue();
60240b57cec5SDimitry Andric  uint8_t NewImm = 0;
60250b57cec5SDimitry Andric  for (unsigned i = 0; i != 4; ++i) {
60260b57cec5SDimitry Andric    if (Imm & (1 << i))
60270b57cec5SDimitry Andric      NewImm |= 0x3 << (i * 2);
60280b57cec5SDimitry Andric  }
60290b57cec5SDimitry Andric  return getI8Imm(NewImm, SDLoc(N));
60300b57cec5SDimitry Andric}]>;
60310b57cec5SDimitry Andric
60320b57cec5SDimitry Andric// Turn a 2-bit blendi immediate to 8-bit for use with pblendw.
// Each set input bit i (i = 0..1) becomes the nibble 0xf at position 4*i in
// the result, e.g. 0b10 -> 0xf0. Input bits above bit 1 are ignored.
60338bcb0991SDimitry Andricdef BlendScaleImm2 : SDNodeXForm<timm, [{
60340b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue();
60350b57cec5SDimitry Andric  uint8_t NewImm = 0;
60360b57cec5SDimitry Andric  for (unsigned i = 0; i != 2; ++i) {
60370b57cec5SDimitry Andric    if (Imm & (1 << i))
60380b57cec5SDimitry Andric      NewImm |= 0xf << (i * 4);
60390b57cec5SDimitry Andric  }
60400b57cec5SDimitry Andric  return getI8Imm(NewImm, SDLoc(N));
60410b57cec5SDimitry Andric}]>;
60420b57cec5SDimitry Andric
60430b57cec5SDimitry Andric// Turn a 2-bit blendi immediate to 4-bit for use with pblendd.
// Each set input bit i (i = 0..1) becomes the bit pair 0x3 at position 2*i in
// the result, e.g. 0b10 -> 0b1100. Input bits above bit 1 are ignored.
60448bcb0991SDimitry Andricdef BlendScaleImm2to4 : SDNodeXForm<timm, [{
60450b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue();
60460b57cec5SDimitry Andric  uint8_t NewImm = 0;
60470b57cec5SDimitry Andric  for (unsigned i = 0; i != 2; ++i) {
60480b57cec5SDimitry Andric    if (Imm & (1 << i))
60490b57cec5SDimitry Andric      NewImm |= 0x3 << (i * 2);
60500b57cec5SDimitry Andric  }
60510b57cec5SDimitry Andric  return getI8Imm(NewImm, SDLoc(N));
60520b57cec5SDimitry Andric}]>;
60530b57cec5SDimitry Andric
60540b57cec5SDimitry Andric// Turn a 4-bit blendi immediate to 8-bit for use with pblendw and invert it.
// Same bit-pair expansion as BlendScaleImm4, followed by XOR with 0xff so the
// scaled mask selects the opposite source for every lane.
60558bcb0991SDimitry Andricdef BlendScaleCommuteImm4 : SDNodeXForm<timm, [{
60560b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue();
60570b57cec5SDimitry Andric  uint8_t NewImm = 0;
60580b57cec5SDimitry Andric  for (unsigned i = 0; i != 4; ++i) {
60590b57cec5SDimitry Andric    if (Imm & (1 << i))
60600b57cec5SDimitry Andric      NewImm |= 0x3 << (i * 2);
60610b57cec5SDimitry Andric  }
60620b57cec5SDimitry Andric  return getI8Imm(NewImm ^ 0xff, SDLoc(N));
60630b57cec5SDimitry Andric}]>;
60640b57cec5SDimitry Andric
60650b57cec5SDimitry Andric// Turn a 2-bit blendi immediate to 8-bit for use with pblendw and invert it.
// Same nibble expansion as BlendScaleImm2, followed by XOR with 0xff so the
// scaled mask selects the opposite source for every lane.
60668bcb0991SDimitry Andricdef BlendScaleCommuteImm2 : SDNodeXForm<timm, [{
60670b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue();
60680b57cec5SDimitry Andric  uint8_t NewImm = 0;
60690b57cec5SDimitry Andric  for (unsigned i = 0; i != 2; ++i) {
60700b57cec5SDimitry Andric    if (Imm & (1 << i))
60710b57cec5SDimitry Andric      NewImm |= 0xf << (i * 4);
60720b57cec5SDimitry Andric  }
60730b57cec5SDimitry Andric  return getI8Imm(NewImm ^ 0xff, SDLoc(N));
60740b57cec5SDimitry Andric}]>;
60750b57cec5SDimitry Andric
60760b57cec5SDimitry Andric// Turn a 2-bit blendi immediate to 4-bit for use with pblendd and invert it.
// Same bit-pair expansion as BlendScaleImm2to4, followed by XOR with 0xf:
// only the four scaled mask bits are inverted, the upper nibble stays zero.
60778bcb0991SDimitry Andricdef BlendScaleCommuteImm2to4 : SDNodeXForm<timm, [{
60780b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue();
60790b57cec5SDimitry Andric  uint8_t NewImm = 0;
60800b57cec5SDimitry Andric  for (unsigned i = 0; i != 2; ++i) {
60810b57cec5SDimitry Andric    if (Imm & (1 << i))
60820b57cec5SDimitry Andric      NewImm |= 0x3 << (i * 2);
60830b57cec5SDimitry Andric  }
60840b57cec5SDimitry Andric  return getI8Imm(NewImm ^ 0xf, SDLoc(N));
60850b57cec5SDimitry Andric}]>;
60860b57cec5SDimitry Andric
// VEX-encoded intrinsic-based records gated on HasAVX: vmpsadbw (128-bit) and
// the dot-product instructions vdpps / vdppd (128-bit) and vdpps (256-bit).
// VMPSADBW is wrapped in `isCommutable = 0`, presumably to override the
// isCommutable = 1 that SS41I_binop_rmi_int places on its rri record.
// The dot-product records additionally declare a use of MXCSR and set
// mayRaiseFPException.
// NOTE(review): VDPPSY passes i256mem as its memory operand while the 128-bit
// FP records pass f128mem; f256mem may have been intended - confirm.
60870b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
60880b57cec5SDimitry Andric  let isCommutable = 0 in {
60890b57cec5SDimitry Andric    defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
60900b57cec5SDimitry Andric                                        VR128, load, i128mem, 0,
609106c3fb27SDimitry Andric                                        SchedWriteMPSAD.XMM>, VEX_4V, WIG;
60920b57cec5SDimitry Andric  }
60930b57cec5SDimitry Andric
6094480093f4SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1 in {
60950b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in
60960b57cec5SDimitry Andric  defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
60970b57cec5SDimitry Andric                                   VR128, load, f128mem, 0,
609806c3fb27SDimitry Andric                                   SchedWriteDPPS.XMM>, VEX_4V, WIG;
60990b57cec5SDimitry Andric  let ExeDomain = SSEPackedDouble in
61000b57cec5SDimitry Andric  defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
61010b57cec5SDimitry Andric                                   VR128, load, f128mem, 0,
610206c3fb27SDimitry Andric                                   SchedWriteDPPD.XMM>, VEX_4V, WIG;
61030b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in
61040b57cec5SDimitry Andric  defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
61050b57cec5SDimitry Andric                                    VR256, load, i256mem, 0,
610606c3fb27SDimitry Andric                                    SchedWriteDPPS.YMM>, VEX_4V, VEX_L, WIG;
61070b57cec5SDimitry Andric}
6108480093f4SDimitry Andric}
61090b57cec5SDimitry Andric
// 256-bit vmpsadbw (int_x86_avx2_mpsadbw over VR256, VEX_L), gated on HasAVX2
// and wrapped in `isCommutable = 0` like the 128-bit record.
61100b57cec5SDimitry Andriclet Predicates = [HasAVX2] in {
61110b57cec5SDimitry Andric  let isCommutable = 0 in {
61120b57cec5SDimitry Andric  defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
61130b57cec5SDimitry Andric                                  VR256, load, i256mem, 0,
611406c3fb27SDimitry Andric                                  SchedWriteMPSAD.YMM>, VEX_4V, VEX_L, WIG;
61150b57cec5SDimitry Andric  }
61160b57cec5SDimitry Andric}
61170b57cec5SDimitry Andric
// Non-VEX forms of mpsadbw / dpps / dppd: destination tied to $src1, `memop`
// as the memory fragment, and Is2Addr = 1 so the two-operand asm string is
// used. MPSADBW is wrapped in `isCommutable = 0`; DPPS and DPPD carry the
// SIMD_EXC mix-in (defined outside this chunk).
61180b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
61190b57cec5SDimitry Andric  let isCommutable = 0 in {
61200b57cec5SDimitry Andric  defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
61210b57cec5SDimitry Andric                                     VR128, memop, i128mem, 1,
61220b57cec5SDimitry Andric                                     SchedWriteMPSAD.XMM>;
61230b57cec5SDimitry Andric  }
61240b57cec5SDimitry Andric
61250b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in
61260b57cec5SDimitry Andric  defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
61270b57cec5SDimitry Andric                                  VR128, memop, f128mem, 1,
6128480093f4SDimitry Andric                                  SchedWriteDPPS.XMM>, SIMD_EXC;
61290b57cec5SDimitry Andric  let ExeDomain = SSEPackedDouble in
61300b57cec5SDimitry Andric  defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
61310b57cec5SDimitry Andric                                  VR128, memop, f128mem, 1,
6132480093f4SDimitry Andric                                  SchedWriteDPPD.XMM>, SIMD_EXC;
61330b57cec5SDimitry Andric}
61340b57cec5SDimitry Andric
61350b57cec5SDimitry Andric/// SS41I_blend_rmi - SSE 4.1 blend with 8-bit immediate
//
// Like SS41I_binop_rmi, but additionally:
//   * sets ExeDomain from `d`;
//   * applies the "$src1 = $dst" constraint itself when Is2Addr is set, so
//     instantiation sites do not need their own `let Constraints`;
//   * emits an extra selection pattern for the case where the load is the
//     FIRST blend operand: the operands are swapped onto the rmi record and
//     the immediate is rewritten through `commuteXForm`.
// `commuteXForm` must therefore match the mask width of OpVT; the
// instantiations in this file pass BlendCommuteImm2/4/8.
61360b57cec5SDimitry Andricmulticlass SS41I_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
61370b57cec5SDimitry Andric                           ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
61380b57cec5SDimitry Andric                           X86MemOperand x86memop, bit Is2Addr, Domain d,
61390b57cec5SDimitry Andric                           X86FoldableSchedWrite sched, SDNodeXForm commuteXForm> {
61400b57cec5SDimitry Andriclet ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in {
61410b57cec5SDimitry Andric  let isCommutable = 1 in
61420b57cec5SDimitry Andric  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
61430b57cec5SDimitry Andric        (ins RC:$src1, RC:$src2, u8imm:$src3),
61440b57cec5SDimitry Andric        !if(Is2Addr,
61450b57cec5SDimitry Andric            !strconcat(OpcodeStr,
61460b57cec5SDimitry Andric                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
61470b57cec5SDimitry Andric            !strconcat(OpcodeStr,
61480b57cec5SDimitry Andric                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
61498bcb0991SDimitry Andric        [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
61500b57cec5SDimitry Andric        Sched<[sched]>;
61510b57cec5SDimitry Andric  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
61520b57cec5SDimitry Andric        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
61530b57cec5SDimitry Andric        !if(Is2Addr,
61540b57cec5SDimitry Andric            !strconcat(OpcodeStr,
61550b57cec5SDimitry Andric                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
61560b57cec5SDimitry Andric            !strconcat(OpcodeStr,
61570b57cec5SDimitry Andric                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
61580b57cec5SDimitry Andric        [(set RC:$dst,
61598bcb0991SDimitry Andric          (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>,
61600b57cec5SDimitry Andric        Sched<[sched.Folded, sched.ReadAfterFold]>;
61610b57cec5SDimitry Andric}
61620b57cec5SDimitry Andric
61630b57cec5SDimitry Andric  // Pattern to commute if load is in first source.
61648bcb0991SDimitry Andric  def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, timm:$src3)),
61650b57cec5SDimitry Andric            (!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
61668bcb0991SDimitry Andric                                            (commuteXForm timm:$src3))>;
61670b57cec5SDimitry Andric}
61680b57cec5SDimitry Andric
// VEX-encoded immediate blends gated on HasAVX. The commute transform passed
// as the last template argument matches the element count of each type:
// v4f32 -> Imm4, v8f32 -> Imm8, v2f64 -> Imm2, v4f64 -> Imm4, v8i16 -> Imm8.
61690b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
61700b57cec5SDimitry Andric  defm VBLENDPS : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v4f32,
61710b57cec5SDimitry Andric                                  VR128, load, f128mem, 0, SSEPackedSingle,
61720b57cec5SDimitry Andric                                  SchedWriteFBlend.XMM, BlendCommuteImm4>,
617306c3fb27SDimitry Andric                                  VEX_4V, WIG;
61740b57cec5SDimitry Andric  defm VBLENDPSY : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v8f32,
61750b57cec5SDimitry Andric                                   VR256, load, f256mem, 0, SSEPackedSingle,
61760b57cec5SDimitry Andric                                   SchedWriteFBlend.YMM, BlendCommuteImm8>,
617706c3fb27SDimitry Andric                                   VEX_4V, VEX_L, WIG;
61780b57cec5SDimitry Andric  defm VBLENDPD : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v2f64,
61790b57cec5SDimitry Andric                                  VR128, load, f128mem, 0, SSEPackedDouble,
61800b57cec5SDimitry Andric                                  SchedWriteFBlend.XMM, BlendCommuteImm2>,
618106c3fb27SDimitry Andric                                  VEX_4V, WIG;
61820b57cec5SDimitry Andric  defm VBLENDPDY : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v4f64,
61830b57cec5SDimitry Andric                                   VR256, load, f256mem, 0, SSEPackedDouble,
61840b57cec5SDimitry Andric                                   SchedWriteFBlend.YMM, BlendCommuteImm4>,
618506c3fb27SDimitry Andric                                   VEX_4V, VEX_L, WIG;
61860b57cec5SDimitry Andric  defm VPBLENDW : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v8i16,
61870b57cec5SDimitry Andric                                  VR128, load, i128mem, 0, SSEPackedInt,
61880b57cec5SDimitry Andric                                  SchedWriteBlend.XMM, BlendCommuteImm8>,
618906c3fb27SDimitry Andric                                  VEX_4V, WIG;
61900b57cec5SDimitry Andric}
61910b57cec5SDimitry Andric
// 256-bit vpblendw on v16i16, gated on HasAVX2. It still passes
// BlendCommuteImm8: the instruction's immediate operand is u8imm, i.e. eight
// mask bits even though the vector has sixteen elements.
61920b57cec5SDimitry Andriclet Predicates = [HasAVX2] in {
61930b57cec5SDimitry Andric  defm VPBLENDWY : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v16i16,
61940b57cec5SDimitry Andric                                   VR256, load, i256mem, 0, SSEPackedInt,
61950b57cec5SDimitry Andric                                   SchedWriteBlend.YMM, BlendCommuteImm8>,
619606c3fb27SDimitry Andric                                   VEX_4V, VEX_L, WIG;
61970b57cec5SDimitry Andric}
61980b57cec5SDimitry Andric
61990b57cec5SDimitry Andric// Emulate vXi32/vXi64 blends with vXf32/vXf64 or pblendw.
62000b57cec5SDimitry Andric// ExecutionDomainFixPass will cleanup domains later on.
// Each element type gets three patterns: register/register, load in the
// second operand, and load in the first operand. The last one swaps the
// operands onto the rmi record and rewrites the immediate with a *Commute*
// transform. 256-bit types reuse the immediate as-is on VBLENDPD/VBLENDPS;
// 128-bit types go to VPBLENDW and need the mask scaled to 8 word lanes.
62010b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in {
62028bcb0991SDimitry Andricdef : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3),
62038bcb0991SDimitry Andric          (VBLENDPDYrri VR256:$src1, VR256:$src2, timm:$src3)>;
62048bcb0991SDimitry Andricdef : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3),
62058bcb0991SDimitry Andric          (VBLENDPDYrmi VR256:$src1, addr:$src2, timm:$src3)>;
62068bcb0991SDimitry Andricdef : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3),
62078bcb0991SDimitry Andric          (VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 timm:$src3))>;
62080b57cec5SDimitry Andric
62090b57cec5SDimitry Andric// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
62100b57cec5SDimitry Andric// it from becoming movsd via commuting under optsize.
62118bcb0991SDimitry Andricdef : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
62128bcb0991SDimitry Andric          (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 timm:$src3))>;
62138bcb0991SDimitry Andricdef : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3),
62148bcb0991SDimitry Andric          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 timm:$src3))>;
62158bcb0991SDimitry Andricdef : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3),
62168bcb0991SDimitry Andric          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 timm:$src3))>;
62170b57cec5SDimitry Andric
62188bcb0991SDimitry Andricdef : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), timm:$src3),
62198bcb0991SDimitry Andric          (VBLENDPSYrri VR256:$src1, VR256:$src2, timm:$src3)>;
62208bcb0991SDimitry Andricdef : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), timm:$src3),
62218bcb0991SDimitry Andric          (VBLENDPSYrmi VR256:$src1, addr:$src2, timm:$src3)>;
62228bcb0991SDimitry Andricdef : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, timm:$src3),
62238bcb0991SDimitry Andric          (VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 timm:$src3))>;
62240b57cec5SDimitry Andric
62250b57cec5SDimitry Andric// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
62260b57cec5SDimitry Andric// it from becoming movss via commuting under optsize.
62278bcb0991SDimitry Andricdef : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3),
62288bcb0991SDimitry Andric          (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 timm:$src3))>;
62298bcb0991SDimitry Andricdef : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), timm:$src3),
62308bcb0991SDimitry Andric          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>;
62318bcb0991SDimitry Andricdef : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, timm:$src3),
62328bcb0991SDimitry Andric          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>;
62330b57cec5SDimitry Andric}
62340b57cec5SDimitry Andric
// Non-VEX immediate blends. Is2Addr = 1 makes SS41I_blend_rmi tie $dst to
// $src1 and use the two-operand asm string; the memory fragment is `memop`.
// No Predicates list is set at this level.
62350b57cec5SDimitry Andricdefm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32,
62360b57cec5SDimitry Andric                               VR128, memop, f128mem, 1, SSEPackedSingle,
62370b57cec5SDimitry Andric                               SchedWriteFBlend.XMM, BlendCommuteImm4>;
62380b57cec5SDimitry Andricdefm BLENDPD : SS41I_blend_rmi<0x0D, "blendpd", X86Blendi, v2f64,
62390b57cec5SDimitry Andric                               VR128, memop, f128mem, 1, SSEPackedDouble,
62400b57cec5SDimitry Andric                               SchedWriteFBlend.XMM, BlendCommuteImm2>;
62410b57cec5SDimitry Andricdefm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16,
62420b57cec5SDimitry Andric                               VR128, memop, i128mem, 1, SSEPackedInt,
62430b57cec5SDimitry Andric                               SchedWriteBlend.XMM, BlendCommuteImm8>;
62440b57cec5SDimitry Andric
// Non-VEX counterparts of the AVX1-only 128-bit integer blend patterns: the
// same three shapes per type (reg/reg, load second, load first), but selecting
// PBLENDW and matching memory through the memopv2i64 / memopv4i32 fragments.
62450b57cec5SDimitry Andriclet Predicates = [UseSSE41] in {
62460b57cec5SDimitry Andric// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
62470b57cec5SDimitry Andric// it from becoming movsd/movss via commuting under optsize.
62488bcb0991SDimitry Andricdef : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
62498bcb0991SDimitry Andric          (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 timm:$src3))>;
62508bcb0991SDimitry Andricdef : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), timm:$src3),
62518bcb0991SDimitry Andric          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 timm:$src3))>;
62528bcb0991SDimitry Andricdef : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, timm:$src3),
62538bcb0991SDimitry Andric          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 timm:$src3))>;
62540b57cec5SDimitry Andric
62558bcb0991SDimitry Andricdef : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3),
62568bcb0991SDimitry Andric          (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 timm:$src3))>;
62578bcb0991SDimitry Andricdef : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), timm:$src3),
62588bcb0991SDimitry Andric          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>;
62598bcb0991SDimitry Andricdef : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, timm:$src3),
62608bcb0991SDimitry Andric          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>;
62610b57cec5SDimitry Andric}
62620b57cec5SDimitry Andric
62630b57cec5SDimitry Andric// For insertion into the zero index (low half) of a 256-bit vector, it is
62640b57cec5SDimitry Andric// more efficient to generate a blend with immediate instead of an insert*128.
// The 128-bit value is first widened with INSERT_SUBREG into an IMPLICIT_DEF
// 256-bit register (upper half undefined).
//   reg/reg: the widened value is the second blend source and the mask covers
//            the low half (0x3 of 4 f64 lanes, 0xf of 8 f32 lanes).
//   load:    the loaded 256-bit vector is the second blend source and the
//            widened register the first, so the mask covers the high half
//            instead (0xc / 0xf0).
62650b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
62660b57cec5SDimitry Andricdef : Pat<(insert_subvector (v4f64 VR256:$src1), (v2f64 VR128:$src2), (iPTR 0)),
62670b57cec5SDimitry Andric          (VBLENDPDYrri VR256:$src1,
62680b57cec5SDimitry Andric                        (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
62690b57cec5SDimitry Andric                                       VR128:$src2, sub_xmm), 0x3)>;
62700b57cec5SDimitry Andricdef : Pat<(insert_subvector (v8f32 VR256:$src1), (v4f32 VR128:$src2), (iPTR 0)),
62710b57cec5SDimitry Andric          (VBLENDPSYrri VR256:$src1,
62720b57cec5SDimitry Andric                        (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
62730b57cec5SDimitry Andric                                       VR128:$src2, sub_xmm), 0xf)>;
62740b57cec5SDimitry Andric
62750b57cec5SDimitry Andricdef : Pat<(insert_subvector (loadv4f64 addr:$src2), (v2f64 VR128:$src1), (iPTR 0)),
62760b57cec5SDimitry Andric          (VBLENDPDYrmi (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
62770b57cec5SDimitry Andric                                       VR128:$src1, sub_xmm), addr:$src2, 0xc)>;
62780b57cec5SDimitry Andricdef : Pat<(insert_subvector (loadv8f32 addr:$src2), (v4f32 VR128:$src1), (iPTR 0)),
62790b57cec5SDimitry Andric          (VBLENDPSYrmi (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
62800b57cec5SDimitry Andric                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
62810b57cec5SDimitry Andric}
62820b57cec5SDimitry Andric
62830b57cec5SDimitry Andric/// SS41I_quaternary_avx - AVX SSE 4.1 with 4 operands
//
// Defines a register form (rr) and a memory form (rm) with one output and
// three inputs, where $src3 is a register operand of class RC rather than an
// immediate. Both patterns pass the operands to OpNode in reverse order
// ($src3, $src2, $src1). Only the rr pattern wraps the result in `VT`.
// The rm scheduling list spells out one read per operand: ReadAfterFold for
// $src1, five ReadDefault entries for the memory operand $src2, and
// ReadAfterFold for $src3.
62840b57cec5SDimitry Andricmulticlass SS41I_quaternary_avx<bits<8> opc, string OpcodeStr, RegisterClass RC,
62850b57cec5SDimitry Andric                                X86MemOperand x86memop, ValueType VT,
62860b57cec5SDimitry Andric                                PatFrag mem_frag, SDNode OpNode,
62870b57cec5SDimitry Andric                                X86FoldableSchedWrite sched> {
62880b57cec5SDimitry Andric  def rr : Ii8Reg<opc, MRMSrcReg, (outs RC:$dst),
62890b57cec5SDimitry Andric                  (ins RC:$src1, RC:$src2, RC:$src3),
62900b57cec5SDimitry Andric                  !strconcat(OpcodeStr,
62910b57cec5SDimitry Andric                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
62920b57cec5SDimitry Andric                  [(set RC:$dst, (VT (OpNode RC:$src3, RC:$src2, RC:$src1)))],
62930b57cec5SDimitry Andric                  SSEPackedInt>, TAPD, VEX_4V,
62940b57cec5SDimitry Andric                Sched<[sched]>;
62950b57cec5SDimitry Andric
62960b57cec5SDimitry Andric  def rm : Ii8Reg<opc, MRMSrcMem, (outs RC:$dst),
62970b57cec5SDimitry Andric                  (ins RC:$src1, x86memop:$src2, RC:$src3),
62980b57cec5SDimitry Andric                  !strconcat(OpcodeStr,
62990b57cec5SDimitry Andric                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
63000b57cec5SDimitry Andric                  [(set RC:$dst,
63010b57cec5SDimitry Andric                        (OpNode RC:$src3, (mem_frag addr:$src2),
63020b57cec5SDimitry Andric                                RC:$src1))], SSEPackedInt>, TAPD, VEX_4V,
63030b57cec5SDimitry Andric                Sched<[sched.Folded, sched.ReadAfterFold,
63040b57cec5SDimitry Andric                       // x86memop:$src2
63050b57cec5SDimitry Andric                       ReadDefault, ReadDefault, ReadDefault, ReadDefault,
63060b57cec5SDimitry Andric                       ReadDefault,
63070b57cec5SDimitry Andric                       // RC:$src3
63080b57cec5SDimitry Andric                       sched.ReadAfterFold]>;
63090b57cec5SDimitry Andric}
63100b57cec5SDimitry Andric
// AVX variable blends, all instantiated from SS41I_quaternary_avx with the
// X86Blendv node: VBLENDVPD/VBLENDVPS in 128-bit (VR128) and 256-bit (VR256,
// VEX_L) forms, plus the 128-bit VPBLENDVB. The floating-point forms override
// the execution domain; VPBLENDVB keeps the multiclass's SSEPackedInt domain.
63110b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
63120b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in {
63130b57cec5SDimitry Andricdefm VBLENDVPD  : SS41I_quaternary_avx<0x4B, "vblendvpd", VR128, f128mem,
63140b57cec5SDimitry Andric                                       v2f64, loadv2f64, X86Blendv,
63150b57cec5SDimitry Andric                                       SchedWriteFVarBlend.XMM>;
63160b57cec5SDimitry Andricdefm VBLENDVPDY : SS41I_quaternary_avx<0x4B, "vblendvpd", VR256, f256mem,
63170b57cec5SDimitry Andric                                       v4f64, loadv4f64, X86Blendv,
63180b57cec5SDimitry Andric                                       SchedWriteFVarBlend.YMM>, VEX_L;
63190b57cec5SDimitry Andric} // ExeDomain = SSEPackedDouble
63200b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in {
63210b57cec5SDimitry Andricdefm VBLENDVPS  : SS41I_quaternary_avx<0x4A, "vblendvps", VR128, f128mem,
63220b57cec5SDimitry Andric                                       v4f32, loadv4f32, X86Blendv,
63230b57cec5SDimitry Andric                                       SchedWriteFVarBlend.XMM>;
63240b57cec5SDimitry Andricdefm VBLENDVPSY : SS41I_quaternary_avx<0x4A, "vblendvps", VR256, f256mem,
63250b57cec5SDimitry Andric                                       v8f32, loadv8f32, X86Blendv,
63260b57cec5SDimitry Andric                                       SchedWriteFVarBlend.YMM>, VEX_L;
63270b57cec5SDimitry Andric} // ExeDomain = SSEPackedSingle
63280b57cec5SDimitry Andricdefm VPBLENDVB  : SS41I_quaternary_avx<0x4C, "vpblendvb", VR128, i128mem,
63290b57cec5SDimitry Andric                                       v16i8, loadv16i8, X86Blendv,
63300b57cec5SDimitry Andric                                       SchedWriteVarBlend.XMM>;
63310b57cec5SDimitry Andric}
63320b57cec5SDimitry Andric
// The 256-bit (VR256, VEX_L) byte blend is the only variable blend here that
// is gated on HasAVX2 rather than HasAVX.
63330b57cec5SDimitry Andriclet Predicates = [HasAVX2] in {
63340b57cec5SDimitry Andricdefm VPBLENDVBY : SS41I_quaternary_avx<0x4C, "vpblendvb", VR256, i256mem,
63350b57cec5SDimitry Andric                                       v32i8, loadv32i8, X86Blendv,
63360b57cec5SDimitry Andric                                       SchedWriteVarBlend.YMM>, VEX_L;
63370b57cec5SDimitry Andric}
63380b57cec5SDimitry Andric
// Select X86Blendv on vectors of 32- and 64-bit integer elements with the
// floating-point variable blends (VBLENDVPS / VBLENDVPD), in both 128-bit and
// 256-bit widths. The node operands (mask, src1, src2) map onto the
// instruction operands in reverse: (src2, src1, mask).
63390b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
63400b57cec5SDimitry Andric  def : Pat<(v4i32 (X86Blendv (v4i32 VR128:$mask), (v4i32 VR128:$src1),
63410b57cec5SDimitry Andric                              (v4i32 VR128:$src2))),
63420b57cec5SDimitry Andric            (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
63430b57cec5SDimitry Andric  def : Pat<(v2i64 (X86Blendv (v2i64 VR128:$mask), (v2i64 VR128:$src1),
63440b57cec5SDimitry Andric                              (v2i64 VR128:$src2))),
63450b57cec5SDimitry Andric            (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
63460b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Blendv (v8i32 VR256:$mask), (v8i32 VR256:$src1),
63470b57cec5SDimitry Andric                              (v8i32 VR256:$src2))),
63480b57cec5SDimitry Andric            (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
63490b57cec5SDimitry Andric  def : Pat<(v4i64 (X86Blendv (v4i64 VR256:$mask), (v4i64 VR256:$src1),
63500b57cec5SDimitry Andric                              (v4i64 VR256:$src2))),
63510b57cec5SDimitry Andric            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
63520b57cec5SDimitry Andric}
63530b57cec5SDimitry Andric
63540b57cec5SDimitry Andric// When optimizing for speed, select blends for these movss/movsd-style
63550b57cec5SDimitry Andric// operations: blends have better throughput on Sandy Bridge and Haswell.
63560b57cec5SDimitry Andric// movs[s/d] are 1-2 bytes shorter and are preferred when optimizing for size,
// which is why every pattern in this block is guarded by OptForSpeed.
63570b57cec5SDimitry Andriclet Predicates = [HasAVX, OptForSpeed] in {
  // Zero everything but the low element by blending against an all-zero vector.
63580b57cec5SDimitry Andric  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
63590b57cec5SDimitry Andric            (VBLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
  // The v4i32 form uses a word blend; mask 3 covers the two low 16-bit words.
63600b57cec5SDimitry Andric  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
63610b57cec5SDimitry Andric            (VPBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;
63620b57cec5SDimitry Andric
  // X86Movss as a blend. When the load is the first node operand it still has
  // to become the instruction's memory (second) operand, so the operands are
  // swapped and the 4-bit mask is inverted (1 -> 0xe).
63630b57cec5SDimitry Andric  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
63640b57cec5SDimitry Andric            (VBLENDPSrri VR128:$src1, VR128:$src2, (i8 1))>;
63650b57cec5SDimitry Andric  def : Pat<(v4f32 (X86Movss VR128:$src1, (loadv4f32 addr:$src2))),
63660b57cec5SDimitry Andric            (VBLENDPSrmi VR128:$src1, addr:$src2, (i8 1))>;
63670b57cec5SDimitry Andric  def : Pat<(v4f32 (X86Movss (loadv4f32 addr:$src2), VR128:$src1)),
63680b57cec5SDimitry Andric            (VBLENDPSrmi VR128:$src1, addr:$src2, (i8 0xe))>;
63690b57cec5SDimitry Andric
  // X86Movsd as a blend; same operand swap, with the 2-bit mask inverted
  // (1 -> 2) when the load is the first node operand.
63700b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
63710b57cec5SDimitry Andric            (VBLENDPDrri VR128:$src1, VR128:$src2, (i8 1))>;
63720b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Movsd VR128:$src1, (loadv2f64 addr:$src2))),
63730b57cec5SDimitry Andric            (VBLENDPDrmi VR128:$src1, addr:$src2, (i8 1))>;
63740b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Movsd (loadv2f64 addr:$src2), VR128:$src1)),
63750b57cec5SDimitry Andric            (VBLENDPDrmi VR128:$src1, addr:$src2, (i8 2))>;
63760b57cec5SDimitry Andric
63770b57cec5SDimitry Andric  // Move low f32 and clear high bits.
  // The 256-bit forms operate on the low 128-bit subregister and wrap the
  // result back up with SUBREG_TO_REG.
63780b57cec5SDimitry Andric  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
63790b57cec5SDimitry Andric            (SUBREG_TO_REG (i32 0),
63800b57cec5SDimitry Andric             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
63810b57cec5SDimitry Andric                          (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)),
63820b57cec5SDimitry Andric                          (i8 1))), sub_xmm)>;
63830b57cec5SDimitry Andric  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
63840b57cec5SDimitry Andric            (SUBREG_TO_REG (i32 0),
63850b57cec5SDimitry Andric             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
63860b57cec5SDimitry Andric                          (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)),
63870b57cec5SDimitry Andric                          (i8 3))), sub_xmm)>;
63880b57cec5SDimitry Andric}
63890b57cec5SDimitry Andric
63900b57cec5SDimitry Andric// When optimizing for speed, select blends for these movss/movsd-style
63910b57cec5SDimitry Andric// operations: blends have better throughput on Sandy Bridge and Haswell.
63920b57cec5SDimitry Andric// movs[s/d] are 1-2 bytes shorter and are preferred when optimizing for size,
// which is why every pattern in this block is guarded by OptForSpeed.
// These are the non-VEX (UseSSE41) counterparts of the HasAVX patterns; the
// load-folding forms match memopv* fragments instead of loadv*.
63930b57cec5SDimitry Andriclet Predicates = [UseSSE41, OptForSpeed] in {
63940b57cec5SDimitry Andric  // With SSE41 we can use blends for these patterns.
63950b57cec5SDimitry Andric  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
63960b57cec5SDimitry Andric            (BLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
63970b57cec5SDimitry Andric  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
63980b57cec5SDimitry Andric            (PBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;
63990b57cec5SDimitry Andric
  // X86Movss as a blend; with the load as first node operand the operands are
  // swapped and the 4-bit mask is inverted (1 -> 0xe).
64000b57cec5SDimitry Andric  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
64010b57cec5SDimitry Andric            (BLENDPSrri VR128:$src1, VR128:$src2, (i8 1))>;
64020b57cec5SDimitry Andric  def : Pat<(v4f32 (X86Movss VR128:$src1, (memopv4f32 addr:$src2))),
64030b57cec5SDimitry Andric            (BLENDPSrmi VR128:$src1, addr:$src2, (i8 1))>;
64040b57cec5SDimitry Andric  def : Pat<(v4f32 (X86Movss (memopv4f32 addr:$src2), VR128:$src1)),
64050b57cec5SDimitry Andric            (BLENDPSrmi VR128:$src1, addr:$src2, (i8 0xe))>;
64060b57cec5SDimitry Andric
  // X86Movsd as a blend; 2-bit mask inverted (1 -> 2) in the swapped form.
64070b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
64080b57cec5SDimitry Andric            (BLENDPDrri VR128:$src1, VR128:$src2, (i8 1))>;
64090b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Movsd VR128:$src1, (memopv2f64 addr:$src2))),
64100b57cec5SDimitry Andric            (BLENDPDrmi VR128:$src1, addr:$src2, (i8 1))>;
64110b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Movsd (memopv2f64 addr:$src2), VR128:$src1)),
64120b57cec5SDimitry Andric            (BLENDPDrmi VR128:$src1, addr:$src2, (i8 2))>;
64130b57cec5SDimitry Andric}
64140b57cec5SDimitry Andric
64150b57cec5SDimitry Andric
64160b57cec5SDimitry Andric/// SS41I_ternary - SSE 4.1 ternary operator
/// The third input is the implicit XMM0 register: it is declared through
/// Uses = [XMM0], printed literally in the assembly string, and appears as the
/// first operand of the OpNode pattern. $src1 is tied to $dst.
///   rr0 - $src2 is a VR128 register.
///   rm0 - $src2 is a memory operand loaded through mem_frag.
64170b57cec5SDimitry Andriclet Uses = [XMM0], Constraints = "$src1 = $dst" in {
64180b57cec5SDimitry Andric  multiclass SS41I_ternary<bits<8> opc, string OpcodeStr, ValueType VT,
64190b57cec5SDimitry Andric                           PatFrag mem_frag, X86MemOperand x86memop,
64200b57cec5SDimitry Andric                           SDNode OpNode, X86FoldableSchedWrite sched> {
64210b57cec5SDimitry Andric    def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
64220b57cec5SDimitry Andric                    (ins VR128:$src1, VR128:$src2),
64230b57cec5SDimitry Andric                    !strconcat(OpcodeStr,
64240b57cec5SDimitry Andric                     "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
64250b57cec5SDimitry Andric                    [(set VR128:$dst,
64260b57cec5SDimitry Andric                      (VT (OpNode XMM0, VR128:$src2, VR128:$src1)))]>,
64270b57cec5SDimitry Andric                    Sched<[sched]>;
64280b57cec5SDimitry Andric
64290b57cec5SDimitry Andric    def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
64300b57cec5SDimitry Andric                    (ins VR128:$src1, x86memop:$src2),
64310b57cec5SDimitry Andric                    !strconcat(OpcodeStr,
64320b57cec5SDimitry Andric                     "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
64330b57cec5SDimitry Andric                    [(set VR128:$dst,
64340b57cec5SDimitry Andric                      (OpNode XMM0, (mem_frag addr:$src2), VR128:$src1))]>,
64350b57cec5SDimitry Andric                    Sched<[sched.Folded, sched.ReadAfterFold]>;
64360b57cec5SDimitry Andric  }
64370b57cec5SDimitry Andric}
64380b57cec5SDimitry Andric
// Non-VEX SSE4.1 variable blends (implicit XMM0 operand), instantiated from
// SS41I_ternary with X86Blendv. Only the two floating-point forms set an
// execution domain here; the memory forms use memopv* fragments.
64390b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in
64400b57cec5SDimitry Andricdefm BLENDVPD : SS41I_ternary<0x15, "blendvpd", v2f64, memopv2f64, f128mem,
64410b57cec5SDimitry Andric                              X86Blendv, SchedWriteFVarBlend.XMM>;
64420b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in
64430b57cec5SDimitry Andricdefm BLENDVPS : SS41I_ternary<0x14, "blendvps", v4f32, memopv4f32, f128mem,
64440b57cec5SDimitry Andric                              X86Blendv, SchedWriteFVarBlend.XMM>;
64450b57cec5SDimitry Andricdefm PBLENDVB : SS41I_ternary<0x10, "pblendvb", v16i8, memopv16i8, i128mem,
64460b57cec5SDimitry Andric                              X86Blendv, SchedWriteVarBlend.XMM>;
64470b57cec5SDimitry Andric
64480b57cec5SDimitry Andric// Aliases that accept the two-operand spelling, leaving the xmm0 argument implicit
// (the canonical assembly strings of the rr0/rm0 records spell xmm0 out).
// The trailing 0 is InstAlias's Emit argument: with Emit = 0 the alias is
// accepted when parsing but is not used when printing.
64490b57cec5SDimitry Andricdef : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}",
64500b57cec5SDimitry Andric                (BLENDVPDrr0 VR128:$dst, VR128:$src2), 0>;
64510b57cec5SDimitry Andricdef : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}",
64520b57cec5SDimitry Andric                (BLENDVPDrm0 VR128:$dst, f128mem:$src2), 0>;
64530b57cec5SDimitry Andricdef : InstAlias<"blendvps\t{$src2, $dst|$dst, $src2}",
64540b57cec5SDimitry Andric                (BLENDVPSrr0 VR128:$dst, VR128:$src2), 0>;
64550b57cec5SDimitry Andricdef : InstAlias<"blendvps\t{$src2, $dst|$dst, $src2}",
64560b57cec5SDimitry Andric                (BLENDVPSrm0 VR128:$dst, f128mem:$src2), 0>;
64570b57cec5SDimitry Andricdef : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}",
64580b57cec5SDimitry Andric                (PBLENDVBrr0 VR128:$dst, VR128:$src2), 0>;
64590b57cec5SDimitry Andricdef : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}",
64600b57cec5SDimitry Andric                (PBLENDVBrm0 VR128:$dst, i128mem:$src2), 0>;
64610b57cec5SDimitry Andric
// Non-VEX counterpart of the AVX integer blend patterns: X86Blendv on v4i32 /
// v2i64 is selected with BLENDVPS / BLENDVPD. The mask must be XMM0 (the
// implicit operand of the rr0 forms); the remaining node operands
// (src1, src2) are passed to the instruction as (src2, src1).
64620b57cec5SDimitry Andriclet Predicates = [UseSSE41] in {
64630b57cec5SDimitry Andric  def : Pat<(v4i32 (X86Blendv (v4i32 XMM0), (v4i32 VR128:$src1),
64640b57cec5SDimitry Andric                              (v4i32 VR128:$src2))),
64650b57cec5SDimitry Andric            (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
64660b57cec5SDimitry Andric  def : Pat<(v2i64 (X86Blendv (v2i64 XMM0), (v2i64 VR128:$src1),
64670b57cec5SDimitry Andric                              (v2i64 VR128:$src2))),
64680b57cec5SDimitry Andric            (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
64690b57cec5SDimitry Andric}
64700b57cec5SDimitry Andric
// (V)MOVNTDQA non-temporal loads.
// The three instruction records are defined with empty pattern lists; the Pat
// records that follow select them for alignednontemporalload of each listed
// vector type. Everything in this block gets AddedComplexity = 400 so these
// patterns win over competing load patterns.
64710b57cec5SDimitry Andriclet AddedComplexity = 400 in { // Prefer non-temporal versions
64720b57cec5SDimitry Andric
64730b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in
64740b57cec5SDimitry Andricdef VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
64750b57cec5SDimitry Andric                        "vmovntdqa\t{$src, $dst|$dst, $src}", []>,
647606c3fb27SDimitry Andric                        Sched<[SchedWriteVecMoveLSNT.XMM.RM]>, VEX, WIG;
64770b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in
64780b57cec5SDimitry Andricdef VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
64790b57cec5SDimitry Andric                         "vmovntdqa\t{$src, $dst|$dst, $src}", []>,
648006c3fb27SDimitry Andric                         Sched<[SchedWriteVecMoveLSNT.YMM.RM]>, VEX, VEX_L, WIG;
64810b57cec5SDimitry Andricdef MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
64820b57cec5SDimitry Andric                       "movntdqa\t{$src, $dst|$dst, $src}", []>,
64830b57cec5SDimitry Andric                       Sched<[SchedWriteVecMoveLSNT.XMM.RM]>;
64840b57cec5SDimitry Andric
// 256-bit vector types -> VMOVNTDQAYrm.
64850b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
64860b57cec5SDimitry Andric  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
64870b57cec5SDimitry Andric            (VMOVNTDQAYrm addr:$src)>;
64880b57cec5SDimitry Andric  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
64890b57cec5SDimitry Andric            (VMOVNTDQAYrm addr:$src)>;
64900b57cec5SDimitry Andric  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
64910b57cec5SDimitry Andric            (VMOVNTDQAYrm addr:$src)>;
64920b57cec5SDimitry Andric  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
64930b57cec5SDimitry Andric            (VMOVNTDQAYrm addr:$src)>;
64940b57cec5SDimitry Andric  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
64950b57cec5SDimitry Andric            (VMOVNTDQAYrm addr:$src)>;
6496fcaf7f86SDimitry Andric  def : Pat<(v16f16 (alignednontemporalload addr:$src)),
6497fcaf7f86SDimitry Andric            (VMOVNTDQAYrm addr:$src)>;
64980b57cec5SDimitry Andric  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
64990b57cec5SDimitry Andric            (VMOVNTDQAYrm addr:$src)>;
65000b57cec5SDimitry Andric}
65010b57cec5SDimitry Andric
// 128-bit vector types -> VMOVNTDQArm (VEX encoding).
65020b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
65030b57cec5SDimitry Andric  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
65040b57cec5SDimitry Andric            (VMOVNTDQArm addr:$src)>;
65050b57cec5SDimitry Andric  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
65060b57cec5SDimitry Andric            (VMOVNTDQArm addr:$src)>;
65070b57cec5SDimitry Andric  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
65080b57cec5SDimitry Andric            (VMOVNTDQArm addr:$src)>;
65090b57cec5SDimitry Andric  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
65100b57cec5SDimitry Andric            (VMOVNTDQArm addr:$src)>;
65110b57cec5SDimitry Andric  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
65120b57cec5SDimitry Andric            (VMOVNTDQArm addr:$src)>;
6513fcaf7f86SDimitry Andric  def : Pat<(v8f16 (alignednontemporalload addr:$src)),
6514fcaf7f86SDimitry Andric            (VMOVNTDQArm addr:$src)>;
65150b57cec5SDimitry Andric  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
65160b57cec5SDimitry Andric            (VMOVNTDQArm addr:$src)>;
65170b57cec5SDimitry Andric}
65180b57cec5SDimitry Andric
// 128-bit vector types -> MOVNTDQArm (non-VEX encoding).
65190b57cec5SDimitry Andriclet Predicates = [UseSSE41] in {
65200b57cec5SDimitry Andric  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
65210b57cec5SDimitry Andric            (MOVNTDQArm addr:$src)>;
65220b57cec5SDimitry Andric  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
65230b57cec5SDimitry Andric            (MOVNTDQArm addr:$src)>;
65240b57cec5SDimitry Andric  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
65250b57cec5SDimitry Andric            (MOVNTDQArm addr:$src)>;
65260b57cec5SDimitry Andric  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
65270b57cec5SDimitry Andric            (MOVNTDQArm addr:$src)>;
65280b57cec5SDimitry Andric  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
65290b57cec5SDimitry Andric            (MOVNTDQArm addr:$src)>;
6530fcaf7f86SDimitry Andric  def : Pat<(v8f16 (alignednontemporalload addr:$src)),
6531fcaf7f86SDimitry Andric            (MOVNTDQArm addr:$src)>;
65320b57cec5SDimitry Andric  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
65330b57cec5SDimitry Andric            (MOVNTDQArm addr:$src)>;
65340b57cec5SDimitry Andric}
65350b57cec5SDimitry Andric
65360b57cec5SDimitry Andric} // AddedComplexity
65370b57cec5SDimitry Andric
65380b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
65390b57cec5SDimitry Andric// SSE4.2 - Compare Instructions
65400b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
65410b57cec5SDimitry Andric
65420b57cec5SDimitry Andric/// SS42I_binop_rm - Simple SSE 4.2 binary operator
/// Defines a register-register (rr) and a register-memory (rm) record that
/// compute (OpNode $src1, $src2) with result type OpVT.
/// Is2Addr only selects the assembly string: 1 (default) prints the
/// two-operand "$src2, $dst" form, 0 prints the three-operand
/// "$src2, $src1, $dst" form. It does not add a tied-operand constraint, so
/// two-address users must supply Constraints themselves.
65430b57cec5SDimitry Andricmulticlass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
65440b57cec5SDimitry Andric                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
65450b57cec5SDimitry Andric                          X86MemOperand x86memop, X86FoldableSchedWrite sched,
65460b57cec5SDimitry Andric                          bit Is2Addr = 1> {
65470b57cec5SDimitry Andric  def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst),
65480b57cec5SDimitry Andric       (ins RC:$src1, RC:$src2),
65490b57cec5SDimitry Andric       !if(Is2Addr,
65500b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
65510b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
65520b57cec5SDimitry Andric       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
65530b57cec5SDimitry Andric       Sched<[sched]>;
65540b57cec5SDimitry Andric  def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst),
65550b57cec5SDimitry Andric       (ins RC:$src1, x86memop:$src2),
65560b57cec5SDimitry Andric       !if(Is2Addr,
65570b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
65580b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
65590b57cec5SDimitry Andric       [(set RC:$dst,
65600b57cec5SDimitry Andric         (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
65610b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
65620b57cec5SDimitry Andric}
65630b57cec5SDimitry Andric
// 64-bit element signed greater-than compare (X86pcmpgt on v2i64 / v4i64).
// The VEX forms pass Is2Addr = 0 (three-operand assembly) and fold memory with
// `load`; the 256-bit form requires AVX2. The non-VEX form keeps the default
// Is2Addr = 1, ties $src1 to $dst, and folds memory with `memop`.
65640b57cec5SDimitry Andriclet Predicates = [HasAVX] in
65650b57cec5SDimitry Andric  defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
65660b57cec5SDimitry Andric                                 load, i128mem, SchedWriteVecALU.XMM, 0>,
656706c3fb27SDimitry Andric                                 VEX_4V, WIG;
65680b57cec5SDimitry Andric
65690b57cec5SDimitry Andriclet Predicates = [HasAVX2] in
65700b57cec5SDimitry Andric  defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
65710b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
657206c3fb27SDimitry Andric                                  VEX_4V, VEX_L, WIG;
65730b57cec5SDimitry Andric
65740b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in
65750b57cec5SDimitry Andric  defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
65760b57cec5SDimitry Andric                                memop, i128mem, SchedWriteVecALU.XMM>;
65770b57cec5SDimitry Andric
65780b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
65790b57cec5SDimitry Andric// SSE4.2 - String/text Processing Instructions
65800b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
65810b57cec5SDimitry Andric
// (V)PCMPISTRM: rr and rm records for opcode 0x62, taking two vector sources
// and an 8-bit immediate. There are no explicit outputs and no selection
// patterns ([]), so mayLoad is set by hand on the memory form.
65820b57cec5SDimitry Andricmulticlass pcmpistrm_SS42AI<string asm> {
65830b57cec5SDimitry Andric  def rr : SS42AI<0x62, MRMSrcReg, (outs),
65840b57cec5SDimitry Andric    (ins VR128:$src1, VR128:$src2, u8imm:$src3),
65850b57cec5SDimitry Andric    !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
65860b57cec5SDimitry Andric    []>, Sched<[WritePCmpIStrM]>;
65870b57cec5SDimitry Andric  let mayLoad = 1 in
65880b57cec5SDimitry Andric  def rm :SS42AI<0x62, MRMSrcMem, (outs),
65890b57cec5SDimitry Andric    (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
65900b57cec5SDimitry Andric    !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
65910b57cec5SDimitry Andric    []>, Sched<[WritePCmpIStrM.Folded, WritePCmpIStrM.ReadAfterFold]>;
65920b57cec5SDimitry Andric}
65930b57cec5SDimitry Andric
// The results are modelled as implicit definitions of XMM0 and EFLAGS.
// The HasAVX predicate applies only to the VEX-encoded VPCMPISTRM.
65940b57cec5SDimitry Andriclet Defs = [XMM0, EFLAGS], hasSideEffects = 0 in {
65950b57cec5SDimitry Andric  let Predicates = [HasAVX] in
659606c3fb27SDimitry Andric  defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX, WIG;
65970b57cec5SDimitry Andric  defm PCMPISTRM  : pcmpistrm_SS42AI<"pcmpistrm"> ;
65980b57cec5SDimitry Andric}
65990b57cec5SDimitry Andric
// (V)PCMPESTRM: rr and rm records for opcode 0x60. The explicit operands are
// named $src1, $src3 and $src5 (two vector sources and an 8-bit immediate).
// There are no explicit outputs and no selection patterns ([]), so mayLoad is
// set by hand on the memory form.
66000b57cec5SDimitry Andricmulticlass SS42AI_pcmpestrm<string asm> {
66010b57cec5SDimitry Andric  def rr : SS42AI<0x60, MRMSrcReg, (outs),
66020b57cec5SDimitry Andric    (ins VR128:$src1, VR128:$src3, u8imm:$src5),
66030b57cec5SDimitry Andric    !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
66040b57cec5SDimitry Andric    []>, Sched<[WritePCmpEStrM]>;
66050b57cec5SDimitry Andric  let mayLoad = 1 in
66060b57cec5SDimitry Andric  def rm : SS42AI<0x60, MRMSrcMem, (outs),
66070b57cec5SDimitry Andric    (ins VR128:$src1, i128mem:$src3, u8imm:$src5),
66080b57cec5SDimitry Andric    !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
66090b57cec5SDimitry Andric    []>, Sched<[WritePCmpEStrM.Folded, WritePCmpEStrM.ReadAfterFold]>;
66100b57cec5SDimitry Andric}
66110b57cec5SDimitry Andric
// Implicit operands: defines XMM0 and EFLAGS, and additionally reads EAX and
// EDX. The HasAVX predicate applies only to the VEX-encoded VPCMPESTRM.
66120b57cec5SDimitry Andriclet Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
66130b57cec5SDimitry Andric  let Predicates = [HasAVX] in
661406c3fb27SDimitry Andric  defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX, WIG;
66150b57cec5SDimitry Andric  defm PCMPESTRM :  SS42AI_pcmpestrm<"pcmpestrm">;
66160b57cec5SDimitry Andric}
66170b57cec5SDimitry Andric
// (V)PCMPISTRI: rr and rm records for opcode 0x63, taking two vector sources
// and an 8-bit immediate. There are no explicit outputs and no selection
// patterns ([]), so mayLoad is set by hand on the memory form.
66180b57cec5SDimitry Andricmulticlass SS42AI_pcmpistri<string asm> {
66190b57cec5SDimitry Andric  def rr : SS42AI<0x63, MRMSrcReg, (outs),
66200b57cec5SDimitry Andric    (ins VR128:$src1, VR128:$src2, u8imm:$src3),
66210b57cec5SDimitry Andric    !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
66220b57cec5SDimitry Andric    []>, Sched<[WritePCmpIStrI]>;
66230b57cec5SDimitry Andric  let mayLoad = 1 in
66240b57cec5SDimitry Andric  def rm : SS42AI<0x63, MRMSrcMem, (outs),
66250b57cec5SDimitry Andric    (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
66260b57cec5SDimitry Andric    !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
66270b57cec5SDimitry Andric    []>, Sched<[WritePCmpIStrI.Folded, WritePCmpIStrI.ReadAfterFold]>;
66280b57cec5SDimitry Andric}
66290b57cec5SDimitry Andric
// The results are modelled as implicit definitions of ECX and EFLAGS.
// The HasAVX predicate applies only to the VEX-encoded VPCMPISTRI.
66300b57cec5SDimitry Andriclet Defs = [ECX, EFLAGS], hasSideEffects = 0 in {
66310b57cec5SDimitry Andric  let Predicates = [HasAVX] in
663206c3fb27SDimitry Andric  defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX, WIG;
66330b57cec5SDimitry Andric  defm PCMPISTRI  : SS42AI_pcmpistri<"pcmpistri">;
66340b57cec5SDimitry Andric}
66350b57cec5SDimitry Andric
// (V)PCMPESTRI: rr and rm records for opcode 0x61. The explicit operands are
// named $src1, $src3 and $src5 (two vector sources and an 8-bit immediate).
// There are no explicit outputs and no selection patterns ([]), so mayLoad is
// set by hand on the memory form.
66360b57cec5SDimitry Andricmulticlass SS42AI_pcmpestri<string asm> {
66370b57cec5SDimitry Andric  def rr : SS42AI<0x61, MRMSrcReg, (outs),
66380b57cec5SDimitry Andric    (ins VR128:$src1, VR128:$src3, u8imm:$src5),
66390b57cec5SDimitry Andric    !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
66400b57cec5SDimitry Andric    []>, Sched<[WritePCmpEStrI]>;
66410b57cec5SDimitry Andric  let mayLoad = 1 in
66420b57cec5SDimitry Andric  def rm : SS42AI<0x61, MRMSrcMem, (outs),
66430b57cec5SDimitry Andric    (ins VR128:$src1, i128mem:$src3, u8imm:$src5),
66440b57cec5SDimitry Andric    !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
66450b57cec5SDimitry Andric    []>, Sched<[WritePCmpEStrI.Folded, WritePCmpEStrI.ReadAfterFold]>;
66460b57cec5SDimitry Andric}
66470b57cec5SDimitry Andric
// Implicit operands: defines ECX and EFLAGS, and additionally reads EAX and
// EDX. The HasAVX predicate applies only to the VEX-encoded VPCMPESTRI.
66480b57cec5SDimitry Andriclet Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
66490b57cec5SDimitry Andric  let Predicates = [HasAVX] in
665006c3fb27SDimitry Andric  defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX, WIG;
66510b57cec5SDimitry Andric  defm PCMPESTRI  : SS42AI_pcmpestri<"pcmpestri">;
66520b57cec5SDimitry Andric}
66530b57cec5SDimitry Andric
66540b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
66550b57cec5SDimitry Andric// SSE4.2 - CRC Instructions
66560b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
66570b57cec5SDimitry Andric
66580b57cec5SDimitry Andric// No CRC instructions have AVX equivalents
66590b57cec5SDimitry Andric
66600b57cec5SDimitry Andric// CRC32 intrinsic instructions.
66610b57cec5SDimitry Andric// All forms take a register accumulator and a register-or-memory source; the
66620b57cec5SDimitry Andric// variants differ only in the sizes of the register and of the r/m operand.
//
// SS42I_crc32r - register-source form. RCOut is the class of the accumulator
// ($src1) and of the result ($dst); RCIn is the class of the data operand
// ($src2). Int is the operator used in the selection pattern.
66630b57cec5SDimitry Andricclass SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut,
66640b57cec5SDimitry Andric                   RegisterClass RCIn, SDPatternOperator Int> :
6665349cc55cSDimitry Andric  CRC32I<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
66660b57cec5SDimitry Andric         !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
66670b57cec5SDimitry Andric         [(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))]>,
66680b57cec5SDimitry Andric         Sched<[WriteCRC32]>;
66690b57cec5SDimitry Andric
// SS42I_crc32m - memory-source CRC32 form. RCOut is the class of the
// accumulator ($src1) and of the result ($dst); the data operand ($src2) is
// read from memory through x86memop and matched with a plain `load`.
// Int is the operator used in the selection pattern.
66700b57cec5SDimitry Andricclass SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
66710b57cec5SDimitry Andric                   X86MemOperand x86memop, SDPatternOperator Int> :
6672349cc55cSDimitry Andric  CRC32I<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
66730b57cec5SDimitry Andric         !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
66740b57cec5SDimitry Andric         [(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))]>,
66750b57cec5SDimitry Andric         Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>;
66760b57cec5SDimitry Andric
// CRC32 instruction records; the accumulator $src1 is tied to $dst throughout.
// Opcode 0xF0 is used for the 8-bit data forms and 0xF1 for the 16/32/64-bit
// forms, which are distinguished by OpSize16, OpSize32 and REX_W.
66770b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
66780b57cec5SDimitry Andric  def CRC32r32m8  : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem,
66790b57cec5SDimitry Andric                                 int_x86_sse42_crc32_32_8>;
66800b57cec5SDimitry Andric  def CRC32r32r8  : SS42I_crc32r<0xF0, "crc32{b}", GR32, GR8,
66810b57cec5SDimitry Andric                                 int_x86_sse42_crc32_32_8>;
66820b57cec5SDimitry Andric  def CRC32r32m16 : SS42I_crc32m<0xF1, "crc32{w}", GR32, i16mem,
66830b57cec5SDimitry Andric                                 int_x86_sse42_crc32_32_16>, OpSize16;
66840b57cec5SDimitry Andric  def CRC32r32r16 : SS42I_crc32r<0xF1, "crc32{w}", GR32, GR16,
66850b57cec5SDimitry Andric                                 int_x86_sse42_crc32_32_16>, OpSize16;
66860b57cec5SDimitry Andric  def CRC32r32m32 : SS42I_crc32m<0xF1, "crc32{l}", GR32, i32mem,
66870b57cec5SDimitry Andric                                 int_x86_sse42_crc32_32_32>, OpSize32;
66880b57cec5SDimitry Andric  def CRC32r32r32 : SS42I_crc32r<0xF1, "crc32{l}", GR32, GR32,
66890b57cec5SDimitry Andric                                 int_x86_sse42_crc32_32_32>, OpSize32;
66900b57cec5SDimitry Andric  def CRC32r64m64 : SS42I_crc32m<0xF1, "crc32{q}", GR64, i64mem,
66910b57cec5SDimitry Andric                                 int_x86_sse42_crc32_64_64>, REX_W;
66920b57cec5SDimitry Andric  def CRC32r64r64 : SS42I_crc32r<0xF1, "crc32{q}", GR64, GR64,
66930b57cec5SDimitry Andric                                 int_x86_sse42_crc32_64_64>, REX_W;
  // The 64-bit-accumulator / 8-bit-data forms are instantiated with null_frag,
  // i.e. without a selection pattern, so hasSideEffects and mayLoad are
  // specified explicitly here.
66940b57cec5SDimitry Andric  let hasSideEffects = 0 in {
66950b57cec5SDimitry Andric    let mayLoad = 1 in
66960b57cec5SDimitry Andric    def CRC32r64m8 : SS42I_crc32m<0xF0, "crc32{b}", GR64, i8mem,
66970b57cec5SDimitry Andric                                   null_frag>, REX_W;
66980b57cec5SDimitry Andric    def CRC32r64r8 : SS42I_crc32r<0xF0, "crc32{b}", GR64, GR8,
66990b57cec5SDimitry Andric                                   null_frag>, REX_W;
67000b57cec5SDimitry Andric  }
67010b57cec5SDimitry Andric}
67020b57cec5SDimitry Andric
67030b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
67040b57cec5SDimitry Andric// SHA-NI Instructions
67050b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
67060b57cec5SDimitry Andric
67070b57cec5SDimitry Andric// FIXME: Is there a better scheduler class for SHA than WriteVecIMul?
//
// SHAI_binop - SHA binary operation on 128-bit vectors, defined in a
// register-register (rr) and a register-memory (rm) form that both select the
// intrinsic IntId.
// UsesXMM0 (default 0) switches both the assembly string and the pattern:
//   0 - two-operand syntax; pattern is (IntId $src1, $src2).
//   1 - xmm0 is printed as an extra operand and passed to the intrinsic as its
//       third argument: (IntId $src1, $src2, XMM0).
// The multiclass itself does not declare Uses = [XMM0] or tie $src1 to $dst;
// callers are expected to provide those through enclosing `let`s.
67080b57cec5SDimitry Andricmulticlass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
67090b57cec5SDimitry Andric                      X86FoldableSchedWrite sched, bit UsesXMM0 = 0> {
67100b57cec5SDimitry Andric  def rr : I<Opc, MRMSrcReg, (outs VR128:$dst),
67110b57cec5SDimitry Andric             (ins VR128:$src1, VR128:$src2),
67120b57cec5SDimitry Andric             !if(UsesXMM0,
67130b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
67140b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
67150b57cec5SDimitry Andric             [!if(UsesXMM0,
67160b57cec5SDimitry Andric                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
67170b57cec5SDimitry Andric                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>,
67185ffd83dbSDimitry Andric             T8PS, Sched<[sched]>;
67190b57cec5SDimitry Andric
67200b57cec5SDimitry Andric  def rm : I<Opc, MRMSrcMem, (outs VR128:$dst),
67210b57cec5SDimitry Andric             (ins VR128:$src1, i128mem:$src2),
67220b57cec5SDimitry Andric             !if(UsesXMM0,
67230b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
67240b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
67250b57cec5SDimitry Andric             [!if(UsesXMM0,
67260b57cec5SDimitry Andric                  (set VR128:$dst, (IntId VR128:$src1,
67270b57cec5SDimitry Andric                    (memop addr:$src2), XMM0)),
67280b57cec5SDimitry Andric                  (set VR128:$dst, (IntId VR128:$src1,
67295ffd83dbSDimitry Andric                    (memop addr:$src2))))]>, T8PS,
67300b57cec5SDimitry Andric             Sched<[sched.Folded, sched.ReadAfterFold]>;
67310b57cec5SDimitry Andric}
67320b57cec5SDimitry Andric
// SHA instruction definitions. Everything in this scope ties $src1 to $dst
// and is predicated on HasSHA.
67330b57cec5SDimitry Andriclet Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
  // SHA1RNDS4 carries an extra u8imm operand ($src3), matched as a target
  // immediate (timm), so it is written out directly rather than through
  // SHAI_binop. It is tagged TAPS.
67340b57cec5SDimitry Andric  def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst),
67350b57cec5SDimitry Andric                         (ins VR128:$src1, VR128:$src2, u8imm:$src3),
67360b57cec5SDimitry Andric                         "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
67370b57cec5SDimitry Andric                         [(set VR128:$dst,
67380b57cec5SDimitry Andric                           (int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
67395ffd83dbSDimitry Andric                            (i8 timm:$src3)))]>, TAPS,
67400b57cec5SDimitry Andric                         Sched<[SchedWriteVecIMul.XMM]>;
67410b57cec5SDimitry Andric  def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst),
67420b57cec5SDimitry Andric                         (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
67430b57cec5SDimitry Andric                         "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
67440b57cec5SDimitry Andric                         [(set VR128:$dst,
67450b57cec5SDimitry Andric                           (int_x86_sha1rnds4 VR128:$src1,
67460b57cec5SDimitry Andric                            (memop addr:$src2),
67475ffd83dbSDimitry Andric                            (i8 timm:$src3)))]>, TAPS,
67480b57cec5SDimitry Andric                         Sched<[SchedWriteVecIMul.XMM.Folded,
67490b57cec5SDimitry Andric                                SchedWriteVecIMul.XMM.ReadAfterFold]>;
67500b57cec5SDimitry Andric
  // Plain two-source SHA1 helpers (UsesXMM0 left at its default of 0).
67510b57cec5SDimitry Andric  defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte,
67520b57cec5SDimitry Andric                              SchedWriteVecIMul.XMM>;
67530b57cec5SDimitry Andric  defm SHA1MSG1  : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1,
67540b57cec5SDimitry Andric                              SchedWriteVecIMul.XMM>;
67550b57cec5SDimitry Andric  defm SHA1MSG2  : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2,
67560b57cec5SDimitry Andric                              SchedWriteVecIMul.XMM>;
67570b57cec5SDimitry Andric
  // SHA256RNDS2 is the only instantiation here that passes UsesXMM0 = 1;
  // XMM0 is additionally declared as an implicit use of the instruction.
67580b57cec5SDimitry Andric  let Uses=[XMM0] in
67590b57cec5SDimitry Andric  defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2,
67600b57cec5SDimitry Andric                                SchedWriteVecIMul.XMM, 1>;
67610b57cec5SDimitry Andric
  // NOTE: SHA256MSG1 reuses opcode byte 0xCC, which SHA1RNDS4 above also
  // uses; the two differ in their map tag (T8PS via SHAI_binop vs. TAPS).
67620b57cec5SDimitry Andric  defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1,
67630b57cec5SDimitry Andric                               SchedWriteVecIMul.XMM>;
67640b57cec5SDimitry Andric  defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2,
67650b57cec5SDimitry Andric                               SchedWriteVecIMul.XMM>;
67660b57cec5SDimitry Andric}
67670b57cec5SDimitry Andric
67680b57cec5SDimitry Andric// Aliases for SHA256RNDS2rr/rm, whose own asm strings name %xmm0 explicitly.
// These accept the two-operand spelling that leaves %xmm0 out.
// NOTE(review): the trailing 0 is the last InstAlias argument; presumably it
// keeps the alias from being used when printing - confirm against InstAlias.
67690b57cec5SDimitry Andricdef : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
67700b57cec5SDimitry Andric                (SHA256RNDS2rr VR128:$dst, VR128:$src2), 0>;
67710b57cec5SDimitry Andricdef : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
67720b57cec5SDimitry Andric                (SHA256RNDS2rm VR128:$dst, i128mem:$src2), 0>;
67730b57cec5SDimitry Andric
67740b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
67750b57cec5SDimitry Andric// AES-NI Instructions
67760b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
67770b57cec5SDimitry Andric
// AESI_binop_rm_int - Emits the register (rr) and memory (rm) forms of a
// two-source AES instruction built on AES8I.
//   opc       - opcode byte shared by both forms.
//   OpcodeStr - mnemonic; the operand string is chosen by Is2Addr.
//   IntId     - intrinsic matched by both selection patterns.
//   ld_frag   - load fragment used for the memory operand of rm.
//   Is2Addr   - 1 selects the two-operand asm string ($src2, $dst);
//               0 selects the three-operand one ($src2, $src1, $dst).
//   RC        - register class of the result and sources (default VR128).
//   MemOp     - memory operand type for rm (default i128mem).
67780b57cec5SDimitry Andricmulticlass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
67790b57cec5SDimitry Andric                             Intrinsic IntId, PatFrag ld_frag,
67800b57cec5SDimitry Andric                             bit Is2Addr = 0, RegisterClass RC = VR128,
67810b57cec5SDimitry Andric                             X86MemOperand MemOp = i128mem> {
  // Both defs pass an empty asm string; the enclosing let supplies AsmString.
67825ffd83dbSDimitry Andric  let AsmString = OpcodeStr#
67830b57cec5SDimitry Andric                  !if(Is2Addr, "\t{$src2, $dst|$dst, $src2}",
67840b57cec5SDimitry Andric                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}") in {
67850b57cec5SDimitry Andric    def rr : AES8I<opc, MRMSrcReg, (outs RC:$dst),
67860b57cec5SDimitry Andric                   (ins RC:$src1, RC:$src2), "",
67870b57cec5SDimitry Andric                   [(set RC:$dst, (IntId RC:$src1, RC:$src2))]>,
67880b57cec5SDimitry Andric                   Sched<[WriteAESDecEnc]>;
67890b57cec5SDimitry Andric    def rm : AES8I<opc, MRMSrcMem, (outs RC:$dst),
67900b57cec5SDimitry Andric                   (ins RC:$src1, MemOp:$src2), "",
67910b57cec5SDimitry Andric                   [(set RC:$dst, (IntId RC:$src1, (ld_frag addr:$src2)))]>,
67920b57cec5SDimitry Andric                   Sched<[WriteAESDecEnc.Folded, WriteAESDecEnc.ReadAfterFold]>;
67930b57cec5SDimitry Andric  }
67940b57cec5SDimitry Andric}
67950b57cec5SDimitry Andric
67960b57cec5SDimitry Andric// Perform One Round of an AES Encryption/Decryption Flow
// VEX-tagged 128-bit forms (VEX_4V, WIG). Is2Addr, RC and MemOp are left at
// their defaults, so these use the three-operand asm string on VR128/i128mem;
// memory operands are matched through the `load` fragment.
67970b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoVAES, HasAES] in {
67980b57cec5SDimitry Andric  defm VAESENC          : AESI_binop_rm_int<0xDC, "vaesenc",
679906c3fb27SDimitry Andric                         int_x86_aesni_aesenc, load>, VEX_4V, WIG;
68000b57cec5SDimitry Andric  defm VAESENCLAST      : AESI_binop_rm_int<0xDD, "vaesenclast",
680106c3fb27SDimitry Andric                         int_x86_aesni_aesenclast, load>, VEX_4V, WIG;
68020b57cec5SDimitry Andric  defm VAESDEC          : AESI_binop_rm_int<0xDE, "vaesdec",
680306c3fb27SDimitry Andric                         int_x86_aesni_aesdec, load>, VEX_4V, WIG;
68040b57cec5SDimitry Andric  defm VAESDECLAST      : AESI_binop_rm_int<0xDF, "vaesdeclast",
680506c3fb27SDimitry Andric                         int_x86_aesni_aesdeclast, load>, VEX_4V, WIG;
68060b57cec5SDimitry Andric}
68070b57cec5SDimitry Andric
// 256-bit forms of the same four operations: VR256 / i256mem operands, the
// *_256 intrinsics, and VEX_L added to the tags. Same opcode bytes and
// mnemonics as the 128-bit VEX forms; predicated on NoVLX and HasVAES.
68080b57cec5SDimitry Andriclet Predicates = [NoVLX, HasVAES] in {
68090b57cec5SDimitry Andric  defm VAESENCY         : AESI_binop_rm_int<0xDC, "vaesenc",
68100b57cec5SDimitry Andric                         int_x86_aesni_aesenc_256, load, 0, VR256,
681106c3fb27SDimitry Andric                         i256mem>, VEX_4V, VEX_L, WIG;
68120b57cec5SDimitry Andric  defm VAESENCLASTY     : AESI_binop_rm_int<0xDD, "vaesenclast",
68130b57cec5SDimitry Andric                         int_x86_aesni_aesenclast_256, load, 0, VR256,
681406c3fb27SDimitry Andric                         i256mem>, VEX_4V, VEX_L, WIG;
68150b57cec5SDimitry Andric  defm VAESDECY         : AESI_binop_rm_int<0xDE, "vaesdec",
68160b57cec5SDimitry Andric                         int_x86_aesni_aesdec_256, load, 0, VR256,
681706c3fb27SDimitry Andric                         i256mem>, VEX_4V, VEX_L, WIG;
68180b57cec5SDimitry Andric  defm VAESDECLASTY     : AESI_binop_rm_int<0xDF, "vaesdeclast",
68190b57cec5SDimitry Andric                         int_x86_aesni_aesdeclast_256, load, 0, VR256,
682006c3fb27SDimitry Andric                         i256mem>, VEX_4V, VEX_L, WIG;
68210b57cec5SDimitry Andric}
68220b57cec5SDimitry Andric
// Non-VEX forms: two-address ($src1 tied to $dst, Is2Addr = 1), with memory
// operands matched through `memop` rather than `load`.
// NOTE(review): no Predicates list is set in this scope; presumably AES8I
// supplies the feature requirement - confirm against its definition.
68230b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
68240b57cec5SDimitry Andric  defm AESENC          : AESI_binop_rm_int<0xDC, "aesenc",
68250b57cec5SDimitry Andric                         int_x86_aesni_aesenc, memop, 1>;
68260b57cec5SDimitry Andric  defm AESENCLAST      : AESI_binop_rm_int<0xDD, "aesenclast",
68270b57cec5SDimitry Andric                         int_x86_aesni_aesenclast, memop, 1>;
68280b57cec5SDimitry Andric  defm AESDEC          : AESI_binop_rm_int<0xDE, "aesdec",
68290b57cec5SDimitry Andric                         int_x86_aesni_aesdec, memop, 1>;
68300b57cec5SDimitry Andric  defm AESDECLAST      : AESI_binop_rm_int<0xDF, "aesdeclast",
68310b57cec5SDimitry Andric                         int_x86_aesni_aesdeclast, memop, 1>;
68320b57cec5SDimitry Andric}
68330b57cec5SDimitry Andric
68340b57cec5SDimitry Andric// Perform the AES InvMixColumn Transformation
// Single-source instruction: (ins) holds only $src1, so the memory forms use
// just WriteAESIMC.Folded with no ReadAfterFold entry.
// The VEX forms match memory through `load`; the non-VEX forms below use
// `memop`. All four share opcode byte 0xDB and the same intrinsic.
68350b57cec5SDimitry Andriclet Predicates = [HasAVX, HasAES] in {
68360b57cec5SDimitry Andric  def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
68370b57cec5SDimitry Andric      (ins VR128:$src1),
68380b57cec5SDimitry Andric      "vaesimc\t{$src1, $dst|$dst, $src1}",
68390b57cec5SDimitry Andric      [(set VR128:$dst,
68400b57cec5SDimitry Andric        (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>,
684106c3fb27SDimitry Andric      VEX, WIG;
68420b57cec5SDimitry Andric  def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
68430b57cec5SDimitry Andric      (ins i128mem:$src1),
68440b57cec5SDimitry Andric      "vaesimc\t{$src1, $dst|$dst, $src1}",
68450b57cec5SDimitry Andric      [(set VR128:$dst, (int_x86_aesni_aesimc (load addr:$src1)))]>,
684606c3fb27SDimitry Andric      Sched<[WriteAESIMC.Folded]>, VEX, WIG;
68470b57cec5SDimitry Andric}
// Non-VEX forms (no Predicates list is set at this level).
68480b57cec5SDimitry Andricdef AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
68490b57cec5SDimitry Andric  (ins VR128:$src1),
68500b57cec5SDimitry Andric  "aesimc\t{$src1, $dst|$dst, $src1}",
68510b57cec5SDimitry Andric  [(set VR128:$dst,
68520b57cec5SDimitry Andric    (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>;
68530b57cec5SDimitry Andricdef AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
68540b57cec5SDimitry Andric  (ins i128mem:$src1),
68550b57cec5SDimitry Andric  "aesimc\t{$src1, $dst|$dst, $src1}",
68560b57cec5SDimitry Andric  [(set VR128:$dst, (int_x86_aesni_aesimc (memop addr:$src1)))]>,
68570b57cec5SDimitry Andric  Sched<[WriteAESIMC.Folded]>;
68580b57cec5SDimitry Andric
68590b57cec5SDimitry Andric// AES Round Key Generation Assist
// One vector source ($src1) plus a u8imm ($src2) that the patterns match as
// a target immediate (timm). Built on AESAI with opcode byte 0xDF.
// The VEX forms match memory through `load`; the non-VEX forms below use
// `memop`. Memory forms schedule with WriteAESKeyGen.Folded only.
68600b57cec5SDimitry Andriclet Predicates = [HasAVX, HasAES] in {
68610b57cec5SDimitry Andric  def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
68620b57cec5SDimitry Andric      (ins VR128:$src1, u8imm:$src2),
68630b57cec5SDimitry Andric      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
68640b57cec5SDimitry Andric      [(set VR128:$dst,
68658bcb0991SDimitry Andric        (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
686606c3fb27SDimitry Andric      Sched<[WriteAESKeyGen]>, VEX, WIG;
68670b57cec5SDimitry Andric  def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
68680b57cec5SDimitry Andric      (ins i128mem:$src1, u8imm:$src2),
68690b57cec5SDimitry Andric      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
68700b57cec5SDimitry Andric      [(set VR128:$dst,
68718bcb0991SDimitry Andric        (int_x86_aesni_aeskeygenassist (load addr:$src1), timm:$src2))]>,
687206c3fb27SDimitry Andric      Sched<[WriteAESKeyGen.Folded]>, VEX, WIG;
68730b57cec5SDimitry Andric}
// Non-VEX forms (no Predicates list is set at this level).
68740b57cec5SDimitry Andricdef AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
68750b57cec5SDimitry Andric  (ins VR128:$src1, u8imm:$src2),
68760b57cec5SDimitry Andric  "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
68770b57cec5SDimitry Andric  [(set VR128:$dst,
68788bcb0991SDimitry Andric    (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
68790b57cec5SDimitry Andric  Sched<[WriteAESKeyGen]>;
68800b57cec5SDimitry Andricdef AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
68810b57cec5SDimitry Andric  (ins i128mem:$src1, u8imm:$src2),
68820b57cec5SDimitry Andric  "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
68830b57cec5SDimitry Andric  [(set VR128:$dst,
68848bcb0991SDimitry Andric    (int_x86_aesni_aeskeygenassist (memop addr:$src1), timm:$src2))]>,
68850b57cec5SDimitry Andric  Sched<[WriteAESKeyGen.Folded]>;
68860b57cec5SDimitry Andric
68870b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
68880b57cec5SDimitry Andric// PCLMUL Instructions
68890b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
68900b57cec5SDimitry Andric
68910b57cec5SDimitry Andric// Immediate transform to help with commuting.
// Takes the low 8 bits of the matched target immediate, swaps its high and
// low nibbles, and returns the result as an i8 immediate. It is applied by
// the patterns that move a load from the first source to the second.
68928bcb0991SDimitry Andricdef PCLMULCommuteImm : SDNodeXForm<timm, [{
68930b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue();
68940b57cec5SDimitry Andric  return getI8Imm((uint8_t)((Imm >> 4) | (Imm << 4)), SDLoc(N));
68950b57cec5SDimitry Andric}]>;
68960b57cec5SDimitry Andric
68970b57cec5SDimitry Andric// SSE carry-less Multiplication instructions
// Two-address forms ($src1 tied to $dst) with a u8imm selector matched as a
// target immediate. Predicated on NoAVX and HasPCLMUL.
68980b57cec5SDimitry Andriclet Predicates = [NoAVX, HasPCLMUL] in {
68990b57cec5SDimitry Andric  let Constraints = "$src1 = $dst" in {
    // Only the register form is marked commutable.
69000b57cec5SDimitry Andric    let isCommutable = 1 in
69010b57cec5SDimitry Andric    def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
69020b57cec5SDimitry Andric              (ins VR128:$src1, VR128:$src2, u8imm:$src3),
69030b57cec5SDimitry Andric              "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
69040b57cec5SDimitry Andric              [(set VR128:$dst,
69058bcb0991SDimitry Andric                (int_x86_pclmulqdq VR128:$src1, VR128:$src2, timm:$src3))]>,
69060b57cec5SDimitry Andric                Sched<[WriteCLMul]>;
69070b57cec5SDimitry Andric
69080b57cec5SDimitry Andric    def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
69090b57cec5SDimitry Andric              (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
69100b57cec5SDimitry Andric              "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
69110b57cec5SDimitry Andric              [(set VR128:$dst,
69120b57cec5SDimitry Andric                 (int_x86_pclmulqdq VR128:$src1, (memop addr:$src2),
69138bcb0991SDimitry Andric                  timm:$src3))]>,
69140b57cec5SDimitry Andric              Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
69150b57cec5SDimitry Andric  } // Constraints = "$src1 = $dst"
69160b57cec5SDimitry Andric
  // Load in the first intrinsic operand: select the rm form with the two
  // sources exchanged and the immediate rewritten by PCLMULCommuteImm.
69170b57cec5SDimitry Andric  def : Pat<(int_x86_pclmulqdq (memop addr:$src2), VR128:$src1,
69188bcb0991SDimitry Andric                                (i8 timm:$src3)),
69190b57cec5SDimitry Andric            (PCLMULQDQrm VR128:$src1, addr:$src2,
69208bcb0991SDimitry Andric                          (PCLMULCommuteImm timm:$src3))>;
69210b57cec5SDimitry Andric} // Predicates = [NoAVX, HasPCLMUL]
69220b57cec5SDimitry Andric
69230b57cec5SDimitry Andric// SSE aliases
// Generates the four mnemonics "pclmul" # HI # LO # "dq" (HI and LO each
// ranging over "hq"/"lq") for both PCLMULQDQrr and PCLMULQDQrm. The alias
// supplies the immediate itself: bit 4 is set when LO is "hq" and bit 0 is
// set when HI is "hq".
69240b57cec5SDimitry Andricforeach HI = ["hq","lq"] in
69250b57cec5SDimitry Andricforeach LO = ["hq","lq"] in {
69260b57cec5SDimitry Andric  def : InstAlias<"pclmul" # HI # LO # "dq\t{$src, $dst|$dst, $src}",
69270b57cec5SDimitry Andric                  (PCLMULQDQrr VR128:$dst, VR128:$src,
69280b57cec5SDimitry Andric                   !add(!shl(!eq(LO,"hq"),4),!eq(HI,"hq"))), 0>;
69290b57cec5SDimitry Andric  def : InstAlias<"pclmul" # HI # LO # "dq\t{$src, $dst|$dst, $src}",
69300b57cec5SDimitry Andric                  (PCLMULQDQrm VR128:$dst, i128mem:$src,
69310b57cec5SDimitry Andric                   !add(!shl(!eq(LO,"hq"),4),!eq(HI,"hq"))), 0>;
69320b57cec5SDimitry Andric}
69330b57cec5SDimitry Andric
69340b57cec5SDimitry Andric// AVX carry-less Multiplication instructions
// vpclmulqdq - Emits the three-operand rr and rm forms of vpclmulqdq plus a
// pattern for a load appearing in the first source.
//   RC     - register class of the result and register sources.
//   MemOp  - memory operand type for rm.
//   LdFrag - load fragment used to match the memory source.
//   IntId  - intrinsic matched by the patterns.
69350b57cec5SDimitry Andricmulticlass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
69360b57cec5SDimitry Andric                      PatFrag LdFrag, Intrinsic IntId> {
  // Only the register form is marked commutable.
69370b57cec5SDimitry Andric  let isCommutable = 1 in
69380b57cec5SDimitry Andric  def rr : PCLMULIi8<0x44, MRMSrcReg, (outs RC:$dst),
69390b57cec5SDimitry Andric            (ins RC:$src1, RC:$src2, u8imm:$src3),
69400b57cec5SDimitry Andric            "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
69410b57cec5SDimitry Andric            [(set RC:$dst,
69428bcb0991SDimitry Andric              (IntId RC:$src1, RC:$src2, timm:$src3))]>,
69430b57cec5SDimitry Andric            Sched<[WriteCLMul]>;
69440b57cec5SDimitry Andric
69450b57cec5SDimitry Andric  def rm : PCLMULIi8<0x44, MRMSrcMem, (outs RC:$dst),
69460b57cec5SDimitry Andric            (ins RC:$src1, MemOp:$src2, u8imm:$src3),
69470b57cec5SDimitry Andric            "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
69480b57cec5SDimitry Andric            [(set RC:$dst,
69498bcb0991SDimitry Andric               (IntId RC:$src1, (LdFrag addr:$src2), timm:$src3))]>,
69500b57cec5SDimitry Andric            Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
69510b57cec5SDimitry Andric
69520b57cec5SDimitry Andric  // We can commute a load in the first operand by swapping the sources and
69530b57cec5SDimitry Andric  // rotating the immediate.
  // The rm record is looked up by name (NAME#"rm"); the immediate is
  // rewritten by PCLMULCommuteImm.
69548bcb0991SDimitry Andric  def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 timm:$src3)),
69550b57cec5SDimitry Andric            (!cast<Instruction>(NAME#"rm") RC:$src1, addr:$src2,
69568bcb0991SDimitry Andric                                           (PCLMULCommuteImm timm:$src3))>;
69570b57cec5SDimitry Andric}
69580b57cec5SDimitry Andric
// 128-bit VEX form: VR128 / i128mem with int_x86_pclmulqdq.
69590b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in
69600b57cec5SDimitry Andricdefm VPCLMULQDQ : vpclmulqdq<VR128, i128mem, load,
696106c3fb27SDimitry Andric                             int_x86_pclmulqdq>, VEX_4V, WIG;
69620b57cec5SDimitry Andric
// 256-bit VEX form: VR256 / i256mem with int_x86_pclmulqdq_256; adds VEX_L.
69630b57cec5SDimitry Andriclet Predicates = [NoVLX, HasVPCLMULQDQ] in
69640b57cec5SDimitry Andricdefm VPCLMULQDQY : vpclmulqdq<VR256, i256mem, load,
696506c3fb27SDimitry Andric                              int_x86_pclmulqdq_256>, VEX_4V, VEX_L, WIG;
69660b57cec5SDimitry Andric
// vpclmulqdq_aliases_impl - Emits one "vpclmul" # Hi # Lo # "dq" alias for
// the rr record and one for the rm record named by InstStr.
//   InstStr - base record name; "rr" / "rm" is appended and cast to
//             Instruction.
//   RC      - register class of $dst, $src1 and the register $src2.
//   MemOp   - memory operand type of $src2 in the rm alias.
//   Hi, Lo  - "hq" or "lq"; bit 4 of the supplied immediate is set when Lo
//             is "hq" and bit 0 is set when Hi is "hq".
69670b57cec5SDimitry Andricmulticlass vpclmulqdq_aliases_impl<string InstStr, RegisterClass RC,
69680b57cec5SDimitry Andric                                   X86MemOperand MemOp, string Hi, string Lo> {
69695ffd83dbSDimitry Andric  def : InstAlias<"vpclmul"#Hi#Lo#"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
69700b57cec5SDimitry Andric                  (!cast<Instruction>(InstStr # "rr") RC:$dst, RC:$src1, RC:$src2,
69710b57cec5SDimitry Andric                        !add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))), 0>;
69725ffd83dbSDimitry Andric  def : InstAlias<"vpclmul"#Hi#Lo#"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
69730b57cec5SDimitry Andric                  (!cast<Instruction>(InstStr # "rm") RC:$dst, RC:$src1, MemOp:$src2,
69740b57cec5SDimitry Andric                        !add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))), 0>;
69750b57cec5SDimitry Andric}
69760b57cec5SDimitry Andric
// vpclmulqdq_aliases - Instantiates vpclmulqdq_aliases_impl for all four
// (Hi, Lo) combinations of "hq"/"lq", forwarding InstStr, RC and MemOp.
69770b57cec5SDimitry Andricmulticlass vpclmulqdq_aliases<string InstStr, RegisterClass RC,
69780b57cec5SDimitry Andric                              X86MemOperand MemOp> {
69790b57cec5SDimitry Andric  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "hq", "hq">;
69800b57cec5SDimitry Andric  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "hq", "lq">;
69810b57cec5SDimitry Andric  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "lq", "hq">;
69820b57cec5SDimitry Andric  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "lq", "lq">;
69830b57cec5SDimitry Andric}
69840b57cec5SDimitry Andric
69850b57cec5SDimitry Andric// AVX aliases
// One set for the 128-bit records (VPCLMULQDQrr/rm) and one for the 256-bit
// records (VPCLMULQDQYrr/rm).
69860b57cec5SDimitry Andricdefm : vpclmulqdq_aliases<"VPCLMULQDQ", VR128, i128mem>;
69870b57cec5SDimitry Andricdefm : vpclmulqdq_aliases<"VPCLMULQDQY", VR256, i256mem>;
69880b57cec5SDimitry Andric
69890b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
69900b57cec5SDimitry Andric// SSE4A Instructions
69910b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
69920b57cec5SDimitry Andric
// Everything down to the closing "} // HasSSE4A" is predicated on HasSSE4A.
69930b57cec5SDimitry Andriclet Predicates = [HasSSE4A] in {
69940b57cec5SDimitry Andric
69950b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in {
// EXTRQ/INSERTQ forms: $src is tied to $dst in all four definitions.
// The immediate forms (EXTRQI, INSERTQI) take two u8imm operands, $len and
// $idx, matched as target immediates; the register forms take a VR128 $mask.
// EXTRQI and INSERTQI share opcode byte 0x78, and EXTRQ and INSERTQ share
// 0x79; within each pair the definitions differ in prefix tag (PD vs. XD).
69960b57cec5SDimitry Andriclet Constraints = "$src = $dst" in {
69970b57cec5SDimitry Andricdef EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst),
69980b57cec5SDimitry Andric                 (ins VR128:$src, u8imm:$len, u8imm:$idx),
69990b57cec5SDimitry Andric                 "extrq\t{$idx, $len, $src|$src, $len, $idx}",
70008bcb0991SDimitry Andric                 [(set VR128:$dst, (X86extrqi VR128:$src, timm:$len,
70018bcb0991SDimitry Andric                                    timm:$idx))]>,
70020b57cec5SDimitry Andric                 PD, Sched<[SchedWriteVecALU.XMM]>;
70030b57cec5SDimitry Andricdef EXTRQ  : I<0x79, MRMSrcReg, (outs VR128:$dst),
70040b57cec5SDimitry Andric              (ins VR128:$src, VR128:$mask),
70050b57cec5SDimitry Andric              "extrq\t{$mask, $src|$src, $mask}",
70060b57cec5SDimitry Andric              [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src,
70070b57cec5SDimitry Andric                                 VR128:$mask))]>,
70080b57cec5SDimitry Andric              PD, Sched<[SchedWriteVecALU.XMM]>;
70090b57cec5SDimitry Andric
70100b57cec5SDimitry Andricdef INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
70110b57cec5SDimitry Andric                   (ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx),
70120b57cec5SDimitry Andric                   "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
70130b57cec5SDimitry Andric                   [(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2,
70148bcb0991SDimitry Andric                                      timm:$len, timm:$idx))]>,
70150b57cec5SDimitry Andric                   XD, Sched<[SchedWriteVecALU.XMM]>;
70160b57cec5SDimitry Andricdef INSERTQ  : I<0x79, MRMSrcReg, (outs VR128:$dst),
70170b57cec5SDimitry Andric                 (ins VR128:$src, VR128:$mask),
70180b57cec5SDimitry Andric                 "insertq\t{$mask, $src|$src, $mask}",
70190b57cec5SDimitry Andric                 [(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src,
70200b57cec5SDimitry Andric                                    VR128:$mask))]>,
70210b57cec5SDimitry Andric                 XD, Sched<[SchedWriteVecALU.XMM]>;
70220b57cec5SDimitry Andric}
70230b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
70240b57cec5SDimitry Andric
70250b57cec5SDimitry Andric// Non-temporal (unaligned) scalar stores.
70260b57cec5SDimitry Andriclet AddedComplexity = 400 in { // Prefer non-temporal versions
// The two store definitions carry no selection pattern, so mayStore and
// hasSideEffects are set explicitly. They share opcode byte 0x2B and differ
// in prefix tag (XS for the f32mem form, XD for the f64mem form).
70270b57cec5SDimitry Andriclet hasSideEffects = 0, mayStore = 1, SchedRW = [SchedWriteFMoveLSNT.Scl.MR] in {
70280b57cec5SDimitry Andricdef MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
70290b57cec5SDimitry Andric                "movntss\t{$src, $dst|$dst, $src}", []>, XS;
70300b57cec5SDimitry Andric
70310b57cec5SDimitry Andricdef MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
70320b57cec5SDimitry Andric                "movntsd\t{$src, $dst|$dst, $src}", []>, XD;
70330b57cec5SDimitry Andric} // SchedRW
70340b57cec5SDimitry Andric
// Selection is done by these patterns: a non-temporal store of an FR32/FR64
// value is copied into the VR128 class and stored with MOVNTSS/MOVNTSD.
70350b57cec5SDimitry Andricdef : Pat<(nontemporalstore FR32:$src, addr:$dst),
70360b57cec5SDimitry Andric          (MOVNTSS addr:$dst, (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;
70370b57cec5SDimitry Andric
70380b57cec5SDimitry Andricdef : Pat<(nontemporalstore FR64:$src, addr:$dst),
70390b57cec5SDimitry Andric          (MOVNTSD addr:$dst, (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;
70400b57cec5SDimitry Andric
70410b57cec5SDimitry Andric} // AddedComplexity
70420b57cec5SDimitry Andric} // HasSSE4A
70430b57cec5SDimitry Andric
70440b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
70450b57cec5SDimitry Andric// AVX Instructions
70460b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
70470b57cec5SDimitry Andric
70480b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
70490b57cec5SDimitry Andric// VBROADCAST - Load from memory and broadcast to all elements of the
70500b57cec5SDimitry Andric//              destination operand
70510b57cec5SDimitry Andric//
// avx_broadcast_rm - Memory-source broadcast instruction on AVX8I, tagged VEX.
//   opc        - opcode byte.
//   OpcodeStr  - mnemonic; "\t{$src, $dst|$dst, $src}" is appended.
//   RC         - register class of the result.
//   x86memop   - memory operand type of $src.
//   VT         - value type the pattern result is cast to.
//   bcast_frag - fragment applied to addr:$src in the selection pattern.
//   Sched      - scheduling write; note this parameter has the same name as
//                the Sched<...> class it is passed to.
70520b57cec5SDimitry Andricclass avx_broadcast_rm<bits<8> opc, string OpcodeStr, RegisterClass RC,
70530b57cec5SDimitry Andric                           X86MemOperand x86memop, ValueType VT,
70548bcb0991SDimitry Andric                           PatFrag bcast_frag, SchedWrite Sched> :
70550b57cec5SDimitry Andric  AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
70560b57cec5SDimitry Andric        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
70578bcb0991SDimitry Andric        [(set RC:$dst, (VT (bcast_frag addr:$src)))]>,
70580b57cec5SDimitry Andric        Sched<[Sched]>, VEX;
70590b57cec5SDimitry Andric
70600b57cec5SDimitry Andric// AVX2 adds register forms
// avx2_broadcast_rr - Register-source broadcast instruction on AVX28I, tagged
// VEX. The source is always a VR128 register.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic; "\t{$src, $dst|$dst, $src}" is appended.
//   RC        - register class of the result.
//   ResVT     - value type of the X86VBroadcast result.
//   OpVT      - value type the VR128 source is viewed as.
//   Sched     - scheduling write passed to the Sched<...> class.
70610b57cec5SDimitry Andricclass avx2_broadcast_rr<bits<8> opc, string OpcodeStr, RegisterClass RC,
70620b57cec5SDimitry Andric                        ValueType ResVT, ValueType OpVT, SchedWrite Sched> :
70630b57cec5SDimitry Andric  AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
70640b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
70650b57cec5SDimitry Andric         [(set RC:$dst, (ResVT (X86VBroadcast (OpVT VR128:$src))))]>,
70660b57cec5SDimitry Andric         Sched<[Sched]>, VEX;
70670b57cec5SDimitry Andric
// Memory-source forms (HasAVX, NoVLX). The Y variants target VR256 and add
// VEX_L; all three use SchedWriteFShuffle.XMM.Folded, including the VR256
// ones.
70680b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle, Predicates = [HasAVX, NoVLX] in {
70690b57cec5SDimitry Andric  def VBROADCASTSSrm  : avx_broadcast_rm<0x18, "vbroadcastss", VR128,
70708bcb0991SDimitry Andric                                         f32mem, v4f32, X86VBroadcastld32,
70710b57cec5SDimitry Andric                                         SchedWriteFShuffle.XMM.Folded>;
70720b57cec5SDimitry Andric  def VBROADCASTSSYrm : avx_broadcast_rm<0x18, "vbroadcastss", VR256,
70738bcb0991SDimitry Andric                                         f32mem, v8f32, X86VBroadcastld32,
70740b57cec5SDimitry Andric                                         SchedWriteFShuffle.XMM.Folded>, VEX_L;
70750b57cec5SDimitry Andric}
// vbroadcastsd is only defined with a VR256 result here.
70760b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble, Predicates = [HasAVX, NoVLX] in
70770b57cec5SDimitry Andricdef VBROADCASTSDYrm  : avx_broadcast_rm<0x19, "vbroadcastsd", VR256, f64mem,
70788bcb0991SDimitry Andric                                        v4f64, X86VBroadcastld64,
70790b57cec5SDimitry Andric                                        SchedWriteFShuffle.XMM.Folded>, VEX_L;
70800b57cec5SDimitry Andric
// Register-source forms (HasAVX2, NoVLX). Same opcode bytes and mnemonics as
// the memory forms above; the VR256 variants use WriteFShuffle256.
70810b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle, Predicates = [HasAVX2, NoVLX] in {
70820b57cec5SDimitry Andric  def VBROADCASTSSrr  : avx2_broadcast_rr<0x18, "vbroadcastss", VR128,
70830b57cec5SDimitry Andric                                          v4f32, v4f32, SchedWriteFShuffle.XMM>;
70840b57cec5SDimitry Andric  def VBROADCASTSSYrr : avx2_broadcast_rr<0x18, "vbroadcastss", VR256,
70850b57cec5SDimitry Andric                                          v8f32, v4f32, WriteFShuffle256>, VEX_L;
70860b57cec5SDimitry Andric}
70870b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble, Predicates = [HasAVX2, NoVLX] in
70880b57cec5SDimitry Andricdef VBROADCASTSDYrr  : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256,
70890b57cec5SDimitry Andric                                         v4f64, v2f64, WriteFShuffle256>, VEX_L;
70900b57cec5SDimitry Andric
70910b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
70920b57cec5SDimitry Andric// VBROADCAST*128 - Load from memory and broadcast 128-bit vector to both
70930b57cec5SDimitry Andric//                  halves of a 256-bit vector.
70940b57cec5SDimitry Andric//
// Both definitions have an empty pattern list, so mayLoad and hasSideEffects
// are set explicitly. VBROADCASTI128 requires HasAVX2.
70950b57cec5SDimitry Andriclet mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX2] in
70960b57cec5SDimitry Andricdef VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
70970b57cec5SDimitry Andric                           (ins i128mem:$src),
70980b57cec5SDimitry Andric                           "vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
70990b57cec5SDimitry Andric                           Sched<[WriteShuffleLd]>, VEX, VEX_L;
71000b57cec5SDimitry Andric
// VBROADCASTF128 requires only HasAVX and is placed in the SSEPackedSingle
// execution domain.
71010b57cec5SDimitry Andriclet mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX],
71020b57cec5SDimitry Andric    ExeDomain = SSEPackedSingle in
71030b57cec5SDimitry Andricdef VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst),
71040b57cec5SDimitry Andric                           (ins f128mem:$src),
71050b57cec5SDimitry Andric                           "vbroadcastf128\t{$src, $dst|$dst, $src}", []>,
71060b57cec5SDimitry Andric                           Sched<[SchedWriteFShuffle.XMM.Folded]>, VEX, VEX_L;
71070b57cec5SDimitry Andric
// Select VBROADCASTF128 for X86SubVBroadcastld128 of every 256-bit value type
// listed below, floating-point and integer alike.
71080b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
7109e8d8bef9SDimitry Andricdef : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
71100b57cec5SDimitry Andric          (VBROADCASTF128 addr:$src)>;
7111e8d8bef9SDimitry Andricdef : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
71120b57cec5SDimitry Andric          (VBROADCASTF128 addr:$src)>;
71130b57cec5SDimitry Andric// NOTE: We're using FP instructions here, but execution domain fixing can
71140b57cec5SDimitry Andric// convert to integer when profitable.
7115e8d8bef9SDimitry Andricdef : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
71160b57cec5SDimitry Andric          (VBROADCASTF128 addr:$src)>;
7117e8d8bef9SDimitry Andricdef : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
71180b57cec5SDimitry Andric          (VBROADCASTF128 addr:$src)>;
7119e8d8bef9SDimitry Andricdef : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
71200b57cec5SDimitry Andric          (VBROADCASTF128 addr:$src)>;
7121fcaf7f86SDimitry Andricdef : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
7122fcaf7f86SDimitry Andric          (VBROADCASTF128 addr:$src)>;
7123e8d8bef9SDimitry Andricdef : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
71240b57cec5SDimitry Andric          (VBROADCASTF128 addr:$src)>;
71250b57cec5SDimitry Andric}
71260b57cec5SDimitry Andric
71270b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7128349cc55cSDimitry Andric// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
7129349cc55cSDimitry Andric//
7130349cc55cSDimitry Andric
// Both definitions have an empty pattern list; selection patterns are
// supplied separately through vperm2x128_lowering. Only the register form is
// marked commutable.
7131349cc55cSDimitry Andriclet ExeDomain = SSEPackedSingle in {
7132349cc55cSDimitry Andriclet isCommutable = 1 in
7133349cc55cSDimitry Andricdef VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
7134349cc55cSDimitry Andric          (ins VR256:$src1, VR256:$src2, u8imm:$src3),
7135349cc55cSDimitry Andric          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
7136349cc55cSDimitry Andric          VEX_4V, VEX_L, Sched<[WriteFShuffle256]>;
7137349cc55cSDimitry Andricdef VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
7138349cc55cSDimitry Andric          (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
7139349cc55cSDimitry Andric          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
7140349cc55cSDimitry Andric          VEX_4V, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
7141349cc55cSDimitry Andric}
7142349cc55cSDimitry Andric
7143349cc55cSDimitry Andric// Immediate transform to help with commuting.
// XORs the matched target immediate with 0x22 (i.e. flips bits 1 and 5) and
// returns it as an i8 immediate.
// NOTE(review): presumably those two bits choose which source each 128-bit
// half is taken from - confirm against the VPERM2F128 immediate encoding.
7144349cc55cSDimitry Andricdef Perm2XCommuteImm : SDNodeXForm<timm, [{
7145349cc55cSDimitry Andric  return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N));
7146349cc55cSDimitry Andric}]>;
7147349cc55cSDimitry Andric
// vperm2x128_lowering - Emits the three X86VPerm2x128 selection patterns for
// one value type.
//   InstrStr   - base instruction name; rr / rm is pasted on and the result
//                is cast to Instruction.
//   VT         - value type of the X86VPerm2x128 result.
//   memop_frag - load fragment used to match a memory source.
7148349cc55cSDimitry Andricmulticlass vperm2x128_lowering<string InstrStr, ValueType VT, PatFrag memop_frag> {
  // Both sources in registers.
7149349cc55cSDimitry Andric  def : Pat<(VT (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))),
7150349cc55cSDimitry Andric            (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR256:$src2, timm:$imm)>;
  // Load in the second source: maps directly onto the rm form.
7151349cc55cSDimitry Andric  def : Pat<(VT (X86VPerm2x128 VR256:$src1, (memop_frag addr:$src2), (i8 timm:$imm))),
7152349cc55cSDimitry Andric            (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2, timm:$imm)>;
7153349cc55cSDimitry Andric  // Pattern with load in other operand.
  // The sources are exchanged to fit the rm form and the immediate is
  // rewritten by Perm2XCommuteImm.
7154349cc55cSDimitry Andric  def : Pat<(VT (X86VPerm2x128 (memop_frag addr:$src2), VR256:$src1, (i8 timm:$imm))),
7155349cc55cSDimitry Andric            (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
7156349cc55cSDimitry Andric                                             (Perm2XCommuteImm timm:$imm))>;
7157349cc55cSDimitry Andric}
7158349cc55cSDimitry Andric
// Floating-point 256-bit types use VPERM2F128 whenever AVX is available.
let Predicates = [HasAVX] in {
  defm : vperm2x128_lowering<"VPERM2F128", v4f64, loadv4f64>;
  defm : vperm2x128_lowering<"VPERM2F128", v8f32, loadv8f32>;
}

// The integer and f16 256-bit types are mapped to VPERM2F128 only under the
// HasAVX1Only predicate.
let Predicates = [HasAVX1Only] in {
  defm : vperm2x128_lowering<"VPERM2F128", v4i64,  loadv4i64>;
  defm : vperm2x128_lowering<"VPERM2F128", v8i32,  loadv8i32>;
  defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>;
  defm : vperm2x128_lowering<"VPERM2F128", v16f16, loadv16f16>;
  defm : vperm2x128_lowering<"VPERM2F128", v32i8,  loadv32i8>;
}

//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//
// Both forms take a 256-bit source ($src1), a 128-bit value to insert ($src2,
// register or memory) and an 8-bit immediate ($src3). Neither carries a
// selection pattern ([]); they are referenced by name from Pat<> definitions.
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR128:$src2, u8imm:$src3),
          "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, Sched<[WriteFShuffle256]>, VEX_4V, VEX_L;
// With no pattern to infer it from, the load must be declared explicitly.
let mayLoad = 1 in
def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, f128mem:$src2, u8imm:$src3),
          "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
}

// To create a 256-bit all ones value, we should produce VCMPTRUEPS
// with YMM register containing zero.
// Both compare inputs are AVX_SET0 and the condition-code immediate is 0xf.
// FIXME: Avoid producing vxorps to clear the fake inputs.
let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 immAllOnesV), (VCMPPSYrri (AVX_SET0), (AVX_SET0), 0xf)>;
}

/// vinsert_lowering - Selection patterns for inserting a 128-bit subvector
/// (type From) into a 256-bit vector (type To).
///   InstrStr       - base name of the insert instruction ("rr"/"rm" appended).
///   PermStr        - base name of the perm2x128 instruction ("rm" appended),
///                    used when the 256-bit operand is the one being loaded.
///   frommemop_frag - load fragment for the 128-bit operand.
///   tomemop_frag   - load fragment for the 256-bit operand.
multiclass vinsert_lowering<string InstrStr, string PermStr,
                            ValueType From, ValueType To,
                            PatFrag frommemop_frag, PatFrag tomemop_frag> {
  // Register/register insert; the immediate is derived from the matched
  // insert node ($ins) via INSERT_get_vinsert128_imm.
  def : Pat<(vinsert128_insert:$ins (To VR256:$src1), (From VR128:$src2),
                                   (iPTR imm)),
            (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR128:$src2,
                                       (INSERT_get_vinsert128_imm VR256:$ins))>;
  // Inserted 128-bit value comes from memory: fold the load.
  def : Pat<(vinsert128_insert:$ins (To VR256:$src1),
                                    (From (frommemop_frag addr:$src2)),
                                    (iPTR imm)),
            (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
                                       (INSERT_get_vinsert128_imm VR256:$ins))>;
  // Folding "To" vector - convert to perm2x128 and commute inputs.
  // The 128-bit register is widened with INSERT_SUBREG into an IMPLICIT_DEF
  // 256-bit value and the loaded vector becomes the memory operand.
  def : Pat<(vinsert128_insert:$ins (To (tomemop_frag addr:$src1)),
                                    (From VR128:$src2),
                                    (iPTR imm)),
            (!cast<Instruction>(PermStr#rm)
              (INSERT_SUBREG (To (IMPLICIT_DEF)), VR128:$src2, sub_xmm),
              addr:$src1, (INSERT_get_vperm2x128_commutedimm VR256:$ins))>;
}

// Floating-point inserts use VINSERTF128 / VPERM2F128 under [HasAVX, NoVLX].
let Predicates = [HasAVX, NoVLX] in {
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v4f32, v8f32, loadv4f32, loadv8f32>;
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v2f64, v4f64, loadv2f64, loadv4f64>;
}

// Integer and f16 inserts are mapped to the FP instructions only under the
// HasAVX1Only predicate. Each element type is instantiated exactly once.
let Predicates = [HasAVX1Only] in {
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v2i64, v4i64,  loadv2i64, loadv4i64>;
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v4i32, v8i32,  loadv4i32, loadv8i32>;
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v8i16, v16i16, loadv8i16, loadv16i16>;
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v8f16, v16f16, loadv8f16, loadv16f16>;
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v16i8, v32i8,  loadv16i8, loadv32i8>;
}

//===----------------------------------------------------------------------===//
// VEXTRACTF128 - Extract packed floating-point values
//
// The rr form writes a 128-bit register; the mr form stores to a 128-bit
// memory destination. Neither carries a selection pattern ([]); they are
// referenced by name from Pat<> definitions.
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
          (ins VR256:$src1, u8imm:$src2),
          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, Sched<[WriteFShuffle256]>, VEX, VEX_L;
// With no pattern to infer it from, the store must be declared explicitly.
let mayStore = 1 in
def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
          (ins f128mem:$dst, VR256:$src1, u8imm:$src2),
          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, Sched<[WriteFStoreX]>, VEX, VEX_L;
}

/// vextract_lowering - Selection patterns for extracting a 128-bit subvector
/// (type To) from a 256-bit vector (type From).
///   InstrStr - base name of the extract instruction ("rr"/"mr" appended).
multiclass vextract_lowering<string InstrStr, ValueType From, ValueType To> {
  // Extract into a register; the immediate is derived from the matched
  // extract node ($ext) via EXTRACT_get_vextract128_imm.
  def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
            (To (!cast<Instruction>(InstrStr#rr)
                                    (From VR256:$src1),
                                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;
  // Extract whose result is stored: select the memory-destination form.
  def : Pat<(store (To (vextract128_extract:$ext (From VR256:$src1),
                                                 (iPTR imm))), addr:$dst),
            (!cast<Instruction>(InstrStr#mr) addr:$dst, VR256:$src1,
             (EXTRACT_get_vextract128_imm VR128:$ext))>;
}

// AVX1 patterns
// Floating-point extracts use VEXTRACTF128 under [HasAVX, NoVLX].
let Predicates = [HasAVX, NoVLX] in {
  defm : vextract_lowering<"VEXTRACTF128", v8f32, v4f32>;
  defm : vextract_lowering<"VEXTRACTF128", v4f64, v2f64>;
}

// Integer and f16 extracts are mapped to VEXTRACTF128 only under the
// HasAVX1Only predicate. Each element type is instantiated exactly once.
let Predicates = [HasAVX1Only] in {
  defm : vextract_lowering<"VEXTRACTF128", v4i64,  v2i64>;
  defm : vextract_lowering<"VEXTRACTF128", v8i32,  v4i32>;
  defm : vextract_lowering<"VEXTRACTF128", v16i16, v8i16>;
  defm : vextract_lowering<"VEXTRACTF128", v16f16, v8f16>;
  defm : vextract_lowering<"VEXTRACTF128", v32i8,  v16i8>;
}

//===----------------------------------------------------------------------===//
// VMASKMOV - Conditional SIMD Packed Loads and Stores
//
/// avx_movmask_rm - Defines the four forms of a masked move:
///   rm / Yrm - 128/256-bit load form, selected from IntLd / IntLd256.
///   mr / Ymr - 128/256-bit store form, selected from IntSt / IntSt256.
///   opc_rm / opc_mr - opcodes of the load and store forms.
///   schedX / schedY - scheduling classes for the 128-bit and 256-bit forms;
///                     their RM / MR members are used for loads / stores.
/// In the load patterns the address is the intrinsic's first operand and
/// $src1 its second (presumably the mask - confirm against the intrinsic
/// definitions).
multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
                          Intrinsic IntLd, Intrinsic IntLd256,
                          Intrinsic IntSt, Intrinsic IntSt256,
                          X86SchedWriteMaskMove schedX,
                          X86SchedWriteMaskMove schedY> {
  def rm  : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, f128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
             VEX_4V, Sched<[schedX.RM]>;
  def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, f256mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
             VEX_4V, VEX_L, Sched<[schedY.RM]>;
  def mr  : AVX8I<opc_mr, MRMDestMem, (outs),
             (ins f128mem:$dst, VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>,
             VEX_4V, Sched<[schedX.MR]>;
  def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
             (ins f256mem:$dst, VR256:$src1, VR256:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
             VEX_4V, VEX_L, Sched<[schedY.MR]>;
}

// Single-precision masked moves: load opcode 0x2C, store opcode 0x2E.
let ExeDomain = SSEPackedSingle in
defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
                                 int_x86_avx_maskload_ps,
                                 int_x86_avx_maskload_ps_256,
                                 int_x86_avx_maskstore_ps,
                                 int_x86_avx_maskstore_ps_256,
                                 WriteFMaskMove32, WriteFMaskMove32Y>;
// Double-precision masked moves: load opcode 0x2D, store opcode 0x2F.
let ExeDomain = SSEPackedDouble in
defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
                                 int_x86_avx_maskload_pd,
                                 int_x86_avx_maskload_pd_256,
                                 int_x86_avx_maskstore_pd,
                                 int_x86_avx_maskstore_pd_256,
                                 WriteFMaskMove64, WriteFMaskMove64Y>;

//===----------------------------------------------------------------------===//
// AVX_VNNI
//===----------------------------------------------------------------------===//
/// avx_vnni_rm - Defines the rr / rm (128-bit) and Yrr / Yrm (256-bit) forms
/// of a VEX-encoded VNNI instruction selected from OpNode.
///   IsCommutable - value given to isCommutable on the register forms.
/// $src1 is tied to $dst (Constraints), which is why the assembly string
/// prints only $src2 and $src3 as sources.
let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI], Constraints = "$src1 = $dst",
    ExplicitVEXPrefix = 1, checkVEXPredicate = 1 in
multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                       bit IsCommutable> {
  let isCommutable = IsCommutable in
  def rr  : AVX8I<opc, MRMSrcReg, (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2, VR128:$src3),
             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
             [(set VR128:$dst, (v4i32 (OpNode VR128:$src1,
                                       VR128:$src2, VR128:$src3)))]>,
             VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;

  // Memory form: one ReadAfterFold entry for each of the two register sources.
  def rm  : AVX8I<opc, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2, i128mem:$src3),
             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
             [(set VR128:$dst, (v4i32 (OpNode VR128:$src1, VR128:$src2,
                                      (loadv4i32 addr:$src3))))]>,
             VEX_4V, Sched<[SchedWriteVecIMul.XMM.Folded,
                            SchedWriteVecIMul.XMM.ReadAfterFold,
                            SchedWriteVecIMul.XMM.ReadAfterFold]>;

  let isCommutable = IsCommutable in
  def Yrr  : AVX8I<opc, MRMSrcReg, (outs VR256:$dst),
             (ins VR256:$src1, VR256:$src2, VR256:$src3),
             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
             [(set VR256:$dst, (v8i32 (OpNode VR256:$src1,
                                       VR256:$src2, VR256:$src3)))]>,
             VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;

  def Yrm  : AVX8I<opc, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, VR256:$src2, i256mem:$src3),
             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
             [(set VR256:$dst, (v8i32 (OpNode VR256:$src1, VR256:$src2,
                                      (loadv8i32 addr:$src3))))]>,
             VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM.Folded,
                                   SchedWriteVecIMul.YMM.ReadAfterFold,
                                   SchedWriteVecIMul.YMM.ReadAfterFold]>;
}

// The byte forms (VPDPBUSD/VPDPBUSDS) are instantiated as non-commutable and
// the word forms (VPDPWSSD/VPDPWSSDS) as commutable. Presumably this is
// because the byte forms treat their two multiplicands with different
// signedness - confirm against the ISA reference.
defm VPDPBUSD   : avx_vnni_rm<0x50, "vpdpbusd", X86Vpdpbusd, 0>;
defm VPDPBUSDS  : avx_vnni_rm<0x51, "vpdpbusds", X86Vpdpbusds, 0>;
defm VPDPWSSD   : avx_vnni_rm<0x52, "vpdpwssd",  X86Vpdpwssd, 1>;
defm VPDPWSSDS  : avx_vnni_rm<0x53, "vpdpwssds", X86Vpdpwssds, 1>;

// X86vpmaddwd restricted to nodes with exactly one use ("_su"), so a pattern
// that absorbs the multiply-add into another instruction only fires when no
// other user still needs the X86vpmaddwd result.
def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
                             (X86vpmaddwd node:$lhs, node:$rhs), [{
  return N->hasOneUse();
}]>;

// Select an add whose second operand is a single-use X86vpmaddwd as one
// VPDPWSSD, with the other add operand as the tied accumulator ($src1).
// Register and folded-load variants are given for 256-bit and 128-bit vectors.
let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI] in {
  def : Pat<(v8i32 (add VR256:$src1,
                        (X86vpmaddwd_su VR256:$src2, VR256:$src3))),
            (VPDPWSSDYrr VR256:$src1, VR256:$src2, VR256:$src3)>;
  def : Pat<(v8i32 (add VR256:$src1,
                        (X86vpmaddwd_su VR256:$src2, (load addr:$src3)))),
            (VPDPWSSDYrm VR256:$src1, VR256:$src2, addr:$src3)>;
  def : Pat<(v4i32 (add VR128:$src1,
                        (X86vpmaddwd_su VR128:$src2, VR128:$src3))),
            (VPDPWSSDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
  def : Pat<(v4i32 (add VR128:$src1,
                        (X86vpmaddwd_su VR128:$src2, (load addr:$src3)))),
            (VPDPWSSDrm VR128:$src1, VR128:$src2, addr:$src3)>;
}

//===----------------------------------------------------------------------===//
// VPERMIL - Permute Single and Double Floating-Point Values
//

/// avx_permil - Defines the four forms of a VPERMILPS/PD-style permute:
///   rr / rm - variable control taken from a vector register / memory
///             (opcode opc_rm, selected from X86VPermilpv).
///   ri / mi - control taken from an 8-bit immediate, data in a register /
///             memory (opcode opc_rmi, selected from X86VPermilpi).
///   x86memop_f - memory operand type for the data operand (mi form).
///   x86memop_i - memory operand type for the control vector (rm form).
///   f_vt / i_vt - data vector type / control vector type.
///   sched / varsched - scheduling classes for the immediate / variable forms.
multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
                      RegisterClass RC, X86MemOperand x86memop_f,
                      X86MemOperand x86memop_i,
                      ValueType f_vt, ValueType i_vt,
                      X86FoldableSchedWrite sched,
                      X86FoldableSchedWrite varsched> {
  let Predicates = [HasAVX, NoVLX] in {
    def rr  : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
               (ins RC:$src1, RC:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt RC:$src2))))]>, VEX_4V,
               Sched<[varsched]>;
    // NOTE(review): the write comes from varsched but ReadAfterFold from
    // sched; confirm the mix is intentional.
    def rm  : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86memop_i:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1,
                              (i_vt (load addr:$src2)))))]>, VEX_4V,
               Sched<[varsched.Folded, sched.ReadAfterFold]>;

    def ri  : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
             (ins RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 timm:$src2))))]>, VEX,
             Sched<[sched]>;
    def mi  : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
             (ins x86memop_f:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst,
               (f_vt (X86VPermilpi (load addr:$src1), (i8 timm:$src2))))]>, VEX,
             Sched<[sched.Folded]>;
  }// Predicates = [HasAVX, NoVLX]
}

// Single-precision permutes: 128-bit, then 256-bit (the Y variant adds VEX_L).
let ExeDomain = SSEPackedSingle in {
  defm VPERMILPS  : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
                               v4f32, v4i32, SchedWriteFShuffle.XMM,
                               SchedWriteFVarShuffle.XMM>;
  defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
                               v8f32, v8i32, SchedWriteFShuffle.YMM,
                               SchedWriteFVarShuffle.YMM>, VEX_L;
}
// Double-precision permutes: 128-bit, then 256-bit (the Y variant adds VEX_L).
let ExeDomain = SSEPackedDouble in {
  defm VPERMILPD  : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
                               v2f64, v2i64, SchedWriteFShuffle.XMM,
                               SchedWriteFVarShuffle.XMM>;
  defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
                               v4f64, v4i64, SchedWriteFShuffle.YMM,
                               SchedWriteFVarShuffle.YMM>, VEX_L;
}

//===----------------------------------------------------------------------===//
// VZERO - Zero YMM registers
// Note: These instructions do not affect YMM16-YMM31.
//

// Both instructions share opcode 0x77 and differ in VEX_L, which is set only
// on VZEROALL. Each is modelled as defining YMM0-YMM15.
let SchedRW = [WriteSystem] in {
let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
            YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
  // Zero All YMM registers
  def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
                  [(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L,
                  Requires<[HasAVX]>, WIG;

  // Zero Upper bits of YMM registers
  def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
                     [(int_x86_avx_vzeroupper)]>, PS, VEX,
                     Requires<[HasAVX]>, WIG;
} // Defs
} // SchedRW

//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//

/// f16c_ph2ps - vcvtph2ps (half -> single) into register class RC.
///   rr - source in a 128-bit register, selected from X86any_cvtph2ps.
///   rm - source in memory (x86memop). It has no pattern of its own, so
///        hasSideEffects / mayLoad are set explicitly; it is selected through
///        separate Pat<> definitions.
multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop,
                      X86FoldableSchedWrite sched> {
  def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
             "vcvtph2ps\t{$src, $dst|$dst, $src}",
             [(set RC:$dst, (X86any_cvtph2ps VR128:$src))]>,
             T8PD, VEX, Sched<[sched]>;
  let hasSideEffects = 0, mayLoad = 1 in
  def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
             "vcvtph2ps\t{$src, $dst|$dst, $src}",
             []>, T8PD, VEX, Sched<[sched.Folded]>;
}

/// f16c_ps2ph - vcvtps2ph (single -> half) from register class RC; $src2 is
/// the immediate operand passed through to X86any_cvtps2ph.
///   rr - result in a 128-bit register, selected from X86any_cvtps2ph.
///   mr - result stored to memory (x86memop). It has no pattern of its own,
///        so hasSideEffects / mayStore are set explicitly.
///   RR / MR - scheduling classes for the register and store forms.
multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
                      SchedWrite RR, SchedWrite MR> {
  def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
               (ins RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set VR128:$dst, (X86any_cvtps2ph RC:$src1, timm:$src2))]>,
               TAPD, VEX, Sched<[RR]>;
  let hasSideEffects = 0, mayStore = 1 in
  def mr : Ii8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               TAPD, VEX, Sched<[MR]>;
}

// F16C instruction instantiations plus the patterns that select their
// memory forms (the rm / mr definitions carry no patterns themselves).
let Predicates = [HasF16C, NoVLX] in {
  // The 128-bit forms use a 64-bit memory operand and the 256-bit forms a
  // 128-bit one.
  defm VCVTPH2PS  : f16c_ph2ps<VR128, f64mem, WriteCvtPH2PS>, SIMD_EXC;
  defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, WriteCvtPH2PSY>, VEX_L, SIMD_EXC;
  defm VCVTPS2PH  : f16c_ps2ph<VR128, f64mem, WriteCvtPS2PH,
                               WriteCvtPS2PHSt>, SIMD_EXC;
  defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, WriteCvtPS2PHY,
                               WriteCvtPS2PHYSt>, VEX_L, SIMD_EXC;

  // Pattern match vcvtph2ps of a scalar i64 load.
  def : Pat<(v4f32 (X86any_cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTPH2PSrm addr:$src)>;
  def : Pat<(v4f32 (X86any_cvtph2ps (bc_v8i16
              (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTPH2PSrm addr:$src)>;
  // 256-bit conversion of a full v8i16 load.
  def : Pat<(v8f32 (X86any_cvtph2ps (loadv8i16 addr:$src))),
            (VCVTPH2PSYrm addr:$src)>;

  // Store of element 0 of the 128-bit conversion result, viewed as f64 or
  // i64: select the memory-destination form.
  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>;
  // Store of the full v8i16 result of the 256-bit conversion.
  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHYmr addr:$dst, VR256:$src1, timm:$src2)>;
}

//===----------------------------------------------------------------------===//
// AVX2 Instructions
//===----------------------------------------------------------------------===//

/// AVX2_blend_rmi - AVX2 blend with 8-bit immediate
///   rri - both sources in registers (marked commutable).
///   rmi - second source folded from memory.
///   commuteXForm - immediate transform applied by the extra pattern that
///                  handles a load appearing as the first source.
multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, X86FoldableSchedWrite sched,
                          RegisterClass RC,
                          X86MemOperand x86memop, SDNodeXForm commuteXForm> {
  let isCommutable = 1 in
  def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, u8imm:$src3),
        !strconcat(OpcodeStr,
            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
        Sched<[sched]>, VEX_4V;
  def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
        !strconcat(OpcodeStr,
            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        [(set RC:$dst,
          (OpVT (OpNode RC:$src1, (load addr:$src2), timm:$src3)))]>,
        Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V;

  // Pattern to commute if load is in first source.
  def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, timm:$src3)),
            (!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
                                            (commuteXForm timm:$src3))>;
}

// VPBLENDD (128-bit and 256-bit) plus patterns that also select it for
// 64-bit-element blends. Those patterns rewrite the blend immediate with the
// BlendScale* transforms; the *Commute* variants are used where the load is
// the first blend operand and the operands are therefore swapped.
let Predicates = [HasAVX2] in {
defm VPBLENDD : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v4i32,
                               SchedWriteBlend.XMM, VR128, i128mem,
                               BlendCommuteImm4>;
defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32,
                                SchedWriteBlend.YMM, VR256, i256mem,
                                BlendCommuteImm8>, VEX_L;

// v4i64 blends via the 256-bit VPBLENDD.
def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3),
          (VPBLENDDYrri VR256:$src1, VR256:$src2, (BlendScaleImm4 timm:$src3))>;
def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3),
          (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>;
def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3),
          (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>;

// v2i64 blends via the 128-bit VPBLENDD.
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
          (VPBLENDDrri VR128:$src1, VR128:$src2, (BlendScaleImm2to4 timm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3),
          (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleImm2to4 timm:$src3))>;
def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3),
          (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2to4 timm:$src3))>;
}

75680b57cec5SDimitry Andric// For insertion into the zero index (low half) of a 256-bit vector, it is
75690b57cec5SDimitry Andric// more efficient to generate a blend with immediate instead of an insert*128.
75705ffd83dbSDimitry Andric// NOTE: We're using FP instructions here, but execution domain fixing should
75710b57cec5SDimitry Andric// take care of using integer instructions when profitable.
let Predicates = [HasAVX] in {
  // Register forms. The 128-bit value ($src2) is placed in the sub_xmm
  // subregister of an otherwise undefined 256-bit register and blended into
  // $src1 using immediate 0xf. The wrapper register is always typed v8i32,
  // whatever the element type of the matched vectors.
  def : Pat<(insert_subvector (v8i32 VR256:$src1), (v4i32 VR128:$src2),
                              (iPTR 0)),
            (VBLENDPSYrri
                VR256:$src1,
                (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src2, sub_xmm),
                0xf)>;
  def : Pat<(insert_subvector (v4i64 VR256:$src1), (v2i64 VR128:$src2),
                              (iPTR 0)),
            (VBLENDPSYrri
                VR256:$src1,
                (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src2, sub_xmm),
                0xf)>;
  def : Pat<(insert_subvector (v16i16 VR256:$src1), (v8i16 VR128:$src2),
                              (iPTR 0)),
            (VBLENDPSYrri
                VR256:$src1,
                (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src2, sub_xmm),
                0xf)>;
  def : Pat<(insert_subvector (v16f16 VR256:$src1), (v8f16 VR128:$src2),
                              (iPTR 0)),
            (VBLENDPSYrri
                VR256:$src1,
                (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src2, sub_xmm),
                0xf)>;
  def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2),
                              (iPTR 0)),
            (VBLENDPSYrri
                VR256:$src1,
                (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src2, sub_xmm),
                0xf)>;

  // Load forms. Here the 256-bit vector being inserted into comes from memory
  // ($src2) and the 128-bit value is the register operand ($src1), so the
  // blend operands are swapped relative to the register forms and the
  // immediate is 0xf0.
  // NOTE(review): 0xf vs 0xf0 presumably mirrors which blend operand supplies
  // the low 128 bits -- confirm against the VBLENDPS immediate encoding.
  def : Pat<(insert_subvector (loadv8i32 addr:$src2), (v4i32 VR128:$src1),
                              (iPTR 0)),
            (VBLENDPSYrmi
                (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src1, sub_xmm),
                addr:$src2,
                0xf0)>;
  def : Pat<(insert_subvector (loadv4i64 addr:$src2), (v2i64 VR128:$src1),
                              (iPTR 0)),
            (VBLENDPSYrmi
                (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src1, sub_xmm),
                addr:$src2,
                0xf0)>;
  def : Pat<(insert_subvector (loadv16i16 addr:$src2), (v8i16 VR128:$src1),
                              (iPTR 0)),
            (VBLENDPSYrmi
                (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src1, sub_xmm),
                addr:$src2,
                0xf0)>;
  def : Pat<(insert_subvector (loadv16f16 addr:$src2), (v8f16 VR128:$src1),
                              (iPTR 0)),
            (VBLENDPSYrmi
                (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src1, sub_xmm),
                addr:$src2,
                0xf0)>;
  def : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1),
                              (iPTR 0)),
            (VBLENDPSYrmi
                (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src1, sub_xmm),
                addr:$src2,
                0xf0)>;
}
76100b57cec5SDimitry Andric
76110b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
76120b57cec5SDimitry Andric// VPBROADCAST - Load from memory and broadcast to all elements of the
76130b57cec5SDimitry Andric//               destination operand
76140b57cec5SDimitry Andric//
// avx2_broadcast - Defines the four forms of one integer broadcast opcode:
//   rr  / rm  : 128-bit destination, register / memory source
//   Yrr / Yrm : 256-bit destination, register / memory source
// plus a pattern that accepts a 256-bit register source by extracting its
// sub_xmm subregister first.
//
//   opc        - opcode byte shared by all four forms
//   OpcodeStr  - assembly mnemonic
//   x86memop   - memory operand class for the load forms
//   bcast_frag - pattern fragment matched by the load forms
//   OpVT128/OpVT256 - result value types for the 128/256-bit forms
//   prd        - extra predicate combined with HasAVX2
multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
                          X86MemOperand x86memop, PatFrag bcast_frag,
                          ValueType OpVT128, ValueType OpVT256, Predicate prd> {
  let Predicates = [HasAVX2, prd] in {
    def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    OpcodeStr#"\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst,
                          (OpVT128 (X86VBroadcast (OpVT128 VR128:$src))))]>,
             Sched<[SchedWriteShuffle.XMM]>, VEX;

    def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                    OpcodeStr#"\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (OpVT128 (bcast_frag addr:$src)))]>,
             Sched<[SchedWriteShuffle.XMM.Folded]>, VEX;

    def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                     OpcodeStr#"\t{$src, $dst|$dst, $src}",
                     [(set VR256:$dst,
                           (OpVT256 (X86VBroadcast (OpVT128 VR128:$src))))]>,
              Sched<[WriteShuffle256]>, VEX, VEX_L;

    // NOTE(review): this 256-bit load form uses the XMM folded scheduling
    // class rather than a 256-bit one; kept exactly as written -- confirm
    // that this is intentional.
    def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
                     OpcodeStr#"\t{$src, $dst|$dst, $src}",
                     [(set VR256:$dst, (OpVT256 (bcast_frag addr:$src)))]>,
              Sched<[SchedWriteShuffle.XMM.Folded]>, VEX, VEX_L;

    // Broadcasting from a 256-bit register: select the Yrr form on the
    // sub_xmm subregister of the source, so the extract is implicit.
    def : Pat<(OpVT256 (X86VBroadcast (OpVT256 VR256:$src))),
              (!cast<Instruction>(NAME#"Yrr")
                  (OpVT128 (EXTRACT_SUBREG (OpVT256 VR256:$src), sub_xmm)))>;
  }
}
76470b57cec5SDimitry Andric
// Byte and word broadcasts are guarded by NoVLX_Or_NoBWI; dword and qword
// broadcasts are guarded by NoVLX.
defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb",
                                   i8mem, X86VBroadcastld8,
                                   v16i8, v32i8, NoVLX_Or_NoBWI>;
defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw",
                                   i16mem, X86VBroadcastld16,
                                   v8i16, v16i16, NoVLX_Or_NoBWI>;
defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd",
                                   i32mem, X86VBroadcastld32,
                                   v4i32, v8i32, NoVLX>;
defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq",
                                   i64mem, X86VBroadcastld64,
                                   v2i64, v4i64, NoVLX>;
76560b57cec5SDimitry Andric
let Predicates = [HasAVX2, NoVLX] in {
  // Fallback for when the load feeding the broadcast has other users and so
  // cannot be folded: broadcast from the scalar FP register instead, after
  // copying it into the VR128 register class.
  def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
            (VBROADCASTSSrr
                (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;
  def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
            (VBROADCASTSSYrr
                (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;
  def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
            (VBROADCASTSDYrr
                (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;
}
76670b57cec5SDimitry Andric
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  // Broadcast from an 8-bit GPR: widen to a 32-bit register whose remaining
  // bits are undefined (INSERT_SUBREG into IMPLICIT_DEF), move it into a
  // vector register, then broadcast the byte.
  def : Pat<(v16i8 (X86VBroadcast GR8:$src)),
            (VPBROADCASTBrr
                (VMOVDI2PDIrr
                    (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                        GR8:$src, sub_8bit))))>;
  def : Pat<(v32i8 (X86VBroadcast GR8:$src)),
            (VPBROADCASTBYrr
                (VMOVDI2PDIrr
                    (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                        GR8:$src, sub_8bit))))>;

  // Same scheme for a 16-bit GPR.
  def : Pat<(v8i16 (X86VBroadcast GR16:$src)),
            (VPBROADCASTWrr
                (VMOVDI2PDIrr
                    (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                        GR16:$src, sub_16bit))))>;
  def : Pat<(v16i16 (X86VBroadcast GR16:$src)),
            (VPBROADCASTWYrr
                (VMOVDI2PDIrr
                    (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                        GR16:$src, sub_16bit))))>;

  // f16 vectors are selected onto the word-broadcast instructions.
  // From memory:
  def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWrm addr:$src)>;
  def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWYrm addr:$src)>;

  // From a 128-bit vector register:
  def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128:$src))),
            (VPBROADCASTWrr VR128:$src)>;
  def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128:$src))),
            (VPBROADCASTWYrr VR128:$src)>;

  // From a scalar f16 register, copied into VR128 first:
  def : Pat<(v8f16 (X86VBroadcast (f16 FR16:$src))),
            (VPBROADCASTWrr (COPY_TO_REGCLASS FR16:$src, VR128))>;
  def : Pat<(v16f16 (X86VBroadcast (f16 FR16:$src))),
            (VPBROADCASTWYrr (COPY_TO_REGCLASS FR16:$src, VR128))>;
}
let Predicates = [HasAVX2, NoVLX] in {
  // Broadcast from a 32-bit GPR: move into a vector register, then broadcast.
  def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
            (VPBROADCASTDrr  (VMOVDI2PDIrr GR32:$src))>;
  def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
            (VPBROADCASTDYrr (VMOVDI2PDIrr GR32:$src))>;

  // Broadcast from a 64-bit GPR.
  def : Pat<(v2i64 (X86VBroadcast GR64:$src)),
            (VPBROADCASTQrr  (VMOV64toPQIrr GR64:$src))>;
  def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
            (VPBROADCASTQYrr (VMOV64toPQIrr GR64:$src))>;
}
77120b57cec5SDimitry Andric
77130b57cec5SDimitry Andric// AVX1 broadcast patterns
let Predicates = [HasAVX1Only] in {
  // Integer broadcast-loads are selected onto the FP broadcast-load
  // instructions of matching element width.
  def : Pat<(v8i32 (X86VBroadcastld32 addr:$src)),
            (VBROADCASTSSYrm addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcastld64 addr:$src)),
            (VBROADCASTSDYrm addr:$src)>;
  def : Pat<(v4i32 (X86VBroadcastld32 addr:$src)),
            (VBROADCASTSSrm addr:$src)>;
}
77220b57cec5SDimitry Andric
77230b57cec5SDimitry Andric  // Provide fallback in case the load node that is used in the patterns above
77240b57cec5SDimitry Andric  // is used by additional users, which prevents the pattern selection.
let Predicates = [HasAVX, NoVLX] in {
  // 128-bit f64 broadcasts are selected onto VMOVDDUP.
  // Scalar register source (copied into VR128 first):
  def : Pat<(v2f64 (X86VBroadcast f64:$src)),
            (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;
  // Memory source:
  def : Pat<(v2f64 (X86VBroadcastld64 addr:$src)),
            (VMOVDDUPrm addr:$src)>;
  // Vector register source:
  def : Pat<(v2f64 (X86VBroadcast v2f64:$src)),
            (VMOVDDUPrr VR128:$src)>;
}
77350b57cec5SDimitry Andric
let Predicates = [HasAVX1Only] in {
  // Every 256-bit pattern in this block has the same shape: compute the
  // 128-bit splat, place it in the sub_xmm subregister of an undefined
  // 256-bit register, then VINSERTF128 the same 128-bit splat at index 1.

  // --- f32 ---
  def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
            (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)>;
  def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
            (VINSERTF128rr
                (INSERT_SUBREG
                    (v8f32 (IMPLICIT_DEF)),
                    (v4f32 (VPERMILPSri
                               (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)),
                    sub_xmm),
                (v4f32 (VPERMILPSri
                           (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)),
                1)>;
  def : Pat<(v8f32 (X86VBroadcast v4f32:$src)),
            (VINSERTF128rr
                (INSERT_SUBREG
                    (v8f32 (IMPLICIT_DEF)),
                    (v4f32 (VPERMILPSri VR128:$src, 0)),
                    sub_xmm),
                (v4f32 (VPERMILPSri VR128:$src, 0)),
                1)>;

  // --- f64 ---
  def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
            (VINSERTF128rr
                (INSERT_SUBREG
                    (v4f64 (IMPLICIT_DEF)),
                    (v2f64 (VMOVDDUPrr
                               (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))),
                    sub_xmm),
                (v2f64 (VMOVDDUPrr
                           (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))),
                1)>;
  def : Pat<(v4f64 (X86VBroadcast v2f64:$src)),
            (VINSERTF128rr
                (INSERT_SUBREG
                    (v4f64 (IMPLICIT_DEF)),
                    (v2f64 (VMOVDDUPrr VR128:$src)),
                    sub_xmm),
                (v2f64 (VMOVDDUPrr VR128:$src)),
                1)>;

  // --- i32 / i64 from general-purpose registers ---
  def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
            (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)>;
  def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
            (VINSERTF128rr
                (INSERT_SUBREG
                    (v8i32 (IMPLICIT_DEF)),
                    (v4i32 (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)),
                    sub_xmm),
                (v4i32 (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)),
                1)>;
  def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
            (VINSERTF128rr
                (INSERT_SUBREG
                    (v4i64 (IMPLICIT_DEF)),
                    (v4i32 (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)),
                    sub_xmm),
                (v4i32 (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)),
                1)>;

  // --- 128-bit i64 ---
  def : Pat<(v2i64 (X86VBroadcast i64:$src)),
            (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)>;
  def : Pat<(v2i64 (X86VBroadcastld64 addr:$src)),
            (VMOVDDUPrm addr:$src)>;
}
77720b57cec5SDimitry Andric
77730b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
77740b57cec5SDimitry Andric// VPERM - Permute instructions
77750b57cec5SDimitry Andric//
77760b57cec5SDimitry Andric
// avx2_perm - 256-bit two-source permute selected from X86VPermv, in a
// register/register (Yrr) and a register/memory (Yrm) form.
//
//   opc       - opcode byte
//   OpcodeStr - assembly mnemonic
//   OpVT      - 256-bit result value type
//   Sched     - scheduling class; the memory form uses its Folded and
//               ReadAfterFold members
//   memOp     - memory operand class for the Yrm form
multiclass avx2_perm<bits<8> opc, string OpcodeStr,
                     ValueType OpVT, X86FoldableSchedWrite Sched,
                     X86MemOperand memOp> {
  let Predicates = [HasAVX2, NoVLX] in {
    def Yrr : AVX28I<opc, MRMSrcReg,
                     (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
                     OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR256:$dst,
                           (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>,
              Sched<[Sched]>, VEX_4V, VEX_L;

    def Yrm : AVX28I<opc, MRMSrcMem,
                     (outs VR256:$dst), (ins VR256:$src1, memOp:$src2),
                     OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR256:$dst,
                           (OpVT (X86VPermv VR256:$src1,
                                            (load addr:$src2))))]>,
              Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX_4V, VEX_L;
  }
}
77980b57cec5SDimitry Andric
// Integer dword permute.
defm VPERMD : avx2_perm<0x36, "vpermd", v8i32, WriteVarShuffle256, i256mem>;

// Single-precision permute, tagged with the packed-single execution domain.
let ExeDomain = SSEPackedSingle in {
  defm VPERMPS : avx2_perm<0x16, "vpermps", v8f32, WriteFVarShuffle256,
                           f256mem>;
}
78020b57cec5SDimitry Andric
// avx2_perm_imm - 256-bit immediate-controlled permute selected from
// X86VPermi, in a register (Yri) and a memory-source (Ymi) form.
//
//   opc       - opcode byte
//   OpcodeStr - assembly mnemonic
//   mem_frag  - load fragment matched by the Ymi form
//   OpVT      - 256-bit result value type
//   Sched     - scheduling class; the memory form uses its Folded and
//               ReadAfterFold members
//   memOp     - memory operand class for the Ymi form
multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
                         ValueType OpVT, X86FoldableSchedWrite Sched,
                         X86MemOperand memOp> {
  let Predicates = [HasAVX2, NoVLX] in {
    def Yri : AVX2AIi8<opc, MRMSrcReg,
                       (outs VR256:$dst), (ins VR256:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR256:$dst,
                             (OpVT (X86VPermi VR256:$src1,
                                              (i8 timm:$src2))))]>,
              Sched<[Sched]>, VEX, VEX_L;

    def Ymi : AVX2AIi8<opc, MRMSrcMem,
                       (outs VR256:$dst), (ins memOp:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR256:$dst,
                             (OpVT (X86VPermi (mem_frag addr:$src1),
                                              (i8 timm:$src2))))]>,
              Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VEX_L;
  }
}
78240b57cec5SDimitry Andric
// Qword permute; both instantiations carry REX_W.
defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64,
                            WriteShuffle256, i256mem>,
              REX_W;

// Double-precision permute, tagged with the packed-double execution domain.
let ExeDomain = SSEPackedDouble in {
  defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64,
                               WriteFShuffle256, f256mem>,
                 REX_W;
}
78300b57cec5SDimitry Andric
78310b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7832e8d8bef9SDimitry Andric// VPERM2I128 - Permute Integer vector Values in 128-bit chunks
78330b57cec5SDimitry Andric//
// Both forms have an empty pattern list; the selection patterns are
// instantiated separately. Only the register form is marked commutable.
let isCommutable = 1 in {
  def VPERM2I128rr
      : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
                 (ins VR256:$src1, VR256:$src2, u8imm:$src3),
                 "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                 []>,
        Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
}

def VPERM2I128rm
    : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
               (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
               "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
               []>,
      Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>,
      VEX_4V, VEX_L;
78430b57cec5SDimitry Andric
// Selection patterns for VPERM2I128, one instantiation per 256-bit integer
// or f16 vector type. Each type is listed exactly once: the v32i8 line was
// previously present twice, which only produced a redundant copy of the same
// anonymous patterns.
let Predicates = [HasAVX2] in {
  defm : vperm2x128_lowering<"VPERM2I128", v4i64,  loadv4i64>;
  defm : vperm2x128_lowering<"VPERM2I128", v8i32,  loadv8i32>;
  defm : vperm2x128_lowering<"VPERM2I128", v16i16, loadv16i16>;
  defm : vperm2x128_lowering<"VPERM2I128", v16f16, loadv16f16>;
  defm : vperm2x128_lowering<"VPERM2I128", v32i8,  loadv32i8>;
}
78520b57cec5SDimitry Andric
78530b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
78540b57cec5SDimitry Andric// VINSERTI128 - Insert packed integer values
78550b57cec5SDimitry Andric//
// Both forms have an empty pattern list, so their side-effect and load
// properties are declared explicitly here.
let hasSideEffects = 0 in {
  def VINSERTI128rr
      : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
                 (ins VR256:$src1, VR128:$src2, u8imm:$src3),
                 "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                 []>,
        Sched<[WriteShuffle256]>, VEX_4V, VEX_L;

  let mayLoad = 1 in {
    def VINSERTI128rm
        : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
                   (ins VR256:$src1, i128mem:$src2, u8imm:$src3),
                   "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                   []>,
          Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>,
          VEX_4V, VEX_L;
  }
}
78670b57cec5SDimitry Andric
// Selection patterns for VINSERTI128, one instantiation per
// (128-bit, 256-bit) vector type pair. Each pair is listed exactly once: the
// v16i8/v32i8 line was previously present twice, which only produced a
// redundant copy of the same anonymous patterns.
let Predicates = [HasAVX2, NoVLX] in {
  defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v2i64, v4i64,  loadv2i64,  loadv4i64>;
  defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v4i32, v8i32,  loadv4i32,  loadv8i32>;
  defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v8i16, v16i16, loadv8i16,  loadv16i16>;
  defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v8f16, v16f16, loadv8f16,  loadv16f16>;
  defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v16i8, v32i8,  loadv16i8,  loadv32i8>;
}
78760b57cec5SDimitry Andric
78770b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
78780b57cec5SDimitry Andric// VEXTRACTI128 - Extract packed integer values
78790b57cec5SDimitry Andric//
// Register-destination form. The pattern list is empty.
def VEXTRACTI128rr
    : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
               (ins VR256:$src1, u8imm:$src2),
               "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               []>,
      Sched<[WriteShuffle256]>, VEX, VEX_L;

// Memory-destination form. With an empty pattern list the store property is
// declared explicitly.
let hasSideEffects = 0, mayStore = 1 in {
  def VEXTRACTI128mr
      : AVX2AIi8<0x39, MRMDestMem, (outs),
                 (ins i128mem:$dst, VR256:$src1, u8imm:$src2),
                 "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 []>,
        Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_L;
}
78890b57cec5SDimitry Andric
// Selection patterns for VEXTRACTI128, one instantiation per
// (256-bit, 128-bit) vector type pair. Each pair is listed exactly once: the
// v32i8/v16i8 line was previously present twice, which only produced a
// redundant copy of the same anonymous patterns.
let Predicates = [HasAVX2, NoVLX] in {
  defm : vextract_lowering<"VEXTRACTI128", v4i64,  v2i64>;
  defm : vextract_lowering<"VEXTRACTI128", v8i32,  v4i32>;
  defm : vextract_lowering<"VEXTRACTI128", v16i16, v8i16>;
  defm : vextract_lowering<"VEXTRACTI128", v16f16, v8f16>;
  defm : vextract_lowering<"VEXTRACTI128", v32i8,  v16i8>;
}
78980b57cec5SDimitry Andric
78990b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
79000b57cec5SDimitry Andric// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
79010b57cec5SDimitry Andric//
// avx2_pmovmask - Masked integer load/store instructions, selected directly
// from the given intrinsics.
//   rm  / Yrm : 128/256-bit masked load  (opcode 0x8c)
//   mr  / Ymr : 128/256-bit masked store (opcode 0x8e)
//
//   OpcodeStr         - assembly mnemonic
//   IntLd128/IntLd256 - load intrinsics, matched as (addr, $src1)
//   IntSt128/IntSt256 - store intrinsics, matched as (addr, $src1, $src2)
//   schedX/schedY     - scheduling info for the 128/256-bit forms (RM for
//                       loads, MR for stores)
multiclass avx2_pmovmask<string OpcodeStr,
                         Intrinsic IntLd128, Intrinsic IntLd256,
                         Intrinsic IntSt128, Intrinsic IntSt256,
                         X86SchedWriteMaskMove schedX,
                         X86SchedWriteMaskMove schedY> {
  // Masked loads.
  def rm  : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
                   (ins VR128:$src1, i128mem:$src2),
                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>,
            VEX_4V, Sched<[schedX.RM]>;
  def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
                   (ins VR256:$src1, i256mem:$src2),
                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
            VEX_4V, VEX_L, Sched<[schedY.RM]>;

  // Masked stores.
  def mr  : AVX28I<0x8e, MRMDestMem, (outs),
                   (ins i128mem:$dst, VR128:$src1, VR128:$src2),
                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>,
            VEX_4V, Sched<[schedX.MR]>;
  def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
                   (ins i256mem:$dst, VR256:$src1, VR256:$src2),
                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
            VEX_4V, VEX_L, Sched<[schedY.MR]>;
}
79280b57cec5SDimitry Andric
// Dword masked moves.
defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
                                int_x86_avx2_maskload_d,
                                int_x86_avx2_maskload_d_256,
                                int_x86_avx2_maskstore_d,
                                int_x86_avx2_maskstore_d_256,
                                WriteVecMaskMove32,
                                WriteVecMaskMove32Y>;

// Qword masked moves; this instantiation additionally carries REX_W.
defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
                                int_x86_avx2_maskload_q,
                                int_x86_avx2_maskload_q_256,
                                int_x86_avx2_maskstore_q,
                                int_x86_avx2_maskstore_q_256,
                                WriteVecMaskMove64,
                                WriteVecMaskMove64Y>,
                  REX_W;
79410b57cec5SDimitry Andric
// maskmov_lowering - Maps the generic masked_store / masked_load nodes onto a
// mask-move instruction family.
//
//   InstrStr - instruction name prefix; "mr" (store) or "rm" (load) is
//              appended and the result is looked up as an Instruction
//   RC       - register class of both the data and the mask
//   VT       - data value type
//   MaskVT   - mask value type
multiclass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT,
                          ValueType MaskVT> {
  // Masked store: the instruction takes (address, mask, data).
  def : Pat<(masked_store (VT RC:$src), addr:$ptr, (MaskVT RC:$mask)),
            (!cast<Instruction>(InstrStr#"mr") addr:$ptr, RC:$mask, RC:$src)>;

  // Masked load: the instruction takes (mask, address). Matched only when the
  // pass-through operand is undef ...
  def : Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), undef)),
            (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
  // ... or an all-zeros vector.
  def : Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask),
                             (VT immAllZerosV))),
            (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
}
// FP masked moves, available with any AVX level.
let Predicates = [HasAVX] in {
  defm : maskmov_lowering<"VMASKMOVPS",  VR128, v4f32, v4i32>;
  defm : maskmov_lowering<"VMASKMOVPD",  VR128, v2f64, v2i64>;
  defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8f32, v8i32>;
  defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4f64, v4i64>;
}

// With AVX1 only, i32/i64 masked load/store is not supported, so integer
// vectors use the ps/pd instructions.
let Predicates = [HasAVX1Only] in {
  defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8i32, v8i32>;
  defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4i64, v4i64>;
  defm : maskmov_lowering<"VMASKMOVPS",  VR128, v4i32, v4i32>;
  defm : maskmov_lowering<"VMASKMOVPD",  VR128, v2i64, v2i64>;
}

// With AVX2, integer vectors use the integer masked-move instructions.
let Predicates = [HasAVX2] in {
  defm : maskmov_lowering<"VPMASKMOVDY", VR256, v8i32, v8i32>;
  defm : maskmov_lowering<"VPMASKMOVQY", VR256, v4i64, v4i64>;
  defm : maskmov_lowering<"VPMASKMOVD",  VR128, v4i32, v4i32>;
  defm : maskmov_lowering<"VPMASKMOVQ",  VR128, v2i64, v2i64>;
}
79730b57cec5SDimitry Andric
79740b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
79750b57cec5SDimitry Andric// Variable Bit Shifts
79760b57cec5SDimitry Andric//
/// avx2_var_shift - AVX2 variable shift instruction forms.
///
/// Emits four VEX_4V records sharing opcode `opc` and mnemonic `OpcodeStr`:
///   rr / rm   - VR128 operands, selected for value type `vt128`
///   Yrr / Yrm - VR256 operands (VEX_L), selected for value type `vt256`
/// Each pattern applies `OpNode` to $src1 and $src2; the rm/Yrm forms take
/// $src2 from a folded `load` and use the Folded/ReadAfterFold sched entries.
79770b57cec5SDimitry Andricmulticlass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
79780b57cec5SDimitry Andric                          ValueType vt128, ValueType vt256> {
  // 128-bit register-register form.
79790b57cec5SDimitry Andric  def rr  : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
79800b57cec5SDimitry Andric             (ins VR128:$src1, VR128:$src2),
79810b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
79820b57cec5SDimitry Andric             [(set VR128:$dst,
79830b57cec5SDimitry Andric               (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>,
79840b57cec5SDimitry Andric             VEX_4V, Sched<[SchedWriteVarVecShift.XMM]>;
  // 128-bit register-memory form ($src2 loaded from i128mem).
79850b57cec5SDimitry Andric  def rm  : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
79860b57cec5SDimitry Andric             (ins VR128:$src1, i128mem:$src2),
79870b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
79880b57cec5SDimitry Andric             [(set VR128:$dst,
79890b57cec5SDimitry Andric               (vt128 (OpNode VR128:$src1,
79900b57cec5SDimitry Andric                       (vt128 (load addr:$src2)))))]>,
79910b57cec5SDimitry Andric             VEX_4V, Sched<[SchedWriteVarVecShift.XMM.Folded,
79920b57cec5SDimitry Andric                            SchedWriteVarVecShift.XMM.ReadAfterFold]>;
  // 256-bit register-register form.
79930b57cec5SDimitry Andric  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
79940b57cec5SDimitry Andric             (ins VR256:$src1, VR256:$src2),
79950b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
79960b57cec5SDimitry Andric             [(set VR256:$dst,
79970b57cec5SDimitry Andric               (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>,
79980b57cec5SDimitry Andric             VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM]>;
  // 256-bit register-memory form ($src2 loaded from i256mem).
79990b57cec5SDimitry Andric  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
80000b57cec5SDimitry Andric             (ins VR256:$src1, i256mem:$src2),
80010b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
80020b57cec5SDimitry Andric             [(set VR256:$dst,
80030b57cec5SDimitry Andric               (vt256 (OpNode VR256:$src1,
80040b57cec5SDimitry Andric                       (vt256 (load addr:$src2)))))]>,
80050b57cec5SDimitry Andric             VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded,
80060b57cec5SDimitry Andric                                   SchedWriteVarVecShift.YMM.ReadAfterFold]>;
80070b57cec5SDimitry Andric}
80080b57cec5SDimitry Andric
// Instantiate the variable shifts under [HasAVX2, NoVLX]. The D forms use
// v4i32/v8i32; the Q forms use v2i64/v4i64 and add REX_W while sharing the D
// form's opcode. Only a D form is defined here for the X86vsrav node.
80090b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
80100b57cec5SDimitry Andric  defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", X86vshlv, v4i32, v8i32>;
801106c3fb27SDimitry Andric  defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", X86vshlv, v2i64, v4i64>, REX_W;
80120b57cec5SDimitry Andric  defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", X86vsrlv, v4i32, v8i32>;
801306c3fb27SDimitry Andric  defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", X86vsrlv, v2i64, v4i64>, REX_W;
80140b57cec5SDimitry Andric  defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", X86vsrav, v4i32, v8i32>;
80150b57cec5SDimitry Andric}
80160b57cec5SDimitry Andric
80170b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
80180b57cec5SDimitry Andric// VGATHER - GATHER Operations
80190b57cec5SDimitry Andric
80200b57cec5SDimitry Andric// FIXME: Improve scheduling of gather instructions.
/// avx2_gather - AVX2 gather instruction forms.
///
/// Emits two VEX records for opcode `opc`:
///   rm  - VR128 destination/mask, memory operand `memop128`
///   Yrm - `RC256` destination/mask (VEX_L), memory operand `memop256`
/// Both produce two results, $dst and the written-back mask $mask_wb, and take
/// $src1, the memory operand $src2 and $mask as inputs. No selection pattern is
/// attached (`[]`), so mayLoad / hasSideEffects are set explicitly.
8021349cc55cSDimitry Andricmulticlass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256,
8022349cc55cSDimitry Andric                       X86MemOperand memop128, X86MemOperand memop256> {
80235ffd83dbSDimitry Andriclet mayLoad = 1, hasSideEffects = 0 in {
80240b57cec5SDimitry Andric  def rm  : AVX28I<opc, MRMSrcMem4VOp3, (outs VR128:$dst, VR128:$mask_wb),
80250b57cec5SDimitry Andric            (ins VR128:$src1, memop128:$src2, VR128:$mask),
80260b57cec5SDimitry Andric            !strconcat(OpcodeStr,
80270b57cec5SDimitry Andric              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
8028fe6060f1SDimitry Andric            []>, VEX, Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
80290b57cec5SDimitry Andric  def Yrm : AVX28I<opc, MRMSrcMem4VOp3, (outs RC256:$dst, RC256:$mask_wb),
80300b57cec5SDimitry Andric            (ins RC256:$src1, memop256:$src2, RC256:$mask),
80310b57cec5SDimitry Andric            !strconcat(OpcodeStr,
80320b57cec5SDimitry Andric              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
8033fe6060f1SDimitry Andric            []>, VEX, VEX_L, Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
80345ffd83dbSDimitry Andric}
80350b57cec5SDimitry Andric}
80360b57cec5SDimitry Andric
// Instantiate the AVX2 gathers. The shared Constraints string marks both
// results ($dst, $mask_wb) as early-clobber and ties them to $src1 and $mask.
// The *QD / *QPS instantiations pass VR128 as RC256, so their Yrm form also
// uses a 128-bit destination and mask (presumably because four qword indices
// only produce four dword elements - confirm against the ISA reference).
80370b57cec5SDimitry Andriclet Predicates = [HasAVX2] in {
80380b57cec5SDimitry Andric  let mayLoad = 1, hasSideEffects = 0, Constraints
80390b57cec5SDimitry Andric    = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
80400b57cec5SDimitry Andric    in {
    // Integer gathers; the 64-bit element forms add REX_W.
8041349cc55cSDimitry Andric    defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq",
804206c3fb27SDimitry Andric                                  VR256, vx128mem, vx256mem>, REX_W;
8043349cc55cSDimitry Andric    defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq",
804406c3fb27SDimitry Andric                                  VR256, vx128mem, vy256mem>, REX_W;
8045349cc55cSDimitry Andric    defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd",
80465ffd83dbSDimitry Andric                                  VR256, vx128mem, vy256mem>;
8047349cc55cSDimitry Andric    defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd",
80485ffd83dbSDimitry Andric                                  VR128, vx64mem, vy128mem>;
80490b57cec5SDimitry Andric
    // Double-precision gathers (SSEPackedDouble domain, REX_W).
80500b57cec5SDimitry Andric    let ExeDomain = SSEPackedDouble in {
8051349cc55cSDimitry Andric      defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd",
805206c3fb27SDimitry Andric                                    VR256, vx128mem, vx256mem>, REX_W;
8053349cc55cSDimitry Andric      defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd",
805406c3fb27SDimitry Andric                                    VR256, vx128mem, vy256mem>, REX_W;
80550b57cec5SDimitry Andric    }
80560b57cec5SDimitry Andric
    // Single-precision gathers (SSEPackedSingle domain).
80570b57cec5SDimitry Andric    let ExeDomain = SSEPackedSingle in {
8058349cc55cSDimitry Andric      defm VGATHERDPS : avx2_gather<0x92, "vgatherdps",
8059349cc55cSDimitry Andric                                    VR256, vx128mem, vy256mem>;
8060349cc55cSDimitry Andric      defm VGATHERQPS : avx2_gather<0x93, "vgatherqps",
8061349cc55cSDimitry Andric                                    VR128, vx64mem, vy128mem>;
80620b57cec5SDimitry Andric    }
80630b57cec5SDimitry Andric  }
80640b57cec5SDimitry Andric}
80650b57cec5SDimitry Andric
80660b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
80670b57cec5SDimitry Andric// GFNI instructions
80680b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
80690b57cec5SDimitry Andric
/// GF2P8MULB_rm - register-register (rr) and register-memory (rm) forms of
/// GF2P8MULB (opcode 0xCF, T8PD) over register class `RC` / value type `OpVT`.
///
/// `MemOpFrag` is the load fragment used to fold $src2 in the rm form and
/// `X86MemOp` is its memory operand class. `Is2Addr` picks the assembly
/// string: the two-operand form omits $src1, the three-operand form prints it.
/// The per-def asm string is "" because AsmString is set by the enclosing let.
80700b57cec5SDimitry Andricmulticlass GF2P8MULB_rm<string OpcodeStr, ValueType OpVT,
80710b57cec5SDimitry Andric                        RegisterClass RC, PatFrag MemOpFrag,
8072bdd1243dSDimitry Andric                        X86MemOperand X86MemOp, X86FoldableSchedWrite sched,
8073bdd1243dSDimitry Andric                        bit Is2Addr = 0> {
80740b57cec5SDimitry Andric  let ExeDomain = SSEPackedInt,
80750b57cec5SDimitry Andric      AsmString = !if(Is2Addr,
80765ffd83dbSDimitry Andric        OpcodeStr#"\t{$src2, $dst|$dst, $src2}",
80775ffd83dbSDimitry Andric        OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}") in {
    // Only the register form is marked commutable.
80780b57cec5SDimitry Andric    let isCommutable = 1 in
80790b57cec5SDimitry Andric    def rr : PDI<0xCF, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), "",
80800b57cec5SDimitry Andric                 [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, RC:$src2)))]>,
8081bdd1243dSDimitry Andric             Sched<[sched]>, T8PD;
80820b57cec5SDimitry Andric
80830b57cec5SDimitry Andric    def rm : PDI<0xCF, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, X86MemOp:$src2), "",
80840b57cec5SDimitry Andric                 [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1,
80850b57cec5SDimitry Andric                                 (MemOpFrag addr:$src2))))]>,
8086bdd1243dSDimitry Andric             Sched<[sched.Folded, sched.ReadAfterFold]>, T8PD;
80870b57cec5SDimitry Andric  }
80880b57cec5SDimitry Andric}
80890b57cec5SDimitry Andric
/// GF2P8AFFINE_rmi - register (rri) and memory (rmi) forms of an affine GFNI
/// instruction that takes an 8-bit immediate as $src3.
///
/// `Op` is the opcode byte, `OpNode` the selection node, `MemOpFrag` the load
/// fragment folded into $src2 by rmi. `Is2Addr` selects the two-operand
/// assembly string (no $src1) over the three-operand one. The immediate is
/// matched as `timm` in both patterns.
80900b57cec5SDimitry Andricmulticlass GF2P8AFFINE_rmi<bits<8> Op, string OpStr, ValueType OpVT,
80910b57cec5SDimitry Andric                           SDNode OpNode, RegisterClass RC, PatFrag MemOpFrag,
8092bdd1243dSDimitry Andric                           X86MemOperand X86MemOp, X86FoldableSchedWrite sched,
8093bdd1243dSDimitry Andric                           bit Is2Addr = 0> {
80940b57cec5SDimitry Andric  let AsmString = !if(Is2Addr,
80955ffd83dbSDimitry Andric      OpStr#"\t{$src3, $src2, $dst|$dst, $src2, $src3}",
80965ffd83dbSDimitry Andric      OpStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in {
80970b57cec5SDimitry Andric  def rri : Ii8<Op, MRMSrcReg, (outs RC:$dst),
80980b57cec5SDimitry Andric              (ins RC:$src1, RC:$src2, u8imm:$src3), "",
80998bcb0991SDimitry Andric              [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))],
8100bdd1243dSDimitry Andric              SSEPackedInt>, Sched<[sched]>;
81010b57cec5SDimitry Andric  def rmi : Ii8<Op, MRMSrcMem, (outs RC:$dst),
81020b57cec5SDimitry Andric              (ins RC:$src1, X86MemOp:$src2, u8imm:$src3), "",
81030b57cec5SDimitry Andric              [(set RC:$dst, (OpVT (OpNode RC:$src1,
81040b57cec5SDimitry Andric                                    (MemOpFrag addr:$src2),
81058bcb0991SDimitry Andric                              timm:$src3)))], SSEPackedInt>,
8106bdd1243dSDimitry Andric              Sched<[sched.Folded, sched.ReadAfterFold]>;
81070b57cec5SDimitry Andric  }
81080b57cec5SDimitry Andric}
81090b57cec5SDimitry Andric
/// GF2P8AFFINE_common - instantiates every encoding of one affine GFNI
/// instruction from GF2P8AFFINE_rmi:
///   NAME      - legacy SSE form, two-address ($src1 tied to $dst), VR128
///   V#NAME    - VEX 128-bit form (VEX_4V, REX_W)
///   V#NAME#Y  - VEX 256-bit form (VEX_4V, VEX_L, REX_W)
81100b57cec5SDimitry Andricmulticlass GF2P8AFFINE_common<bits<8> Op, string OpStr, SDNode OpNode> {
81110b57cec5SDimitry Andric  let Constraints = "$src1 = $dst",
81120b57cec5SDimitry Andric      Predicates  = [HasGFNI, UseSSE2] in
  // The legacy SSE encoding folds its memory operand through `memop`, the same
  // fragment the SSE GF2P8MULB definition in this file uses, rather than plain
  // `load`: non-VEX 128-bit memory operands must not fold loads that fail the
  // SSE alignment check. The VEX forms below keep `load`.
81130b57cec5SDimitry Andric  defm NAME         : GF2P8AFFINE_rmi<Op, OpStr, v16i8, OpNode,
8114bdd1243dSDimitry Andric                                      VR128, memop, i128mem, SchedWriteVecIMul.XMM, 1>;
8115bdd1243dSDimitry Andric  let Predicates  = [HasGFNI, HasAVX, NoVLX] in {
81165ffd83dbSDimitry Andric    defm V#NAME    : GF2P8AFFINE_rmi<Op, "v"#OpStr, v16i8, OpNode, VR128,
8117bdd1243dSDimitry Andric                                     load, i128mem, SchedWriteVecIMul.XMM>,
811806c3fb27SDimitry Andric                                     VEX_4V, REX_W;
81195ffd83dbSDimitry Andric    defm V#NAME#Y : GF2P8AFFINE_rmi<Op, "v"#OpStr, v32i8, OpNode, VR256,
8120bdd1243dSDimitry Andric                                     load, i256mem, SchedWriteVecIMul.YMM>,
812106c3fb27SDimitry Andric                                     VEX_4V, VEX_L, REX_W;
81220b57cec5SDimitry Andric  }
81230b57cec5SDimitry Andric}
81240b57cec5SDimitry Andric
81250b57cec5SDimitry Andric// GF2P8MULB: legacy SSE form (two-address, $src1 tied to $dst, memory operand
// folded with `memop`) followed by the VEX 128-bit and 256-bit forms (folded
// with `load`).
81260b57cec5SDimitry Andriclet Constraints = "$src1 = $dst",
81270b57cec5SDimitry Andric    Predicates  = [HasGFNI, UseSSE2] in
81280b57cec5SDimitry Andricdefm GF2P8MULB      : GF2P8MULB_rm<"gf2p8mulb", v16i8, VR128, memop,
8129bdd1243dSDimitry Andric                                    i128mem, SchedWriteVecALU.XMM, 1>;
8130bdd1243dSDimitry Andriclet Predicates  = [HasGFNI, HasAVX, NoVLX] in {
81310b57cec5SDimitry Andric  defm VGF2P8MULB   : GF2P8MULB_rm<"vgf2p8mulb", v16i8, VR128, load,
8132bdd1243dSDimitry Andric                                   i128mem, SchedWriteVecALU.XMM>, VEX_4V;
81330b57cec5SDimitry Andric  defm VGF2P8MULBY  : GF2P8MULB_rm<"vgf2p8mulb", v32i8, VR256, load,
8134bdd1243dSDimitry Andric                                   i256mem, SchedWriteVecALU.YMM>, VEX_4V, VEX_L;
81350b57cec5SDimitry Andric}
81360b57cec5SDimitry Andric// GF2P8AFFINEINVQB, GF2P8AFFINEQB
// Both are instantiated through GF2P8AFFINE_common with the TAPD opcode map /
// prefix and are explicitly marked non-commutable.
81370b57cec5SDimitry Andriclet isCommutable = 0 in {
81380b57cec5SDimitry Andric  defm GF2P8AFFINEINVQB : GF2P8AFFINE_common<0xCF, "gf2p8affineinvqb",
81390b57cec5SDimitry Andric                                             X86GF2P8affineinvqb>, TAPD;
81400b57cec5SDimitry Andric  defm GF2P8AFFINEQB    : GF2P8AFFINE_common<0xCE, "gf2p8affineqb",
81410b57cec5SDimitry Andric                                             X86GF2P8affineqb>, TAPD;
81420b57cec5SDimitry Andric}
81430b57cec5SDimitry Andric
8144bdd1243dSDimitry Andric// AVX-IFMA
/// avx_ifma_rm - VEX-encoded AVX-IFMA multiply-add instruction forms.
///
/// Emits rr/rm (VR128, v2i64) and Yrr/Yrm (VR256, v4i64, VEX_L) records for
/// opcode `opc`. $src1 is tied to $dst and is passed to `OpNode` as the last
/// operand; $src2 and $src3 come first. The rm/Yrm forms fold $src3 from
/// memory, so they use the Folded write plus one ReadAfterFold per register
/// source ($src1, $src2) instead of the register-form sched write.
8145bdd1243dSDimitry Andriclet Predicates = [HasAVXIFMA, NoVLX_Or_NoIFMA], Constraints = "$src1 = $dst",
8146bdd1243dSDimitry Andric    checkVEXPredicate = 1 in
8147bdd1243dSDimitry Andricmulticlass avx_ifma_rm<bits<8> opc, string OpcodeStr, SDNode OpNode> {
8148bdd1243dSDimitry Andric  // NOTE: The SDNode has the multiply operands first and the add operand last.
8149bdd1243dSDimitry Andric  // This enables commuted load patterns to be autogenerated by tablegen.
8150bdd1243dSDimitry Andric  let isCommutable = 1 in {
8151bdd1243dSDimitry Andric    def rr  : AVX8I<opc, MRMSrcReg, (outs VR128:$dst),
8152bdd1243dSDimitry Andric               (ins VR128:$src1, VR128:$src2, VR128:$src3),
8153bdd1243dSDimitry Andric               !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
8154bdd1243dSDimitry Andric               [(set VR128:$dst, (v2i64 (OpNode VR128:$src2,
8155bdd1243dSDimitry Andric                                         VR128:$src3, VR128:$src1)))]>,
8156bdd1243dSDimitry Andric               VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
8157bdd1243dSDimitry Andric  }
8158bdd1243dSDimitry Andric    def rm  : AVX8I<opc, MRMSrcMem, (outs VR128:$dst),
8159bdd1243dSDimitry Andric               (ins VR128:$src1, VR128:$src2, i128mem:$src3),
8160bdd1243dSDimitry Andric               !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
8161bdd1243dSDimitry Andric               [(set VR128:$dst, (v2i64 (OpNode VR128:$src2,
8162bdd1243dSDimitry Andric                                        (loadv2i64 addr:$src3), VR128:$src1)))]>,
8163bdd1243dSDimitry Andric               VEX_4V, Sched<[SchedWriteVecIMul.XMM.Folded,
                              SchedWriteVecIMul.XMM.ReadAfterFold,
                              SchedWriteVecIMul.XMM.ReadAfterFold]>;
8164bdd1243dSDimitry Andric  let isCommutable = 1 in {
8165bdd1243dSDimitry Andric    def Yrr  : AVX8I<opc, MRMSrcReg, (outs VR256:$dst),
8166bdd1243dSDimitry Andric               (ins VR256:$src1, VR256:$src2, VR256:$src3),
8167bdd1243dSDimitry Andric               !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
8168bdd1243dSDimitry Andric               [(set VR256:$dst, (v4i64 (OpNode VR256:$src2,
8169bdd1243dSDimitry Andric                                         VR256:$src3, VR256:$src1)))]>,
8170bdd1243dSDimitry Andric               VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
8171bdd1243dSDimitry Andric  }
8172bdd1243dSDimitry Andric    def Yrm  : AVX8I<opc, MRMSrcMem, (outs VR256:$dst),
8173bdd1243dSDimitry Andric               (ins VR256:$src1, VR256:$src2, i256mem:$src3),
8174bdd1243dSDimitry Andric               !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
8175bdd1243dSDimitry Andric               [(set VR256:$dst, (v4i64 (OpNode VR256:$src2,
8176bdd1243dSDimitry Andric                                        (loadv4i64 addr:$src3), VR256:$src1)))]>,
8177bdd1243dSDimitry Andric               VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM.Folded,
                                     SchedWriteVecIMul.YMM.ReadAfterFold,
                                     SchedWriteVecIMul.YMM.ReadAfterFold]>;
8178bdd1243dSDimitry Andric}
8179bdd1243dSDimitry Andric
// VEX-encoded vpmadd52huq / vpmadd52luq, instantiated from avx_ifma_rm with
// REX_W and ExplicitVEXPrefix.
818006c3fb27SDimitry Andricdefm VPMADD52HUQ : avx_ifma_rm<0xb5, "vpmadd52huq", x86vpmadd52h>, REX_W, ExplicitVEXPrefix;
818106c3fb27SDimitry Andricdefm VPMADD52LUQ : avx_ifma_rm<0xb4, "vpmadd52luq", x86vpmadd52l>, REX_W, ExplicitVEXPrefix;
8182bdd1243dSDimitry Andric
8183bdd1243dSDimitry Andric// AVX-VNNI-INT8
/// avx_dotprod_rm - register (rr) and memory (rm) forms of a three-source,
/// VEX_4V dot-product instruction whose $src1 is tied to $dst.
///
/// `OpNode` is applied as (OpNode $src1, $src2, $src3); the rm form folds
/// $src3 through `MemOpFrag`. `IsCommutable` sets isCommutable on the rr form
/// only. `Sched` supplies the register write and its Folded/ReadAfterFold
/// entries for the memory form.
8184bdd1243dSDimitry Andriclet Constraints = "$src1 = $dst" in
8185bdd1243dSDimitry Andricmulticlass avx_dotprod_rm<bits<8> Opc, string OpcodeStr, ValueType OpVT,
8186bdd1243dSDimitry Andric                          RegisterClass RC, PatFrag MemOpFrag,
8187bdd1243dSDimitry Andric                          X86MemOperand X86memop, SDNode OpNode,
8188bdd1243dSDimitry Andric                          X86FoldableSchedWrite Sched,
8189bdd1243dSDimitry Andric                          bit IsCommutable> {
8190bdd1243dSDimitry Andric  let isCommutable = IsCommutable in
8191bdd1243dSDimitry Andric  def rr  :  I<Opc, MRMSrcReg, (outs RC:$dst),
8192bdd1243dSDimitry Andric             (ins RC:$src1, RC:$src2, RC:$src3),
8193bdd1243dSDimitry Andric             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
8194bdd1243dSDimitry Andric             [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
8195bdd1243dSDimitry Andric             VEX_4V, Sched<[Sched]>;
8196bdd1243dSDimitry Andric  def rm  :  I<Opc, MRMSrcMem, (outs RC:$dst),
8197bdd1243dSDimitry Andric             (ins RC:$src1, RC:$src2, X86memop:$src3),
8198bdd1243dSDimitry Andric             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
8199bdd1243dSDimitry Andric             [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2,
8200bdd1243dSDimitry Andric                                   (MemOpFrag addr:$src3))))]>,
8201bdd1243dSDimitry Andric             VEX_4V, Sched<[Sched.Folded, Sched.ReadAfterFold]>;
8202bdd1243dSDimitry Andric}
8203bdd1243dSDimitry Andric
// AVX-VNNI-INT8 instantiations. Each mnemonic gets a 128-bit record and a
// 256-bit `Y` record (VEX_L). Opcode 0x50 is used for the plain forms and 0x51
// for the `s`-suffixed forms; the prefix class (T8XD / T8PS / T8XS) separates
// the ss / uu / su variants. The ss and uu variants pass IsCommutable = 1, the
// su variants pass 0 (presumably because their two multiplicands differ in
// signedness - confirm against the ISA reference).
8204bdd1243dSDimitry Andriclet Predicates = [HasAVXVNNIINT8] in {
8205bdd1243dSDimitry Andric  defm VPDPBSSD   : avx_dotprod_rm<0x50,"vpdpbssd",  v4i32, VR128, loadv4i32,
8206bdd1243dSDimitry Andric                                   i128mem, X86vpdpbssd, SchedWriteVecIMul.XMM,
8207bdd1243dSDimitry Andric                                   1>, T8XD;
8208bdd1243dSDimitry Andric  defm VPDPBSSDY  : avx_dotprod_rm<0x50,"vpdpbssd",  v8i32, VR256, loadv8i32,
8209bdd1243dSDimitry Andric                                   i256mem, X86vpdpbssd, SchedWriteVecIMul.YMM,
8210bdd1243dSDimitry Andric                                   1>, VEX_L, T8XD;
8211bdd1243dSDimitry Andric  defm VPDPBUUD   : avx_dotprod_rm<0x50,"vpdpbuud",  v4i32, VR128, loadv4i32,
8212bdd1243dSDimitry Andric                                   i128mem, X86vpdpbuud, SchedWriteVecIMul.XMM,
8213bdd1243dSDimitry Andric                                   1>, T8PS;
8214bdd1243dSDimitry Andric  defm VPDPBUUDY  : avx_dotprod_rm<0x50,"vpdpbuud",  v8i32, VR256, loadv8i32,
8215bdd1243dSDimitry Andric                                   i256mem, X86vpdpbuud, SchedWriteVecIMul.YMM,
8216bdd1243dSDimitry Andric                                   1>, VEX_L, T8PS;
8217bdd1243dSDimitry Andric  defm VPDPBSSDS  : avx_dotprod_rm<0x51,"vpdpbssds", v4i32, VR128, loadv4i32,
8218bdd1243dSDimitry Andric                                   i128mem, X86vpdpbssds, SchedWriteVecIMul.XMM,
8219bdd1243dSDimitry Andric                                   1>, T8XD;
8220bdd1243dSDimitry Andric  defm VPDPBSSDSY : avx_dotprod_rm<0x51,"vpdpbssds", v8i32, VR256, loadv8i32,
8221bdd1243dSDimitry Andric                                   i256mem, X86vpdpbssds, SchedWriteVecIMul.YMM,
8222bdd1243dSDimitry Andric                                   1>, VEX_L, T8XD;
8223bdd1243dSDimitry Andric  defm VPDPBUUDS  : avx_dotprod_rm<0x51,"vpdpbuuds", v4i32, VR128, loadv4i32,
8224bdd1243dSDimitry Andric                                   i128mem, X86vpdpbuuds, SchedWriteVecIMul.XMM,
8225bdd1243dSDimitry Andric                                   1>, T8PS;
8226bdd1243dSDimitry Andric  defm VPDPBUUDSY : avx_dotprod_rm<0x51,"vpdpbuuds", v8i32, VR256, loadv8i32,
8227bdd1243dSDimitry Andric                                   i256mem, X86vpdpbuuds, SchedWriteVecIMul.YMM,
8228bdd1243dSDimitry Andric                                   1>, VEX_L, T8PS;
8229bdd1243dSDimitry Andric  defm VPDPBSUD   : avx_dotprod_rm<0x50,"vpdpbsud",  v4i32, VR128, loadv4i32,
8230bdd1243dSDimitry Andric                                   i128mem, X86vpdpbsud,  SchedWriteVecIMul.XMM,
8231bdd1243dSDimitry Andric                                   0>, T8XS;
8232bdd1243dSDimitry Andric  defm VPDPBSUDY  : avx_dotprod_rm<0x50,"vpdpbsud",  v8i32, VR256, loadv8i32,
8233bdd1243dSDimitry Andric                                   i256mem, X86vpdpbsud,  SchedWriteVecIMul.YMM,
8234bdd1243dSDimitry Andric                                   0>,  VEX_L, T8XS;
8235bdd1243dSDimitry Andric  defm VPDPBSUDS  : avx_dotprod_rm<0x51,"vpdpbsuds", v4i32, VR128, loadv4i32,
8236bdd1243dSDimitry Andric                                   i128mem, X86vpdpbsuds, SchedWriteVecIMul.XMM,
8237bdd1243dSDimitry Andric                                   0>, T8XS;
8238bdd1243dSDimitry Andric  defm VPDPBSUDSY : avx_dotprod_rm<0x51,"vpdpbsuds", v8i32, VR256, loadv8i32,
8239bdd1243dSDimitry Andric                                   i256mem, X86vpdpbsuds, SchedWriteVecIMul.YMM,
8240bdd1243dSDimitry Andric                                   0>, VEX_L, T8XS;
8241bdd1243dSDimitry Andric}
8242bdd1243dSDimitry Andric
8243bdd1243dSDimitry Andric// AVX-NE-CONVERT
/// AVX_NE_CONVERT_BASE - memory-source-only conversion forms.
///
/// Emits rm (VR128 result, `MemOp128` source) and Yrm (VR256 result,
/// `MemOp256` source, VEX_L). There is no register-source form. Each pattern
/// calls the intrinsic whose record name is "int_x86_" # OpcodeStr # "128"
/// (or "256") directly on the address operand, so `OpcodeStr` must match the
/// intrinsic naming exactly.
8244bdd1243dSDimitry Andricmulticlass AVX_NE_CONVERT_BASE<bits<8> Opcode, string OpcodeStr,
8245bdd1243dSDimitry Andric                  X86MemOperand MemOp128, X86MemOperand MemOp256> {
8246bdd1243dSDimitry Andric  def rm : I<Opcode, MRMSrcMem, (outs VR128:$dst), (ins MemOp128:$src),
8247bdd1243dSDimitry Andric              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
8248bdd1243dSDimitry Andric              [(set VR128:$dst,
8249bdd1243dSDimitry Andric                (!cast<Intrinsic>("int_x86_"#OpcodeStr#"128") addr:$src))]>,
8250bdd1243dSDimitry Andric              Sched<[WriteCvtPH2PS]>, VEX;
8251bdd1243dSDimitry Andric  def Yrm : I<Opcode, MRMSrcMem, (outs VR256:$dst), (ins MemOp256:$src),
8252bdd1243dSDimitry Andric              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
8253bdd1243dSDimitry Andric              [(set VR256:$dst,
8254bdd1243dSDimitry Andric                (!cast<Intrinsic>("int_x86_"#OpcodeStr#"256") addr:$src))]>,
8255bdd1243dSDimitry Andric              Sched<[WriteCvtPH2PSY]>, VEX, VEX_L;
8256bdd1243dSDimitry Andric}
8257bdd1243dSDimitry Andric
/// VCVTNEPS2BF16_BASE - the four vcvtneps2bf16 forms (opcode 0x72).
///
/// rr/rm take a 128-bit source (VR128 / f128mem); Yrr/Yrm take a 256-bit
/// source (VR256 / f256mem, VEX_L). All four write a VR128 destination. The
/// memory forms spell the mnemonic with a "{x}" / "{y}" suffix so the source
/// width can be told apart when the operand is memory.
/// NOTE(review): rm/Yrm reuse the register-form sched writes rather than
/// folded-load writes - confirm this is intended.
8258bdd1243dSDimitry Andricmulticlass VCVTNEPS2BF16_BASE {
8259bdd1243dSDimitry Andric  def rr : I<0x72, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
8260bdd1243dSDimitry Andric             "vcvtneps2bf16\t{$src, $dst|$dst, $src}",
8261bdd1243dSDimitry Andric             [(set VR128:$dst, (int_x86_vcvtneps2bf16128 VR128:$src))]>,
8262bdd1243dSDimitry Andric             Sched<[WriteCvtPH2PS]>;
8263bdd1243dSDimitry Andric  def rm : I<0x72, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
8264bdd1243dSDimitry Andric             "vcvtneps2bf16{x}\t{$src, $dst|$dst, $src}",
8265bdd1243dSDimitry Andric             [(set VR128:$dst, (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src)))]>,
8266bdd1243dSDimitry Andric             Sched<[WriteCvtPH2PS]>;
8267bdd1243dSDimitry Andric  def Yrr : I<0x72, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
8268bdd1243dSDimitry Andric             "vcvtneps2bf16\t{$src, $dst|$dst, $src}",
8269bdd1243dSDimitry Andric             [(set VR128:$dst, (int_x86_vcvtneps2bf16256 VR256:$src))]>,
8270bdd1243dSDimitry Andric             Sched<[WriteCvtPH2PSY]>, VEX_L;
8271bdd1243dSDimitry Andric  def Yrm : I<0x72, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
8272bdd1243dSDimitry Andric             "vcvtneps2bf16{y}\t{$src, $dst|$dst, $src}",
8273bdd1243dSDimitry Andric             [(set VR128:$dst, (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src)))]>,
8274bdd1243dSDimitry Andric             Sched<[WriteCvtPH2PSY]>, VEX_L;
8275bdd1243dSDimitry Andric}
8276bdd1243dSDimitry Andric
// AVX-NE-CONVERT instantiations. The vbcstne* forms read an f16mem operand
// for both widths; the vcvtnee*/vcvtneo* forms read f128mem / f256mem. Forms
// that share an opcode byte are separated by their prefix class
// (T8XS / T8PD / T8XD / T8PS).
8277bdd1243dSDimitry Andriclet Predicates = [HasAVXNECONVERT] in {
8278bdd1243dSDimitry Andric  defm VBCSTNEBF162PS : AVX_NE_CONVERT_BASE<0xb1, "vbcstnebf162ps", f16mem,
8279bdd1243dSDimitry Andric       f16mem>, T8XS;
8280bdd1243dSDimitry Andric  defm VBCSTNESH2PS : AVX_NE_CONVERT_BASE<0xb1, "vbcstnesh2ps", f16mem, f16mem>,
8281bdd1243dSDimitry Andric       T8PD;
8282bdd1243dSDimitry Andric  defm VCVTNEEBF162PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneebf162ps", f128mem,
8283bdd1243dSDimitry Andric       f256mem>, T8XS;
8284bdd1243dSDimitry Andric  defm VCVTNEEPH2PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneeph2ps", f128mem,
8285bdd1243dSDimitry Andric       f256mem>, T8PD;
8286bdd1243dSDimitry Andric  defm VCVTNEOBF162PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneobf162ps", f128mem,
8287bdd1243dSDimitry Andric       f256mem>, T8XD;
8288bdd1243dSDimitry Andric  defm VCVTNEOPH2PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneoph2ps", f128mem,
8289bdd1243dSDimitry Andric       f256mem>, T8PS;
  // vcvtneps2bf16 is emitted with checkVEXPredicate set and ExplicitVEXPrefix.
8290bdd1243dSDimitry Andric  let checkVEXPredicate = 1 in
8291bdd1243dSDimitry Andric  defm VCVTNEPS2BF16 : VCVTNEPS2BF16_BASE, VEX, T8XS, ExplicitVEXPrefix;
8292*8a4dda33SDimitry Andric
  // Select X86vfpround from v8f32 to v8bf16 to the 256-bit-source forms.
8293*8a4dda33SDimitry Andric  def : Pat<(v8bf16 (X86vfpround (v8f32 VR256:$src))),
8294*8a4dda33SDimitry Andric            (VCVTNEPS2BF16Yrr VR256:$src)>;
8295*8a4dda33SDimitry Andric  def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))),
8296*8a4dda33SDimitry Andric            (VCVTNEPS2BF16Yrm addr:$src)>;
8297bdd1243dSDimitry Andric}
8298bdd1243dSDimitry Andric
// Accept the explicitly suffixed "vcvtneps2bf16x" / "vcvtneps2bf16y" spellings
// with register sources in the "att" variant, mapping them to the rr / Yrr
// records. The third argument is 0 (presumably the emit priority, i.e. the
// alias is parse-only - confirm against the InstAlias definition).
8299bdd1243dSDimitry Andricdef : InstAlias<"vcvtneps2bf16x\t{$src, $dst|$dst, $src}",
8300bdd1243dSDimitry Andric                (VCVTNEPS2BF16rr VR128:$dst, VR128:$src), 0, "att">;
8301bdd1243dSDimitry Andricdef : InstAlias<"vcvtneps2bf16y\t{$src, $dst|$dst, $src}",
8302bdd1243dSDimitry Andric                (VCVTNEPS2BF16Yrr VR128:$dst, VR256:$src), 0, "att">;
830306c3fb27SDimitry Andric
830406c3fb27SDimitry Andric// FIXME: Is there a better scheduler class for SHA512 than WriteVecIMul?
// SHA512 instructions: register-only forms, all VEX_L / T8XD with a VR256
// destination tied to $src1. Each is selected directly from its intrinsic.
//   VSHA512MSG1rr  - second source is VR128
//   VSHA512MSG2rr  - second source is VR256
//   VSHA512RNDS2rr - three sources (VR256, VR256, VR128), encoded VEX_4V
830506c3fb27SDimitry Andriclet Predicates = [HasSHA512], Constraints = "$src1 = $dst" in {
830606c3fb27SDimitry Andricdef VSHA512MSG1rr : I<0xcc, MRMSrcReg, (outs VR256:$dst),
830706c3fb27SDimitry Andric                     (ins VR256:$src1, VR128:$src2),
830806c3fb27SDimitry Andric                     "vsha512msg1\t{$src2, $dst|$dst, $src2}",
830906c3fb27SDimitry Andric                     [(set VR256:$dst,
831006c3fb27SDimitry Andric                       (int_x86_vsha512msg1 VR256:$src1, VR128:$src2))]>, VEX_L,
831106c3fb27SDimitry Andric                     VEX, T8XD, Sched<[WriteVecIMul]>;
831206c3fb27SDimitry Andricdef VSHA512MSG2rr : I<0xcd, MRMSrcReg, (outs VR256:$dst),
831306c3fb27SDimitry Andric                     (ins VR256:$src1, VR256:$src2),
831406c3fb27SDimitry Andric                     "vsha512msg2\t{$src2, $dst|$dst, $src2}",
831506c3fb27SDimitry Andric                     [(set VR256:$dst,
831606c3fb27SDimitry Andric                       (int_x86_vsha512msg2 VR256:$src1, VR256:$src2))]>, VEX_L,
831706c3fb27SDimitry Andric                     VEX, T8XD, Sched<[WriteVecIMul]>;
831806c3fb27SDimitry Andricdef VSHA512RNDS2rr : I<0xcb, MRMSrcReg, (outs VR256:$dst),
831906c3fb27SDimitry Andric                      (ins VR256:$src1, VR256:$src2, VR128:$src3),
832006c3fb27SDimitry Andric                      "vsha512rnds2\t{$src3, $src2, $dst|$dst, $src2, $src3}",
832106c3fb27SDimitry Andric                      [(set VR256:$dst,
832206c3fb27SDimitry Andric                        (int_x86_vsha512rnds2 VR256:$src1, VR256:$src2, VR128:$src3))]>,
832306c3fb27SDimitry Andric                      VEX_L, VEX_4V, T8XD, Sched<[WriteVecIMul]>;
832406c3fb27SDimitry Andric}
832506c3fb27SDimitry Andric
832606c3fb27SDimitry Andric// FIXME: Is there a better scheduler class for SM3 than WriteVecIMul?
// SM3 multiclasses. Both are declared under [HasSM3] with $src1 tied to $dst.
// NOTE(review): the rm forms below use the same WriteVecIMul entry as the rr
// forms rather than a folded-load write; likely covered by the FIXME above -
// confirm.
832706c3fb27SDimitry Andriclet Predicates = [HasSM3], Constraints = "$src1 = $dst" in {
  /// SM3_Base - rr/rm forms (opcode 0xda, VEX_4V) of a three-source SM3
  /// instruction. The pattern calls the intrinsic named "int_x86_" # OpStr, so
  /// `OpStr` is both the mnemonic and the intrinsic suffix.
832806c3fb27SDimitry Andric  multiclass SM3_Base<string OpStr> {
832906c3fb27SDimitry Andric    def rr : I<0xda, MRMSrcReg, (outs VR128:$dst),
833006c3fb27SDimitry Andric              (ins VR128:$src1, VR128:$src2, VR128:$src3),
833106c3fb27SDimitry Andric              !strconcat(OpStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
833206c3fb27SDimitry Andric              [(set VR128:$dst,
833306c3fb27SDimitry Andric               (!cast<Intrinsic>("int_x86_"#OpStr) VR128:$src1,
833406c3fb27SDimitry Andric                VR128:$src2, VR128:$src3))]>,
833506c3fb27SDimitry Andric              Sched<[WriteVecIMul]>, VEX_4V;
833606c3fb27SDimitry Andric    def rm : I<0xda, MRMSrcMem, (outs VR128:$dst),
833706c3fb27SDimitry Andric              (ins VR128:$src1, VR128:$src2, i128mem:$src3),
833806c3fb27SDimitry Andric              !strconcat(OpStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
833906c3fb27SDimitry Andric              [(set VR128:$dst,
834006c3fb27SDimitry Andric               (!cast<Intrinsic>("int_x86_"#OpStr) VR128:$src1,
834106c3fb27SDimitry Andric                VR128:$src2, (loadv4i32 addr:$src3)))]>,
834206c3fb27SDimitry Andric              Sched<[WriteVecIMul]>, VEX_4V;
834306c3fb27SDimitry Andric  }
834406c3fb27SDimitry Andric
  /// VSM3RNDS2_Base - rr/rm forms (opcode 0xde) of vsm3rnds2, which adds an
  /// immediate $src4 matched as `timm`. No VEX class is attached here; the
  /// instantiation supplies it.
834506c3fb27SDimitry Andric  multiclass VSM3RNDS2_Base {
834606c3fb27SDimitry Andric    def rr : Ii8<0xde, MRMSrcReg, (outs VR128:$dst),
834706c3fb27SDimitry Andric              (ins VR128:$src1, VR128:$src2, VR128:$src3, i32u8imm:$src4),
834806c3fb27SDimitry Andric              "vsm3rnds2\t{$src4, $src3, $src2, $dst|$dst, $src2, $src3, $src4}",
834906c3fb27SDimitry Andric              [(set VR128:$dst,
835006c3fb27SDimitry Andric               (int_x86_vsm3rnds2 VR128:$src1,
835106c3fb27SDimitry Andric                VR128:$src2, VR128:$src3, timm:$src4))]>,
835206c3fb27SDimitry Andric              Sched<[WriteVecIMul]>;
835306c3fb27SDimitry Andric    def rm : Ii8<0xde, MRMSrcMem, (outs VR128:$dst),
835406c3fb27SDimitry Andric              (ins VR128:$src1, VR128:$src2, i128mem:$src3, i32u8imm:$src4),
835506c3fb27SDimitry Andric              "vsm3rnds2\t{$src4, $src3, $src2, $dst|$dst, $src2, $src3, $src4}",
835606c3fb27SDimitry Andric              [(set VR128:$dst,
835706c3fb27SDimitry Andric               (int_x86_vsm3rnds2 VR128:$src1,
835806c3fb27SDimitry Andric                VR128:$src2, (loadv4i32 addr:$src3), timm:$src4))]>,
835906c3fb27SDimitry Andric              Sched<[WriteVecIMul]>;
836006c3fb27SDimitry Andric  }
836106c3fb27SDimitry Andric}
836206c3fb27SDimitry Andric
// SM3 instantiations. vsm3msg1 / vsm3msg2 share SM3_Base (same opcode byte)
// and differ only in prefix class (T8PS vs T8PD). vsm3rnds2 gets its VEX_4V
// and TAPD classes here because VSM3RNDS2_Base does not attach them.
836306c3fb27SDimitry Andricdefm VSM3MSG1 : SM3_Base<"vsm3msg1">, T8PS;
836406c3fb27SDimitry Andricdefm VSM3MSG2 : SM3_Base<"vsm3msg2">, T8PD;
836506c3fb27SDimitry Andricdefm VSM3RNDS2 : VSM3RNDS2_Base, VEX_4V, TAPD;
836606c3fb27SDimitry Andric
836706c3fb27SDimitry Andric// FIXME: Is there a better scheduler class for SM4 than WriteVecIMul?
836806c3fb27SDimitry Andriclet Predicates = [HasSM4] in {
  /// SM4_Base - rr/rm forms (opcode 0xda) of a two-source SM4 instruction over
  /// register class `RC`. No operand is tied to $dst. The pattern calls the
  /// intrinsic named "int_x86_" # OpStr # VL, where `VL` is the vector-length
  /// suffix string; `LD` is the load fragment folded into $src2 by the rm form
  /// and `MemOp` is its memory operand class.
  /// NOTE(review): rm uses the same WriteVecIMul entry as rr rather than a
  /// folded-load write; likely covered by the FIXME above - confirm.
836906c3fb27SDimitry Andric  multiclass SM4_Base<string OpStr, RegisterClass RC, string VL,
837006c3fb27SDimitry Andric                      PatFrag LD, X86MemOperand MemOp> {
837106c3fb27SDimitry Andric    def rr : I<0xda, MRMSrcReg, (outs RC:$dst),
837206c3fb27SDimitry Andric               (ins RC:$src1, RC:$src2),
837306c3fb27SDimitry Andric               !strconcat(OpStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
837406c3fb27SDimitry Andric               [(set RC:$dst, (!cast<Intrinsic>("int_x86_"#OpStr#VL) RC:$src1,
837506c3fb27SDimitry Andric                  RC:$src2))]>,
837606c3fb27SDimitry Andric               Sched<[WriteVecIMul]>;
837706c3fb27SDimitry Andric    def rm : I<0xda, MRMSrcMem, (outs RC:$dst),
837806c3fb27SDimitry Andric               (ins RC:$src1, MemOp:$src2),
837906c3fb27SDimitry Andric               !strconcat(OpStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
838006c3fb27SDimitry Andric               [(set RC:$dst, (!cast<Intrinsic>("int_x86_"#OpStr#VL) RC:$src1,
838106c3fb27SDimitry Andric                 (LD addr:$src2)))]>,
838206c3fb27SDimitry Andric               Sched<[WriteVecIMul]>;
838306c3fb27SDimitry Andric  }
838406c3fb27SDimitry Andric}
838506c3fb27SDimitry Andric
// SM4 instantiations: 128-bit and 256-bit (`Y`, VEX_L) records for each
// mnemonic, all VEX_4V. vsm4key4 uses T8XS and vsm4rnds4 uses T8XD; the "128" /
// "256" strings select the matching intrinsic.
838606c3fb27SDimitry Andricdefm VSM4KEY4  : SM4_Base<"vsm4key4", VR128, "128", loadv4i32, i128mem>, T8XS, VEX_4V;
838706c3fb27SDimitry Andricdefm VSM4KEY4Y : SM4_Base<"vsm4key4", VR256, "256", loadv8i32, i256mem>, T8XS, VEX_L, VEX_4V;
838806c3fb27SDimitry Andricdefm VSM4RNDS4  : SM4_Base<"vsm4rnds4", VR128, "128", loadv4i32, i128mem>, T8XD, VEX_4V;
838906c3fb27SDimitry Andricdefm VSM4RNDS4Y : SM4_Base<"vsm4rnds4", VR256, "256", loadv8i32, i256mem>, T8XD, VEX_L, VEX_4V;
839006c3fb27SDimitry Andric
// avx_vnni_int16 - Three-operand AVX-VNNI-INT16 instructions, gated on
// HasAVXVNNIINT16. $src1 is tied to $dst and is therefore not printed in the
// assembly string. Each instantiation yields 128-bit (rr/rm) and 256-bit
// (Yrr/Yrm) records.
//   opc          - opcode byte.
//   OpcodeStr    - assembly mnemonic; also spliced into the intrinsic name
//                  "int_x86_avx2_" # OpcodeStr # "_128" / "_256".
//   IsCommutable - value of isCommutable on the register-register forms.
let Predicates = [HasAVXVNNIINT16], Constraints = "$src1 = $dst" in
multiclass avx_vnni_int16<bits<8> opc, string OpcodeStr, bit IsCommutable> {
  // 128-bit, all-register form.
  let isCommutable = IsCommutable in
  def rr  : I<opc, MRMSrcReg, (outs VR128:$dst),
              (ins VR128:$src1, VR128:$src2, VR128:$src3),
              !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
              [(set VR128:$dst,
                (v4i32 (!cast<Intrinsic>("int_x86_avx2_"#OpcodeStr#"_128")
                        VR128:$src1, VR128:$src2, VR128:$src3)))]>,
              VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;

  // 128-bit form with $src3 loaded from memory. Use the load-folded write,
  // plus one ReadAfterFold for each register input ($src1 and $src2).
  def rm  : I<opc, MRMSrcMem, (outs VR128:$dst),
              (ins VR128:$src1, VR128:$src2, i128mem:$src3),
              !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
              [(set VR128:$dst,
                (v4i32 (!cast<Intrinsic>("int_x86_avx2_"#OpcodeStr#"_128")
                        VR128:$src1, VR128:$src2, (loadv4i32 addr:$src3))))]>,
              VEX_4V, Sched<[SchedWriteVecIMul.XMM.Folded,
                             SchedWriteVecIMul.XMM.ReadAfterFold,
                             SchedWriteVecIMul.XMM.ReadAfterFold]>;

  // 256-bit, all-register form.
  let isCommutable = IsCommutable in
  def Yrr  : I<opc, MRMSrcReg, (outs VR256:$dst),
               (ins VR256:$src1, VR256:$src2, VR256:$src3),
               !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
               [(set VR256:$dst,
                 (v8i32 (!cast<Intrinsic>("int_x86_avx2_"#OpcodeStr#"_256")
                         VR256:$src1, VR256:$src2, VR256:$src3)))]>,
               VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;

  // 256-bit form with $src3 loaded from memory; scheduling mirrors rm.
  def Yrm  : I<opc, MRMSrcMem, (outs VR256:$dst),
               (ins VR256:$src1, VR256:$src2, i256mem:$src3),
               !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
               [(set VR256:$dst,
                 (v8i32 (!cast<Intrinsic>("int_x86_avx2_"#OpcodeStr#"_256")
                         VR256:$src1, VR256:$src2, (loadv8i32 addr:$src3))))]>,
               VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM.Folded,
                                     SchedWriteVecIMul.YMM.ReadAfterFold,
                                     SchedWriteVecIMul.YMM.ReadAfterFold]>;
}
842706c3fb27SDimitry Andric
// Instantiations of avx_vnni_int16. Each mnemonic has a variant on opcode
// 0xd2 and an "S"-suffixed variant on opcode 0xd3; the trailing mixin selects
// the prefix/opcode-map class.

// Register forms are not marked commutable.
defm VPDPWSUD   : avx_vnni_int16<0xd2, "vpdpwsud",  /*IsCommutable=*/0>, T8XS;
defm VPDPWSUDS  : avx_vnni_int16<0xd3, "vpdpwsuds", /*IsCommutable=*/0>, T8XS;
defm VPDPWUSD   : avx_vnni_int16<0xd2, "vpdpwusd",  /*IsCommutable=*/0>, T8PD;
defm VPDPWUSDS  : avx_vnni_int16<0xd3, "vpdpwusds", /*IsCommutable=*/0>, T8PD;

// Register forms are marked commutable.
defm VPDPWUUD   : avx_vnni_int16<0xd2, "vpdpwuud",  /*IsCommutable=*/1>, T8PS;
defm VPDPWUUDS  : avx_vnni_int16<0xd3, "vpdpwuuds", /*IsCommutable=*/1>, T8PS;
8434