xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td (revision fe6060f10f634930ff71b7c50291ddc610da2475)
10b57cec5SDimitry Andric//===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
20b57cec5SDimitry Andric//
30b57cec5SDimitry Andric// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric// See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric//
70b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric//
90b57cec5SDimitry Andric// This file describes the X86 SSE instruction set, defining the instructions,
100b57cec5SDimitry Andric// and properties of the instructions which are needed for code generation,
110b57cec5SDimitry Andric// machine code emission, and analysis.
120b57cec5SDimitry Andric//
130b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric
150b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
160b57cec5SDimitry Andric// SSE 1 & 2 Instructions Classes
170b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
180b57cec5SDimitry Andric
190b57cec5SDimitry Andric/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
///
/// Each instantiation emits two codegen-only records built on SI:
///   rr - register/register form (MRMSrcReg); the only one marked commutable.
///   rm - register/memory form (MRMSrcMem); its pattern folds a plain `load`
///        of $src2 into OpNode.
/// Is2Addr selects the assembly string: the two-operand "$src2, $dst" syntax
/// when set, the three-operand "$src2, $src1, $dst" syntax otherwise.
20*fe6060f1SDimitry Andricmulticlass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
210b57cec5SDimitry Andric                           RegisterClass RC, X86MemOperand x86memop,
220b57cec5SDimitry Andric                           Domain d, X86FoldableSchedWrite sched,
230b57cec5SDimitry Andric                           bit Is2Addr = 1> {
240b57cec5SDimitry Andriclet isCodeGenOnly = 1 in {
250b57cec5SDimitry Andric  let isCommutable = 1 in {
260b57cec5SDimitry Andric    def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
270b57cec5SDimitry Andric       !if(Is2Addr,
280b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
290b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
300b57cec5SDimitry Andric       [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], d>,
310b57cec5SDimitry Andric       Sched<[sched]>;
320b57cec5SDimitry Andric  }
  // The memory form uses the folded-load scheduling entries of `sched`.
330b57cec5SDimitry Andric  def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
340b57cec5SDimitry Andric       !if(Is2Addr,
350b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
360b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
370b57cec5SDimitry Andric       [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], d>,
380b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
390b57cec5SDimitry Andric}
400b57cec5SDimitry Andric}
410b57cec5SDimitry Andric
420b57cec5SDimitry Andric/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
///
/// Emits rr_Int (MRMSrcReg) and rm_Int (MRMSrcMem) on top of SI_Int, both with
/// hasSideEffects = 0. The result of OpNode is typed as VT in both patterns;
/// rm_Int matches its memory operand through `mem_frags` and sets mayLoad.
/// The mnemonic is taken from `asm`; `OpcodeStr` is not referenced in this
/// body. Is2Addr picks the two-operand or three-operand assembly string.
430b57cec5SDimitry Andricmulticlass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr,
440b57cec5SDimitry Andric                               SDPatternOperator OpNode, RegisterClass RC,
450b57cec5SDimitry Andric                               ValueType VT, string asm, Operand memopr,
465ffd83dbSDimitry Andric                               PatFrags mem_frags, Domain d,
470b57cec5SDimitry Andric                               X86FoldableSchedWrite sched, bit Is2Addr = 1> {
480b57cec5SDimitry Andriclet hasSideEffects = 0 in {
490b57cec5SDimitry Andric  def rr_Int : SI_Int<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
500b57cec5SDimitry Andric       !if(Is2Addr,
510b57cec5SDimitry Andric           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
520b57cec5SDimitry Andric           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
530b57cec5SDimitry Andric       [(set RC:$dst, (VT (OpNode RC:$src1, RC:$src2)))], d>,
540b57cec5SDimitry Andric       Sched<[sched]>;
550b57cec5SDimitry Andric  let mayLoad = 1 in
560b57cec5SDimitry Andric  def rm_Int : SI_Int<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
570b57cec5SDimitry Andric       !if(Is2Addr,
580b57cec5SDimitry Andric           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
590b57cec5SDimitry Andric           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
605ffd83dbSDimitry Andric       [(set RC:$dst, (VT (OpNode RC:$src1, (mem_frags addr:$src2))))], d>,
610b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
620b57cec5SDimitry Andric}
630b57cec5SDimitry Andric}
640b57cec5SDimitry Andric
650b57cec5SDimitry Andric/// sse12_fp_packed - SSE 1 & 2 packed instructions class
///
/// Emits rr (MRMSrcReg, commutable) and rm (MRMSrcMem, mayLoad) on top of PI.
/// The rr pattern types the OpNode result as `vt`; the rm pattern loads $src2
/// through `mem_frag` and leaves the result type to be inferred.
/// Is2Addr picks the two-operand or three-operand assembly string.
66*fe6060f1SDimitry Andricmulticlass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
670b57cec5SDimitry Andric                           RegisterClass RC, ValueType vt,
680b57cec5SDimitry Andric                           X86MemOperand x86memop, PatFrag mem_frag,
690b57cec5SDimitry Andric                           Domain d, X86FoldableSchedWrite sched,
700b57cec5SDimitry Andric                           bit Is2Addr = 1> {
710b57cec5SDimitry Andric  let isCommutable = 1 in
720b57cec5SDimitry Andric    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
730b57cec5SDimitry Andric       !if(Is2Addr,
740b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
750b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
760b57cec5SDimitry Andric       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>,
770b57cec5SDimitry Andric       Sched<[sched]>;
780b57cec5SDimitry Andric  let mayLoad = 1 in
790b57cec5SDimitry Andric    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
800b57cec5SDimitry Andric       !if(Is2Addr,
810b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
820b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
830b57cec5SDimitry Andric       [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
840b57cec5SDimitry Andric          d>,
850b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
860b57cec5SDimitry Andric}
870b57cec5SDimitry Andric
880b57cec5SDimitry Andric/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
///
/// Unlike sse12_fp_packed, the selection patterns are not built here: the
/// caller supplies them ready-made as `pat_rr` and `pat_rm`. Both records are
/// marked hasSideEffects = 0; rr is commutable and rm sets mayLoad.
/// Is2Addr picks the two-operand or three-operand assembly string.
890b57cec5SDimitry Andricmulticlass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
900b57cec5SDimitry Andric                                      string OpcodeStr, X86MemOperand x86memop,
910b57cec5SDimitry Andric                                      X86FoldableSchedWrite sched,
920b57cec5SDimitry Andric                                      list<dag> pat_rr, list<dag> pat_rm,
930b57cec5SDimitry Andric                                      bit Is2Addr = 1> {
940b57cec5SDimitry Andric  let isCommutable = 1, hasSideEffects = 0 in
950b57cec5SDimitry Andric    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
960b57cec5SDimitry Andric       !if(Is2Addr,
970b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
980b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
990b57cec5SDimitry Andric       pat_rr, d>,
1000b57cec5SDimitry Andric       Sched<[sched]>;
1010b57cec5SDimitry Andric  let hasSideEffects = 0, mayLoad = 1 in
1020b57cec5SDimitry Andric  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
1030b57cec5SDimitry Andric       !if(Is2Addr,
1040b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
1050b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
1060b57cec5SDimitry Andric       pat_rm, d>,
1070b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
1080b57cec5SDimitry Andric}
1090b57cec5SDimitry Andric
1100b57cec5SDimitry Andric
1110b57cec5SDimitry Andric// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
1120b57cec5SDimitry Andric// This is expanded by ExpandPostRAPseudos.
// Three pseudos (opcode 0, empty asm string) are defined, one per zero
// immediate: fp32imm0 into FR32, fp64imm0 into FR64 and fp128imm0 into VR128.
// All require NoAVX512; the f64 variant needs HasSSE2, the other two HasSSE1.
1130b57cec5SDimitry Andriclet isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
1140b57cec5SDimitry Andric    isPseudo = 1, SchedRW = [WriteZero] in {
1150b57cec5SDimitry Andric  def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
1160b57cec5SDimitry Andric                   [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoAVX512]>;
1170b57cec5SDimitry Andric  def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
1188bcb0991SDimitry Andric                   [(set FR64:$dst, fp64imm0)]>, Requires<[HasSSE2, NoAVX512]>;
1198bcb0991SDimitry Andric  def FsFLD0F128 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
1208bcb0991SDimitry Andric                     [(set VR128:$dst, fp128imm0)]>, Requires<[HasSSE1, NoAVX512]>;
1210b57cec5SDimitry Andric}
1220b57cec5SDimitry Andric
1230b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
1240b57cec5SDimitry Andric// AVX & SSE - Zero/One Vectors
1250b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
1260b57cec5SDimitry Andric
1270b57cec5SDimitry Andric// Alias instruction that maps zero vector to pxor / xorp* for sse.
1280b57cec5SDimitry Andric// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
1290b57cec5SDimitry Andric// swizzled by ExecutionDomainFix to pxor.
1300b57cec5SDimitry Andric// We set canFoldAsLoad because this can be converted to a constant-pool
1310b57cec5SDimitry Andric// load of an all-zeros value if folding it would be beneficial.
// The pseudo's own pattern only covers v4f32; the remaining 128-bit vector
// types are mapped onto it by separate Pat records.
1320b57cec5SDimitry Andriclet isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
1338bcb0991SDimitry Andric    isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
1340b57cec5SDimitry Andricdef V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
1350b57cec5SDimitry Andric               [(set VR128:$dst, (v4f32 immAllZerosV))]>;
1360b57cec5SDimitry Andric}
1370b57cec5SDimitry Andric
// Select V_SET0 for an all-zeros vector of each of the listed 128-bit types
// (v16i8, v8i16, v4i32, v2i64, v2f64) when AVX-512 is not available.
1388bcb0991SDimitry Andriclet Predicates = [NoAVX512] in {
1398bcb0991SDimitry Andricdef : Pat<(v16i8 immAllZerosV), (V_SET0)>;
1408bcb0991SDimitry Andricdef : Pat<(v8i16 immAllZerosV), (V_SET0)>;
1410b57cec5SDimitry Andricdef : Pat<(v4i32 immAllZerosV), (V_SET0)>;
1428bcb0991SDimitry Andricdef : Pat<(v2i64 immAllZerosV), (V_SET0)>;
1438bcb0991SDimitry Andricdef : Pat<(v2f64 immAllZerosV), (V_SET0)>;
1448bcb0991SDimitry Andric}
1450b57cec5SDimitry Andric
1460b57cec5SDimitry Andric
1470b57cec5SDimitry Andric// The same as done above but for AVX.  The 256-bit AVX1 ISA doesn't support PI,
1480b57cec5SDimitry Andric// and doesn't need it because on sandy bridge the register is set to zero
1490b57cec5SDimitry Andric// at the rename stage without using any execution unit, so SET0PSY
1500b57cec5SDimitry Andric// and SET0PDY can be used for vector int instructions without penalty
// NOTE(review): SET0PSY / SET0PDY are not defined in the visible source; the
// only 256-bit zeroing pseudo here is AVX_SET0 (pattern typed v8i32), so the
// names above look stale - confirm.
1510b57cec5SDimitry Andriclet isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
1520b57cec5SDimitry Andric    isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
1530b57cec5SDimitry Andricdef AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
1540b57cec5SDimitry Andric                 [(set VR256:$dst, (v8i32 immAllZerosV))]>;
1550b57cec5SDimitry Andric}
1560b57cec5SDimitry Andric
// Select AVX_SET0 for an all-zeros vector of each of the listed 256-bit types
// (v32i8, v16i16, v4i64, v8f32, v4f64) when AVX-512 is not available.
1578bcb0991SDimitry Andriclet Predicates = [NoAVX512] in {
1588bcb0991SDimitry Andricdef : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
1598bcb0991SDimitry Andricdef : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
1608bcb0991SDimitry Andricdef : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
1618bcb0991SDimitry Andricdef : Pat<(v8f32 immAllZerosV), (AVX_SET0)>;
1628bcb0991SDimitry Andricdef : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;
1638bcb0991SDimitry Andric}
1648bcb0991SDimitry Andric
1650b57cec5SDimitry Andric// We set canFoldAsLoad because this can be converted to a constant-pool
1660b57cec5SDimitry Andric// load of an all-ones value if folding it would be beneficial.
// Three all-ones pseudos are defined:
//   V_SETALLONES    - 128-bit (v4i32 in VR128), no predicate set in this block.
//   AVX1_SETALLONES - 256-bit (v8i32 in VR256), HasAVX1Only and OptForMinSize.
//   AVX2_SETALLONES - 256-bit (v8i32 in VR256), HasAVX2.
1670b57cec5SDimitry Andriclet isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
1680b57cec5SDimitry Andric    isPseudo = 1, SchedRW = [WriteZero] in {
1690b57cec5SDimitry Andric  def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
1700b57cec5SDimitry Andric                       [(set VR128:$dst, (v4i32 immAllOnesV))]>;
1710b57cec5SDimitry Andric  let Predicates = [HasAVX1Only, OptForMinSize] in {
1720b57cec5SDimitry Andric  def AVX1_SETALLONES: I<0, Pseudo, (outs VR256:$dst), (ins), "",
1730b57cec5SDimitry Andric                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
1740b57cec5SDimitry Andric  }
1750b57cec5SDimitry Andric  let Predicates = [HasAVX2] in
1760b57cec5SDimitry Andric  def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
1770b57cec5SDimitry Andric                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
1780b57cec5SDimitry Andric}
1790b57cec5SDimitry Andric
1800b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
1810b57cec5SDimitry Andric// SSE 1 & 2 - Move FP Scalar Instructions
1820b57cec5SDimitry Andric//
1830b57cec5SDimitry Andric// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
1840b57cec5SDimitry Andric// register copies because it's a partial register update; Register-to-register
1850b57cec5SDimitry Andric// movss/movsd is not modeled as an INSERT_SUBREG because INSERT_SUBREG requires
1860b57cec5SDimitry Andric// that the insert be implementable in terms of a copy, and, as just
1870b57cec5SDimitry Andric// mentioned, we don't use movss/movsd for copies.
1880b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
1890b57cec5SDimitry Andric
// sse12_move_rr - register-to-register forms of a scalar FP move.
//
// Emits two records on VR128 operands, both using base_opc # asm_opr as the
// assembly string:
//   rr     - opcode 0x10, MRMSrcReg, commutable, selected for OpNode.
//   rr_REV - opcode 0x11, MRMDestReg, no pattern; tied back to the rr record
//            through FoldGenData<Name#rr>.
// `x86memop` is accepted but not referenced in this body.
1900b57cec5SDimitry Andricmulticlass sse12_move_rr<SDNode OpNode, ValueType vt,
1910b57cec5SDimitry Andric                         X86MemOperand x86memop, string base_opc,
1920b57cec5SDimitry Andric                         string asm_opr, Domain d, string Name> {
1930b57cec5SDimitry Andric  let isCommutable = 1 in
1940b57cec5SDimitry Andric  def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
1950b57cec5SDimitry Andric              (ins VR128:$src1, VR128:$src2),
1960b57cec5SDimitry Andric              !strconcat(base_opc, asm_opr),
1970b57cec5SDimitry Andric              [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], d>,
1980b57cec5SDimitry Andric              Sched<[SchedWriteFShuffle.XMM]>;
1990b57cec5SDimitry Andric
2000b57cec5SDimitry Andric  // For the disassembler
2010b57cec5SDimitry Andric  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
2020b57cec5SDimitry Andric  def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
2030b57cec5SDimitry Andric                  (ins VR128:$src1, VR128:$src2),
2040b57cec5SDimitry Andric                  !strconcat(base_opc, asm_opr), []>,
2050b57cec5SDimitry Andric                  Sched<[SchedWriteFShuffle.XMM]>, FoldGenData<Name#rr>;
2060b57cec5SDimitry Andric}
2070b57cec5SDimitry Andric
// sse12_move - scalar FP move: register forms plus the store-to-memory form,
// each in a VEX ("V"-prefixed) and a legacy SSE flavour.
//
// Records produced per instantiation:
//   V#NAME rr / rr_REV - three-operand syntax, predicates [UseAVX, OptForSize].
//   V#NAME#mr          - VEX store of RC:$src to memory (opcode 0x11).
//   NAME rr / rr_REV   - two-operand syntax with $src1 tied to $dst,
//                        predicates [pred, NoSSE41_Or_OptForSize].
//   NAME#mr            - legacy store of RC:$src to memory (opcode 0x11).
// plus two InstAlias records for the ".s"-suffixed mnemonics.
2080b57cec5SDimitry Andricmulticlass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
2090b57cec5SDimitry Andric                      X86MemOperand x86memop, string OpcodeStr,
2100b57cec5SDimitry Andric                      Domain d, string Name, Predicate pred> {
2110b57cec5SDimitry Andric  // AVX
2120b57cec5SDimitry Andric  let Predicates = [UseAVX, OptForSize] in
2130b57cec5SDimitry Andric  defm V#NAME : sse12_move_rr<OpNode, vt, x86memop, OpcodeStr,
2140b57cec5SDimitry Andric                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d,
2150b57cec5SDimitry Andric                              "V"#Name>,
2160b57cec5SDimitry Andric                              VEX_4V, VEX_LIG, VEX_WIG;
2170b57cec5SDimitry Andric
2180b57cec5SDimitry Andric  def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
2190b57cec5SDimitry Andric                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2200b57cec5SDimitry Andric                     [(store RC:$src, addr:$dst)], d>,
2210b57cec5SDimitry Andric                     VEX, VEX_LIG, Sched<[WriteFStore]>, VEX_WIG;
2220b57cec5SDimitry Andric  // SSE1 & 2
2230b57cec5SDimitry Andric  let Constraints = "$src1 = $dst" in {
2240b57cec5SDimitry Andric    let Predicates = [pred, NoSSE41_Or_OptForSize] in
2250b57cec5SDimitry Andric    defm NAME : sse12_move_rr<OpNode, vt, x86memop, OpcodeStr,
2260b57cec5SDimitry Andric                              "\t{$src2, $dst|$dst, $src2}", d, Name>;
2270b57cec5SDimitry Andric  }
2280b57cec5SDimitry Andric
2290b57cec5SDimitry Andric  def NAME#mr   : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
2300b57cec5SDimitry Andric                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2310b57cec5SDimitry Andric                     [(store RC:$src, addr:$dst)], d>,
2320b57cec5SDimitry Andric                     Sched<[WriteFStore]>;
2330b57cec5SDimitry Andric
  // Map the ".s"-suffixed mnemonics onto the rr_REV records. The trailing 0 is
  // the InstAlias' third argument - presumably it keeps the alias out of the
  // printer; confirm against the InstAlias definition.
2340b57cec5SDimitry Andric  def : InstAlias<"v"#OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2350b57cec5SDimitry Andric                  (!cast<Instruction>("V"#NAME#"rr_REV")
2360b57cec5SDimitry Andric                   VR128:$dst, VR128:$src1, VR128:$src2), 0>;
2370b57cec5SDimitry Andric  def : InstAlias<OpcodeStr#".s\t{$src2, $dst|$dst, $src2}",
2380b57cec5SDimitry Andric                  (!cast<Instruction>(NAME#"rr_REV")
2390b57cec5SDimitry Andric                   VR128:$dst, VR128:$src2), 0>;
2400b57cec5SDimitry Andric}
2410b57cec5SDimitry Andric
2420b57cec5SDimitry Andric// Loading from memory automatically zeroing upper bits.
//
// Emits four load records (opcode 0x10, MRMSrcMem), VEX and legacy flavours
// of each:
//   V#NAME#rm / NAME#rm         - result in VR128, matched via `vzloadfrag`
//                                 and typed as `vt`.
//   V#NAME#rm_alt / NAME#rm_alt - result in RC, matched via `mem_pat`;
//                                 codegen-only.
2430b57cec5SDimitry Andricmulticlass sse12_move_rm<RegisterClass RC, ValueType vt, X86MemOperand x86memop,
2440b57cec5SDimitry Andric                         PatFrag mem_pat, PatFrag vzloadfrag, string OpcodeStr,
2450b57cec5SDimitry Andric                         Domain d> {
2460b57cec5SDimitry Andric  def V#NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
2470b57cec5SDimitry Andric                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2480b57cec5SDimitry Andric                     [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
2490b57cec5SDimitry Andric                     VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
2500b57cec5SDimitry Andric  def NAME#rm   : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
2510b57cec5SDimitry Andric                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2520b57cec5SDimitry Andric                     [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
2530b57cec5SDimitry Andric                     Sched<[WriteFLoad]>;
2540b57cec5SDimitry Andric
2550b57cec5SDimitry Andric  // _alt version uses FR32/FR64 register class.
2560b57cec5SDimitry Andric  let isCodeGenOnly = 1 in {
2570b57cec5SDimitry Andric  def V#NAME#rm_alt : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
2580b57cec5SDimitry Andric                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2590b57cec5SDimitry Andric                         [(set RC:$dst, (mem_pat addr:$src))], d>,
2600b57cec5SDimitry Andric                         VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
2610b57cec5SDimitry Andric  def NAME#rm_alt   : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
2620b57cec5SDimitry Andric                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2630b57cec5SDimitry Andric                         [(set RC:$dst, (mem_pat addr:$src))], d>,
2640b57cec5SDimitry Andric                         Sched<[WriteFLoad]>;
2650b57cec5SDimitry Andric  }
2660b57cec5SDimitry Andric}
2670b57cec5SDimitry Andric
// Instantiate the scalar moves. MOVSS uses FR32 / v4f32 / f32mem with the XS
// prefix and UseSSE1; MOVSD uses FR64 / v2f64 / f64mem with the XD prefix and
// UseSSE2. The first pair provides the register and store forms (sse12_move).
2680b57cec5SDimitry Andricdefm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
2690b57cec5SDimitry Andric                        SSEPackedSingle, "MOVSS", UseSSE1>, XS;
2700b57cec5SDimitry Andricdefm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
2710b57cec5SDimitry Andric                        SSEPackedDouble, "MOVSD", UseSSE2>, XD;
2720b57cec5SDimitry Andric
// The second pair provides the load forms (sse12_move_rm), all of which are
// marked canFoldAsLoad and isReMaterializable.
2730b57cec5SDimitry Andriclet canFoldAsLoad = 1, isReMaterializable = 1 in {
2740b57cec5SDimitry Andric  defm MOVSS : sse12_move_rm<FR32, v4f32, f32mem, loadf32, X86vzload32, "movss",
2750b57cec5SDimitry Andric                             SSEPackedSingle>, XS;
2760b57cec5SDimitry Andric  defm MOVSD : sse12_move_rm<FR64, v2f64, f64mem, loadf64, X86vzload64, "movsd",
2770b57cec5SDimitry Andric                             SSEPackedDouble>, XD;
2780b57cec5SDimitry Andric}
2790b57cec5SDimitry Andric
2800b57cec5SDimitry Andric// Patterns
// With UseAVX, a scalar_to_vector of a scalar FP load is selected directly to
// the VEX load forms VMOVSSrm / VMOVSDrm.
2810b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
2820b57cec5SDimitry Andric  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
2830b57cec5SDimitry Andric            (VMOVSSrm addr:$src)>;
2840b57cec5SDimitry Andric  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
2850b57cec5SDimitry Andric            (VMOVSDrm addr:$src)>;
2860b57cec5SDimitry Andric
2870b57cec5SDimitry Andric  // Represent the same patterns above but in the form they appear for
2880b57cec5SDimitry Andric  // 256-bit types
  // The 128-bit load result is placed in the sub_xmm subregister of the
  // 256-bit value via SUBREG_TO_REG.
2890b57cec5SDimitry Andric  def : Pat<(v8f32 (X86vzload32 addr:$src)),
2900b57cec5SDimitry Andric            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
2910b57cec5SDimitry Andric  def : Pat<(v4f64 (X86vzload64 addr:$src)),
2920b57cec5SDimitry Andric            (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
2930b57cec5SDimitry Andric}
2940b57cec5SDimitry Andric
// X86vzmovl selection for AVX when optimizing for size: VMOVSSrr is given a
// V_SET0 zero vector as its first source and the input as its second.
2950b57cec5SDimitry Andriclet Predicates = [UseAVX, OptForSize] in {
2960b57cec5SDimitry Andric  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
2970b57cec5SDimitry Andric  // MOVSS to the lower bits.
2980b57cec5SDimitry Andric  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
2990b57cec5SDimitry Andric            (VMOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
  // The v4i32 case reuses the same VMOVSSrr-based sequence.
3000b57cec5SDimitry Andric  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
3010b57cec5SDimitry Andric            (VMOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
3020b57cec5SDimitry Andric
3030b57cec5SDimitry Andric  // Move low f32 and clear high bits.
  // The 256-bit input is narrowed to its sub_xmm half, run through the same
  // VMOVSSrr sequence, then widened back with SUBREG_TO_REG.
3040b57cec5SDimitry Andric  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
3050b57cec5SDimitry Andric            (SUBREG_TO_REG (i32 0),
3060b57cec5SDimitry Andric             (v4f32 (VMOVSSrr (v4f32 (V_SET0)),
3070b57cec5SDimitry Andric              (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)))), sub_xmm)>;
3080b57cec5SDimitry Andric  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
3090b57cec5SDimitry Andric            (SUBREG_TO_REG (i32 0),
3100b57cec5SDimitry Andric             (v4i32 (VMOVSSrr (v4i32 (V_SET0)),
3110b57cec5SDimitry Andric              (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)))), sub_xmm)>;
3120b57cec5SDimitry Andric}
3130b57cec5SDimitry Andric
// Legacy-SSE counterpart of the X86vzmovl selection, using MOVSSrr; active
// under [UseSSE1, NoSSE41_Or_OptForSize]. The v4i32 case reuses MOVSSrr too.
3140b57cec5SDimitry Andriclet Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in {
3150b57cec5SDimitry Andric// Move scalar to XMM zero-extended, zeroing a VR128 then do a
3160b57cec5SDimitry Andric// MOVSS to the lower bits.
3170b57cec5SDimitry Andricdef : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
3180b57cec5SDimitry Andric          (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
3190b57cec5SDimitry Andricdef : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
3200b57cec5SDimitry Andric          (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
3210b57cec5SDimitry Andric}
3220b57cec5SDimitry Andric
// Legacy-SSE selection of scalar_to_vector over a scalar FP load: the f64 case
// (UseSSE2) maps to MOVSDrm and the f32 case (UseSSE1) maps to MOVSSrm.
3230b57cec5SDimitry Andriclet Predicates = [UseSSE2] in
3240b57cec5SDimitry Andricdef : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
3250b57cec5SDimitry Andric          (MOVSDrm addr:$src)>;
3260b57cec5SDimitry Andric
3270b57cec5SDimitry Andriclet Predicates = [UseSSE1] in
3280b57cec5SDimitry Andricdef : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
3290b57cec5SDimitry Andric          (MOVSSrm addr:$src)>;
3300b57cec5SDimitry Andric
3310b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
3320b57cec5SDimitry Andric// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
3330b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
3340b57cec5SDimitry Andric
// sse12_mov_packed - packed FP move, register and load forms.
//
// Emits two records on top of PI:
//   rr - register copy (MRMSrcReg), no selection pattern; marked isMoveReg
//        and hasSideEffects = 0, scheduled as sched.RR.
//   rm - load (MRMSrcMem) matched through `ld_frag`; marked canFoldAsLoad and
//        isReMaterializable, scheduled as sched.RM.
3350b57cec5SDimitry Andricmulticlass sse12_mov_packed<bits<8> opc, RegisterClass RC,
3360b57cec5SDimitry Andric                            X86MemOperand x86memop, PatFrag ld_frag,
3370b57cec5SDimitry Andric                            string asm, Domain d,
3380b57cec5SDimitry Andric                            X86SchedWriteMoveLS sched> {
3390b57cec5SDimitry Andriclet hasSideEffects = 0, isMoveReg = 1 in
3400b57cec5SDimitry Andric  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
3410b57cec5SDimitry Andric              !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>,
3420b57cec5SDimitry Andric           Sched<[sched.RR]>;
3430b57cec5SDimitry Andriclet canFoldAsLoad = 1, isReMaterializable = 1 in
3440b57cec5SDimitry Andric  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
3450b57cec5SDimitry Andric              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3460b57cec5SDimitry Andric                   [(set RC:$dst, (ld_frag addr:$src))], d>,
3470b57cec5SDimitry Andric           Sched<[sched.RM]>;
3480b57cec5SDimitry Andric}
3490b57cec5SDimitry Andric
// VEX-encoded packed FP moves (register and load forms), enabled under
// [HasAVX, NoVLX]. Aligned variants (movaps/movapd) use opcode 0x28 with the
// alignedload fragments; unaligned variants (movups/movupd) use opcode 0x10
// with the plain load fragments. The first group operates on VR128/f128mem.
3500b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
3510b57cec5SDimitry Andricdefm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
3520b57cec5SDimitry Andric                                SSEPackedSingle, SchedWriteFMoveLS.XMM>,
3530b57cec5SDimitry Andric                                PS, VEX, VEX_WIG;
3540b57cec5SDimitry Andricdefm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
3550b57cec5SDimitry Andric                                SSEPackedDouble, SchedWriteFMoveLS.XMM>,
3560b57cec5SDimitry Andric                                PD, VEX, VEX_WIG;
3570b57cec5SDimitry Andricdefm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
3580b57cec5SDimitry Andric                                SSEPackedSingle, SchedWriteFMoveLS.XMM>,
3590b57cec5SDimitry Andric                                PS, VEX, VEX_WIG;
3600b57cec5SDimitry Andricdefm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
3610b57cec5SDimitry Andric                                SSEPackedDouble, SchedWriteFMoveLS.XMM>,
3620b57cec5SDimitry Andric                                PD, VEX, VEX_WIG;
3630b57cec5SDimitry Andric
// 256-bit counterparts: VR256/f256mem operands, the YMM scheduling class, and
// VEX_L added to the encoding.
3640b57cec5SDimitry Andricdefm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, "movaps",
3650b57cec5SDimitry Andric                                 SSEPackedSingle, SchedWriteFMoveLS.YMM>,
3660b57cec5SDimitry Andric                                 PS, VEX, VEX_L, VEX_WIG;
3670b57cec5SDimitry Andricdefm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd",
3680b57cec5SDimitry Andric                                 SSEPackedDouble, SchedWriteFMoveLS.YMM>,
3690b57cec5SDimitry Andric                                 PD, VEX, VEX_L, VEX_WIG;
3700b57cec5SDimitry Andricdefm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups",
3710b57cec5SDimitry Andric                                 SSEPackedSingle, SchedWriteFMoveLS.YMM>,
3720b57cec5SDimitry Andric                                 PS, VEX, VEX_L, VEX_WIG;
3730b57cec5SDimitry Andricdefm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd",
3740b57cec5SDimitry Andric                                 SSEPackedDouble, SchedWriteFMoveLS.YMM>,
3750b57cec5SDimitry Andric                                 PD, VEX, VEX_L, VEX_WIG;
3760b57cec5SDimitry Andric}
3770b57cec5SDimitry Andric
// Legacy-SSE packed FP moves (register and load forms) on VR128/f128mem.
// The single-precision pair (PS prefix) is gated on UseSSE1, the
// double-precision pair (PD prefix) on UseSSE2. Opcode 0x28 is used for the
// aligned forms and 0x10 for the unaligned ones.
3780b57cec5SDimitry Andriclet Predicates = [UseSSE1] in {
3790b57cec5SDimitry Andricdefm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
3800b57cec5SDimitry Andric                               SSEPackedSingle, SchedWriteFMoveLS.XMM>,
3810b57cec5SDimitry Andric                               PS;
3820b57cec5SDimitry Andricdefm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
3830b57cec5SDimitry Andric                               SSEPackedSingle, SchedWriteFMoveLS.XMM>,
3840b57cec5SDimitry Andric                               PS;
3850b57cec5SDimitry Andric}
3860b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
3870b57cec5SDimitry Andricdefm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
3880b57cec5SDimitry Andric                               SSEPackedDouble, SchedWriteFMoveLS.XMM>,
3890b57cec5SDimitry Andric                               PD;
3900b57cec5SDimitry Andricdefm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
3910b57cec5SDimitry Andric                               SSEPackedDouble, SchedWriteFMoveLS.XMM>,
3920b57cec5SDimitry Andric                               PD;
3930b57cec5SDimitry Andric}
3940b57cec5SDimitry Andric
// VEX-encoded packed FP stores (MRMDestMem), enabled under [HasAVX, NoVLX].
// Aligned variants use opcode 0x29 and match `alignedstore`; unaligned
// variants use opcode 0x11 and match plain `store`.
3950b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX]  in {
// 128-bit stores from VR128 to f128mem.
3960b57cec5SDimitry Andriclet SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
3970b57cec5SDimitry Andricdef VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
3980b57cec5SDimitry Andric                   "movaps\t{$src, $dst|$dst, $src}",
3990b57cec5SDimitry Andric                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>,
4000b57cec5SDimitry Andric                   VEX, VEX_WIG;
4010b57cec5SDimitry Andricdef VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
4020b57cec5SDimitry Andric                   "movapd\t{$src, $dst|$dst, $src}",
4030b57cec5SDimitry Andric                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>,
4040b57cec5SDimitry Andric                   VEX, VEX_WIG;
4050b57cec5SDimitry Andricdef VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
4060b57cec5SDimitry Andric                   "movups\t{$src, $dst|$dst, $src}",
4070b57cec5SDimitry Andric                   [(store (v4f32 VR128:$src), addr:$dst)]>,
4080b57cec5SDimitry Andric                   VEX, VEX_WIG;
4090b57cec5SDimitry Andricdef VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
4100b57cec5SDimitry Andric                   "movupd\t{$src, $dst|$dst, $src}",
4110b57cec5SDimitry Andric                   [(store (v2f64 VR128:$src), addr:$dst)]>,
4120b57cec5SDimitry Andric                   VEX, VEX_WIG;
4130b57cec5SDimitry Andric} // SchedRW
4140b57cec5SDimitry Andric
// 256-bit stores from VR256 to f256mem; these add VEX_L to the encoding.
4150b57cec5SDimitry Andriclet SchedRW = [SchedWriteFMoveLS.YMM.MR] in {
4160b57cec5SDimitry Andricdef VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
4170b57cec5SDimitry Andric                   "movaps\t{$src, $dst|$dst, $src}",
4180b57cec5SDimitry Andric                   [(alignedstore (v8f32 VR256:$src), addr:$dst)]>,
4190b57cec5SDimitry Andric                   VEX, VEX_L, VEX_WIG;
4200b57cec5SDimitry Andricdef VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
4210b57cec5SDimitry Andric                   "movapd\t{$src, $dst|$dst, $src}",
4220b57cec5SDimitry Andric                   [(alignedstore (v4f64 VR256:$src), addr:$dst)]>,
4230b57cec5SDimitry Andric                   VEX, VEX_L, VEX_WIG;
4240b57cec5SDimitry Andricdef VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
4250b57cec5SDimitry Andric                   "movups\t{$src, $dst|$dst, $src}",
4260b57cec5SDimitry Andric                   [(store (v8f32 VR256:$src), addr:$dst)]>,
4270b57cec5SDimitry Andric                   VEX, VEX_L, VEX_WIG;
4280b57cec5SDimitry Andricdef VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
4290b57cec5SDimitry Andric                   "movupd\t{$src, $dst|$dst, $src}",
4300b57cec5SDimitry Andric                   [(store (v4f64 VR256:$src), addr:$dst)]>,
4310b57cec5SDimitry Andric                   VEX, VEX_L, VEX_WIG;
4320b57cec5SDimitry Andric} // SchedRW
4330b57cec5SDimitry Andric} // Predicate
4340b57cec5SDimitry Andric
4350b57cec5SDimitry Andric// For disassembler
// VEX register-to-register "_REV" moves: the same mnemonics as the rr forms but
// encoded with the store-direction opcodes (0x29 / 0x11) in MRMDestReg form.
// They have empty pattern lists, so instruction selection never picks them;
// isCodeGenOnly + ForceDisassemble are set on the whole group.
// NOTE(review): FoldGenData<"..."> presumably links each _REV def to its
// canonical rr twin for the folding-table generator -- confirm.
4360b57cec5SDimitry Andriclet isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
4370b57cec5SDimitry Andric    isMoveReg = 1 in {
// 128-bit (VR128) variants.
4380b57cec5SDimitry Andriclet SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
4390b57cec5SDimitry Andric  def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
4400b57cec5SDimitry Andric                          (ins VR128:$src),
4410b57cec5SDimitry Andric                          "movaps\t{$src, $dst|$dst, $src}", []>,
4420b57cec5SDimitry Andric                          VEX, VEX_WIG, FoldGenData<"VMOVAPSrr">;
4430b57cec5SDimitry Andric  def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
4440b57cec5SDimitry Andric                           (ins VR128:$src),
4450b57cec5SDimitry Andric                           "movapd\t{$src, $dst|$dst, $src}", []>,
4460b57cec5SDimitry Andric                           VEX, VEX_WIG, FoldGenData<"VMOVAPDrr">;
4470b57cec5SDimitry Andric  def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
4480b57cec5SDimitry Andric                           (ins VR128:$src),
4490b57cec5SDimitry Andric                           "movups\t{$src, $dst|$dst, $src}", []>,
4500b57cec5SDimitry Andric                           VEX, VEX_WIG, FoldGenData<"VMOVUPSrr">;
4510b57cec5SDimitry Andric  def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
4520b57cec5SDimitry Andric                           (ins VR128:$src),
4530b57cec5SDimitry Andric                           "movupd\t{$src, $dst|$dst, $src}", []>,
4540b57cec5SDimitry Andric                           VEX, VEX_WIG, FoldGenData<"VMOVUPDrr">;
4550b57cec5SDimitry Andric} // SchedRW
4560b57cec5SDimitry Andric
// 256-bit (VR256, VEX_L) variants.
4570b57cec5SDimitry Andriclet SchedRW = [SchedWriteFMoveLS.YMM.RR] in {
4580b57cec5SDimitry Andric  def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
4590b57cec5SDimitry Andric                            (ins VR256:$src),
4600b57cec5SDimitry Andric                            "movaps\t{$src, $dst|$dst, $src}", []>,
4610b57cec5SDimitry Andric                            VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVAPSYrr">;
4620b57cec5SDimitry Andric  def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
4630b57cec5SDimitry Andric                            (ins VR256:$src),
4640b57cec5SDimitry Andric                            "movapd\t{$src, $dst|$dst, $src}", []>,
4650b57cec5SDimitry Andric                            VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVAPDYrr">;
4660b57cec5SDimitry Andric  def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
4670b57cec5SDimitry Andric                            (ins VR256:$src),
4680b57cec5SDimitry Andric                            "movups\t{$src, $dst|$dst, $src}", []>,
4690b57cec5SDimitry Andric                            VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPSYrr">;
4700b57cec5SDimitry Andric  def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
4710b57cec5SDimitry Andric                            (ins VR256:$src),
4720b57cec5SDimitry Andric                            "movupd\t{$src, $dst|$dst, $src}", []>,
4730b57cec5SDimitry Andric                            VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPDYrr">;
4740b57cec5SDimitry Andric} // SchedRW
4750b57cec5SDimitry Andric} // isCodeGenOnly, ForceDisassemble, hasSideEffects, isMoveReg
4760b57cec5SDimitry Andric
4770b57cec5SDimitry Andric// Reversed version with ".s" suffix for GAS compatibility.
// Assembler aliases mapping the "vmov{a,u}p{s,d}.s" spellings onto the VEX
// _REV register-to-register defs. The first four take VR128 operands, the last
// four take VR256 operands (same mnemonic strings, distinguished by register
// class).
// NOTE(review): the trailing 0 is presumably the InstAlias emit/print
// priority, i.e. the alias is accepted on input but not used when printing --
// confirm against the InstAlias class definition.
4780b57cec5SDimitry Andricdef : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
4790b57cec5SDimitry Andric                (VMOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
4800b57cec5SDimitry Andricdef : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}",
4810b57cec5SDimitry Andric                (VMOVAPDrr_REV VR128:$dst, VR128:$src), 0>;
4820b57cec5SDimitry Andricdef : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}",
4830b57cec5SDimitry Andric                (VMOVUPSrr_REV VR128:$dst, VR128:$src), 0>;
4840b57cec5SDimitry Andricdef : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}",
4850b57cec5SDimitry Andric                (VMOVUPDrr_REV VR128:$dst, VR128:$src), 0>;
4860b57cec5SDimitry Andricdef : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
4870b57cec5SDimitry Andric                (VMOVAPSYrr_REV VR256:$dst, VR256:$src), 0>;
4880b57cec5SDimitry Andricdef : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}",
4890b57cec5SDimitry Andric                (VMOVAPDYrr_REV VR256:$dst, VR256:$src), 0>;
4900b57cec5SDimitry Andricdef : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}",
4910b57cec5SDimitry Andric                (VMOVUPSYrr_REV VR256:$dst, VR256:$src), 0>;
4920b57cec5SDimitry Andricdef : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}",
4930b57cec5SDimitry Andric                (VMOVUPDYrr_REV VR256:$dst, VR256:$src), 0>;
4940b57cec5SDimitry Andric
// Non-VEX (PSI / PDI) 128-bit register-to-memory moves, scheduled as
// SchedWriteFMoveLS.XMM.MR.
//   * MOVAP{S,D} (opcode 0x29) match `alignedstore`.
//   * MOVUP{S,D} (opcode 0x11) match plain `store`.
4950b57cec5SDimitry Andriclet SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
4960b57cec5SDimitry Andricdef MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
4970b57cec5SDimitry Andric                   "movaps\t{$src, $dst|$dst, $src}",
4980b57cec5SDimitry Andric                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
4990b57cec5SDimitry Andricdef MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
5000b57cec5SDimitry Andric                   "movapd\t{$src, $dst|$dst, $src}",
5010b57cec5SDimitry Andric                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
5020b57cec5SDimitry Andricdef MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
5030b57cec5SDimitry Andric                   "movups\t{$src, $dst|$dst, $src}",
5040b57cec5SDimitry Andric                   [(store (v4f32 VR128:$src), addr:$dst)]>;
5050b57cec5SDimitry Andricdef MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
5060b57cec5SDimitry Andric                   "movupd\t{$src, $dst|$dst, $src}",
5070b57cec5SDimitry Andric                   [(store (v2f64 VR128:$src), addr:$dst)]>;
5080b57cec5SDimitry Andric} // SchedRW
5090b57cec5SDimitry Andric
5100b57cec5SDimitry Andric// For disassembler
// Non-VEX register-to-register "_REV" moves: store-direction opcodes
// (0x29 / 0x11) in MRMDestReg form, with empty pattern lists so they are never
// selected by patterns. isCodeGenOnly + ForceDisassemble apply to the group.
// NOTE(review): FoldGenData<"..."> presumably names the canonical rr twin for
// the folding-table generator -- confirm.
5110b57cec5SDimitry Andriclet isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
5120b57cec5SDimitry Andric    isMoveReg = 1, SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
5130b57cec5SDimitry Andric  def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
5140b57cec5SDimitry Andric                         "movaps\t{$src, $dst|$dst, $src}", []>,
5150b57cec5SDimitry Andric                         FoldGenData<"MOVAPSrr">;
5160b57cec5SDimitry Andric  def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
5170b57cec5SDimitry Andric                         "movapd\t{$src, $dst|$dst, $src}", []>,
5180b57cec5SDimitry Andric                         FoldGenData<"MOVAPDrr">;
5190b57cec5SDimitry Andric  def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
5200b57cec5SDimitry Andric                         "movups\t{$src, $dst|$dst, $src}", []>,
5210b57cec5SDimitry Andric                         FoldGenData<"MOVUPSrr">;
5220b57cec5SDimitry Andric  def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
5230b57cec5SDimitry Andric                         "movupd\t{$src, $dst|$dst, $src}", []>,
5240b57cec5SDimitry Andric                         FoldGenData<"MOVUPDrr">;
5250b57cec5SDimitry Andric} // isCodeGenOnly, ForceDisassemble, hasSideEffects, isMoveReg, SchedRW
5260b57cec5SDimitry Andric
5270b57cec5SDimitry Andric// Reversed version with ".s" suffix for GAS compatibility.
// Assembler aliases mapping the "mov{a,u}p{s,d}.s" spellings onto the non-VEX
// _REV register-to-register defs (VR128 operands only).
// NOTE(review): the trailing 0 is presumably the InstAlias emit/print
// priority -- confirm against the InstAlias class definition.
5280b57cec5SDimitry Andricdef : InstAlias<"movaps.s\t{$src, $dst|$dst, $src}",
5290b57cec5SDimitry Andric                (MOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
5300b57cec5SDimitry Andricdef : InstAlias<"movapd.s\t{$src, $dst|$dst, $src}",
5310b57cec5SDimitry Andric                (MOVAPDrr_REV VR128:$dst, VR128:$src), 0>;
5320b57cec5SDimitry Andricdef : InstAlias<"movups.s\t{$src, $dst|$dst, $src}",
5330b57cec5SDimitry Andric                (MOVUPSrr_REV VR128:$dst, VR128:$src), 0>;
5340b57cec5SDimitry Andricdef : InstAlias<"movupd.s\t{$src, $dst|$dst, $src}",
5350b57cec5SDimitry Andric                (MOVUPDrr_REV VR128:$dst, VR128:$src), 0>;
5360b57cec5SDimitry Andric
// Selection patterns routing every 256-bit integer vector load/store
// (v4i64, v8i32, v16i16, v32i8) through the single-precision FP moves, under
// [HasAVX, NoVLX]. Aligned accesses use VMOVAPSY*, unaligned use VMOVUPSY*.
5370b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
5380b57cec5SDimitry Andric  // 256-bit load/store need to use floating point load/store in case we don't
5390b57cec5SDimitry Andric  // have AVX2. Execution domain fixing will convert to integer if AVX2 is
5400b57cec5SDimitry Andric  // available and changing the domain is beneficial.
  // Loads: aligned fragments first, then the unaligned ones.
5410b57cec5SDimitry Andric  def : Pat<(alignedloadv4i64 addr:$src),
5420b57cec5SDimitry Andric            (VMOVAPSYrm addr:$src)>;
5430b57cec5SDimitry Andric  def : Pat<(alignedloadv8i32 addr:$src),
5440b57cec5SDimitry Andric            (VMOVAPSYrm addr:$src)>;
5450b57cec5SDimitry Andric  def : Pat<(alignedloadv16i16 addr:$src),
5460b57cec5SDimitry Andric            (VMOVAPSYrm addr:$src)>;
5470b57cec5SDimitry Andric  def : Pat<(alignedloadv32i8 addr:$src),
5480b57cec5SDimitry Andric            (VMOVAPSYrm addr:$src)>;
5490b57cec5SDimitry Andric  def : Pat<(loadv4i64 addr:$src),
5500b57cec5SDimitry Andric            (VMOVUPSYrm addr:$src)>;
5510b57cec5SDimitry Andric  def : Pat<(loadv8i32 addr:$src),
5520b57cec5SDimitry Andric            (VMOVUPSYrm addr:$src)>;
5530b57cec5SDimitry Andric  def : Pat<(loadv16i16 addr:$src),
5540b57cec5SDimitry Andric            (VMOVUPSYrm addr:$src)>;
5550b57cec5SDimitry Andric  def : Pat<(loadv32i8 addr:$src),
5560b57cec5SDimitry Andric            (VMOVUPSYrm addr:$src)>;
5570b57cec5SDimitry Andric
  // Stores: aligned first, then unaligned.
5580b57cec5SDimitry Andric  def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst),
5590b57cec5SDimitry Andric            (VMOVAPSYmr addr:$dst, VR256:$src)>;
5600b57cec5SDimitry Andric  def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst),
5610b57cec5SDimitry Andric            (VMOVAPSYmr addr:$dst, VR256:$src)>;
5620b57cec5SDimitry Andric  def : Pat<(alignedstore (v16i16 VR256:$src), addr:$dst),
5630b57cec5SDimitry Andric            (VMOVAPSYmr addr:$dst, VR256:$src)>;
5640b57cec5SDimitry Andric  def : Pat<(alignedstore (v32i8 VR256:$src), addr:$dst),
5650b57cec5SDimitry Andric            (VMOVAPSYmr addr:$dst, VR256:$src)>;
5660b57cec5SDimitry Andric  def : Pat<(store (v4i64 VR256:$src), addr:$dst),
5670b57cec5SDimitry Andric            (VMOVUPSYmr addr:$dst, VR256:$src)>;
5680b57cec5SDimitry Andric  def : Pat<(store (v8i32 VR256:$src), addr:$dst),
5690b57cec5SDimitry Andric            (VMOVUPSYmr addr:$dst, VR256:$src)>;
5700b57cec5SDimitry Andric  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
5710b57cec5SDimitry Andric            (VMOVUPSYmr addr:$dst, VR256:$src)>;
5720b57cec5SDimitry Andric  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
5730b57cec5SDimitry Andric            (VMOVUPSYmr addr:$dst, VR256:$src)>;
5740b57cec5SDimitry Andric}
5750b57cec5SDimitry Andric
5760b57cec5SDimitry Andric// Use movaps / movups for SSE integer load / store (one byte shorter).
5770b57cec5SDimitry Andric// The instructions selected below are then converted to MOVDQA/MOVDQU
5780b57cec5SDimitry Andric// during the SSE domain pass.
// Selection patterns routing every 128-bit integer vector load/store
// (v2i64, v4i32, v8i16, v16i8) through MOVAPS (aligned) or MOVUPS (unaligned)
// under [UseSSE1].
5790b57cec5SDimitry Andriclet Predicates = [UseSSE1] in {
  // Loads: aligned fragments first, then the unaligned ones.
5800b57cec5SDimitry Andric  def : Pat<(alignedloadv2i64 addr:$src),
5810b57cec5SDimitry Andric            (MOVAPSrm addr:$src)>;
5820b57cec5SDimitry Andric  def : Pat<(alignedloadv4i32 addr:$src),
5830b57cec5SDimitry Andric            (MOVAPSrm addr:$src)>;
5840b57cec5SDimitry Andric  def : Pat<(alignedloadv8i16 addr:$src),
5850b57cec5SDimitry Andric            (MOVAPSrm addr:$src)>;
5860b57cec5SDimitry Andric  def : Pat<(alignedloadv16i8 addr:$src),
5870b57cec5SDimitry Andric            (MOVAPSrm addr:$src)>;
5880b57cec5SDimitry Andric  def : Pat<(loadv2i64 addr:$src),
5890b57cec5SDimitry Andric            (MOVUPSrm addr:$src)>;
5900b57cec5SDimitry Andric  def : Pat<(loadv4i32 addr:$src),
5910b57cec5SDimitry Andric            (MOVUPSrm addr:$src)>;
5920b57cec5SDimitry Andric  def : Pat<(loadv8i16 addr:$src),
5930b57cec5SDimitry Andric            (MOVUPSrm addr:$src)>;
5940b57cec5SDimitry Andric  def : Pat<(loadv16i8 addr:$src),
5950b57cec5SDimitry Andric            (MOVUPSrm addr:$src)>;
5960b57cec5SDimitry Andric
  // Stores: aligned first, then unaligned.
5970b57cec5SDimitry Andric  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
5980b57cec5SDimitry Andric            (MOVAPSmr addr:$dst, VR128:$src)>;
5990b57cec5SDimitry Andric  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
6000b57cec5SDimitry Andric            (MOVAPSmr addr:$dst, VR128:$src)>;
6010b57cec5SDimitry Andric  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
6020b57cec5SDimitry Andric            (MOVAPSmr addr:$dst, VR128:$src)>;
6030b57cec5SDimitry Andric  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
6040b57cec5SDimitry Andric            (MOVAPSmr addr:$dst, VR128:$src)>;
6050b57cec5SDimitry Andric  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
6060b57cec5SDimitry Andric            (MOVUPSmr addr:$dst, VR128:$src)>;
6070b57cec5SDimitry Andric  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
6080b57cec5SDimitry Andric            (MOVUPSmr addr:$dst, VR128:$src)>;
6090b57cec5SDimitry Andric  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
6100b57cec5SDimitry Andric            (MOVUPSmr addr:$dst, VR128:$src)>;
6110b57cec5SDimitry Andric  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
6120b57cec5SDimitry Andric            (MOVUPSmr addr:$dst, VR128:$src)>;
6130b57cec5SDimitry Andric}
6140b57cec5SDimitry Andric
6150b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6160b57cec5SDimitry Andric// SSE 1 & 2 - Move Low packed FP Instructions
6170b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6180b57cec5SDimitry Andric
/// sse12_mov_hilo_packed_base - Emits the two memory-source defs (PSrm, PDrm)
/// shared by the move-low / move-high packed instructions.
///   opc      - opcode byte used by both defs.
///   pdnode   - DAG operator used in the PDrm (v2f64) selection pattern.
///   base_opc - mnemonic stem; "s" or "d" is appended per def.
///   asm_opr  - operand portion of the asm string, appended after the mnemonic.
/// Both defs take a VR128 $src1 plus an f64mem $src2 and produce VR128 $dst.
619*fe6060f1SDimitry Andricmulticlass sse12_mov_hilo_packed_base<bits<8>opc, SDPatternOperator pdnode,
6200b57cec5SDimitry Andric                                      string base_opc, string asm_opr> {
6210b57cec5SDimitry Andric  // No pattern as they need to be special-cased between high and low.
  // With an empty pattern list, mayLoad / hasSideEffects are set explicitly.
6220b57cec5SDimitry Andric  let hasSideEffects = 0, mayLoad = 1 in
6230b57cec5SDimitry Andric  def PSrm : PI<opc, MRMSrcMem,
6240b57cec5SDimitry Andric                (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
6250b57cec5SDimitry Andric                !strconcat(base_opc, "s", asm_opr),
6260b57cec5SDimitry Andric                [], SSEPackedSingle>, PS,
6270b57cec5SDimitry Andric                Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
6280b57cec5SDimitry Andric
  // The double-precision form has a pattern: pdnode applied to $src1 and a
  // scalar f64 load promoted with scalar_to_vector.
6290b57cec5SDimitry Andric  def PDrm : PI<opc, MRMSrcMem,
6300b57cec5SDimitry Andric         (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
6310b57cec5SDimitry Andric         !strconcat(base_opc, "d", asm_opr),
6320b57cec5SDimitry Andric     [(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
6330b57cec5SDimitry Andric                              (scalar_to_vector (loadf64 addr:$src2)))))],
6340b57cec5SDimitry Andric              SSEPackedDouble>, PD,
6350b57cec5SDimitry Andric     Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
6360b57cec5SDimitry Andric}
6370b57cec5SDimitry Andric
/// sse12_mov_hilo_packed - Instantiates sse12_mov_hilo_packed_base twice:
///   * V#NAME: under [UseAVX], three-operand asm string, VEX_4V + VEX_WIG.
///   * NAME:   two-operand asm string with $src1 tied to $dst.
///   opc / pdnode / base_opc are forwarded unchanged to the base multiclass.
6380b57cec5SDimitry Andricmulticlass sse12_mov_hilo_packed<bits<8>opc, SDPatternOperator pdnode,
6390b57cec5SDimitry Andric                                 string base_opc> {
6400b57cec5SDimitry Andric  let Predicates = [UseAVX] in
6410b57cec5SDimitry Andric    defm V#NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc,
6420b57cec5SDimitry Andric                                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
6430b57cec5SDimitry Andric                                    VEX_4V, VEX_WIG;
6440b57cec5SDimitry Andric
  // Destructive form: the asm string omits $src1 because it is tied to $dst.
6450b57cec5SDimitry Andric  let Constraints = "$src1 = $dst" in
6460b57cec5SDimitry Andric    defm NAME : sse12_mov_hilo_packed_base<opc,  pdnode, base_opc,
6470b57cec5SDimitry Andric                                    "\t{$src2, $dst|$dst, $src2}">;
6480b57cec5SDimitry Andric}
6490b57cec5SDimitry Andric
// Move-low loads (opcode 0x12, mnemonic stem "movlp"): expands to
// MOVLPSrm / MOVLPDrm and VMOVLPSrm / VMOVLPDrm; the PD pattern uses X86Movsd.
6500b57cec5SDimitry Andricdefm MOVL : sse12_mov_hilo_packed<0x12, X86Movsd, "movlp">;
6510b57cec5SDimitry Andric
// Move-low stores (opcode 0x13): write to an f64mem destination from VR128.
// The PS forms have no pattern, so mayStore / hasSideEffects are given
// explicitly; each brace-less `let ... in` applies only to the single def that
// follows it. The PD forms match a store of element 0 of a v2f64.
6520b57cec5SDimitry Andriclet SchedRW = [WriteFStore] in {
6530b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
6540b57cec5SDimitry Andriclet mayStore = 1, hasSideEffects = 0 in
6550b57cec5SDimitry Andricdef VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
6560b57cec5SDimitry Andric                     "movlps\t{$src, $dst|$dst, $src}",
6570b57cec5SDimitry Andric                     []>,
6580b57cec5SDimitry Andric                     VEX, VEX_WIG;
6590b57cec5SDimitry Andricdef VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
6600b57cec5SDimitry Andric                     "movlpd\t{$src, $dst|$dst, $src}",
6610b57cec5SDimitry Andric                     [(store (f64 (extractelt (v2f64 VR128:$src),
6620b57cec5SDimitry Andric                                   (iPTR 0))), addr:$dst)]>,
6630b57cec5SDimitry Andric                     VEX, VEX_WIG;
6640b57cec5SDimitry Andric}// UseAVX
// Non-VEX counterparts (no Predicates set in this scope).
6650b57cec5SDimitry Andriclet mayStore = 1, hasSideEffects = 0 in
6660b57cec5SDimitry Andricdef MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
6670b57cec5SDimitry Andric                   "movlps\t{$src, $dst|$dst, $src}",
6680b57cec5SDimitry Andric                   []>;
6690b57cec5SDimitry Andricdef MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
6700b57cec5SDimitry Andric                   "movlpd\t{$src, $dst|$dst, $src}",
6710b57cec5SDimitry Andric                   [(store (f64 (extractelt (v2f64 VR128:$src),
6720b57cec5SDimitry Andric                                 (iPTR 0))), addr:$dst)]>;
6730b57cec5SDimitry Andric} // SchedRW
6740b57cec5SDimitry Andric
// [UseSSE1] selection patterns for the pattern-less MOVLPSrm / MOVLPSmr defs.
6750b57cec5SDimitry Andriclet Predicates = [UseSSE1] in {
6760b57cec5SDimitry Andric  // This pattern helps select MOVLPS on SSE1 only targets. With SSE2 we'll
6770b57cec5SDimitry Andric  // end up with a movsd or blend instead of shufp.
6780b57cec5SDimitry Andric  // No need for aligned load, we're only loading 64-bits.
  // (i8 -28) is the immediate byte 0xE4. The loaded vector is the first
  // shuffle operand and $src1 the second.
6798bcb0991SDimitry Andric  def : Pat<(X86Shufp (v4f32 (simple_load addr:$src2)), VR128:$src1,
6800b57cec5SDimitry Andric                      (i8 -28)),
6810b57cec5SDimitry Andric            (MOVLPSrm VR128:$src1, addr:$src2)>;
6820b57cec5SDimitry Andric  def : Pat<(X86Shufp (v4f32 (X86vzload64 addr:$src2)), VR128:$src1, (i8 -28)),
6830b57cec5SDimitry Andric            (MOVLPSrm VR128:$src1, addr:$src2)>;
6840b57cec5SDimitry Andric
  // A stand-alone X86vzload64 is selected as MOVLPS into a V_SET0 register.
6850b57cec5SDimitry Andric  def : Pat<(v4f32 (X86vzload64 addr:$src)),
6860b57cec5SDimitry Andric            (MOVLPSrm (v4f32 (V_SET0)), addr:$src)>;
  // X86vextractstore64 of a v4f32 is selected as MOVLPSmr.
6870b57cec5SDimitry Andric  def : Pat<(X86vextractstore64 (v4f32 VR128:$src), addr:$dst),
6880b57cec5SDimitry Andric            (MOVLPSmr addr:$dst, VR128:$src)>;
6890b57cec5SDimitry Andric}
6900b57cec5SDimitry Andric
6910b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6920b57cec5SDimitry Andric// SSE 1 & 2 - Move Hi packed FP Instructions
6930b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
6940b57cec5SDimitry Andric
// Move-high loads (opcode 0x16, mnemonic stem "movhp"): expands to
// MOVHPSrm / MOVHPDrm and VMOVHPSrm / VMOVHPDrm; the PD pattern uses X86Unpckl.
6950b57cec5SDimitry Andricdefm MOVH : sse12_mov_hilo_packed<0x16, X86Unpckl, "movhp">;
6960b57cec5SDimitry Andric
// Move-high stores (opcode 0x17): write to an f64mem destination from VR128.
// The PS forms have no pattern and set mayStore / hasSideEffects explicitly
// (each brace-less `let ... in` covers only the def that follows). The PD
// forms match a store of element 0 of X86Unpckh($src, $src).
6970b57cec5SDimitry Andriclet SchedRW = [WriteFStore] in {
6980b57cec5SDimitry Andric// v2f64 extract element 1 is always custom lowered to unpack high to low
6990b57cec5SDimitry Andric// and extract element 0 so the non-store version isn't too horrible.
7000b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
7010b57cec5SDimitry Andriclet mayStore = 1, hasSideEffects = 0 in
7020b57cec5SDimitry Andricdef VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
7030b57cec5SDimitry Andric                   "movhps\t{$src, $dst|$dst, $src}",
7040b57cec5SDimitry Andric                   []>, VEX, VEX_WIG;
7050b57cec5SDimitry Andricdef VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
7060b57cec5SDimitry Andric                   "movhpd\t{$src, $dst|$dst, $src}",
7070b57cec5SDimitry Andric                   [(store (f64 (extractelt
7080b57cec5SDimitry Andric                                 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
7090b57cec5SDimitry Andric                                 (iPTR 0))), addr:$dst)]>, VEX, VEX_WIG;
7100b57cec5SDimitry Andric} // UseAVX
// Non-VEX counterparts (no Predicates set in this scope).
7110b57cec5SDimitry Andriclet mayStore = 1, hasSideEffects = 0 in
7120b57cec5SDimitry Andricdef MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
7130b57cec5SDimitry Andric                   "movhps\t{$src, $dst|$dst, $src}",
7140b57cec5SDimitry Andric                   []>;
7150b57cec5SDimitry Andricdef MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
7160b57cec5SDimitry Andric                   "movhpd\t{$src, $dst|$dst, $src}",
7170b57cec5SDimitry Andric                   [(store (f64 (extractelt
7180b57cec5SDimitry Andric                                 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
7190b57cec5SDimitry Andric                                 (iPTR 0))), addr:$dst)]>;
7200b57cec5SDimitry Andric} // SchedRW
7210b57cec5SDimitry Andric
// [UseAVX] extra selection patterns for VMOVHPD / VMOVLPD.
7220b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
7235ffd83dbSDimitry Andric  // MOVHPD patterns
  // Load side: the memory operand arrives as X86vzload64 rather than the
  // scalar_to_vector(loadf64) form matched by the instruction's own pattern.
7240b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
7250b57cec5SDimitry Andric            (VMOVHPDrm VR128:$src1, addr:$src2)>;
7260b57cec5SDimitry Andric
  // Store side: element 0 of X86VPermilpi($src, 1) is stored with VMOVHPDmr.
7270b57cec5SDimitry Andric  def : Pat<(store (f64 (extractelt
7280b57cec5SDimitry Andric                          (v2f64 (X86VPermilpi VR128:$src, (i8 1))),
7290b57cec5SDimitry Andric                          (iPTR 0))), addr:$dst),
7300b57cec5SDimitry Andric            (VMOVHPDmr addr:$dst, VR128:$src)>;
7310b57cec5SDimitry Andric
7320b57cec5SDimitry Andric  // MOVLPD patterns
7330b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
7340b57cec5SDimitry Andric            (VMOVLPDrm VR128:$src1, addr:$src2)>;
7350b57cec5SDimitry Andric}
7360b57cec5SDimitry Andric
// [UseSSE1] selection patterns for the pattern-less MOVHPSrm / MOVHPSmr defs.
7370b57cec5SDimitry Andriclet Predicates = [UseSSE1] in {
7380b57cec5SDimitry Andric  // This pattern helps select MOVHPS on SSE1 only targets. With SSE2 we'll
7390b57cec5SDimitry Andric  // end up with a movsd or blend instead of shufp.
7400b57cec5SDimitry Andric  // No need for aligned load, we're only loading 64-bits.
7418bcb0991SDimitry Andric  def : Pat<(X86Movlhps VR128:$src1, (v4f32 (simple_load addr:$src2))),
7420b57cec5SDimitry Andric            (MOVHPSrm VR128:$src1, addr:$src2)>;
7430b57cec5SDimitry Andric  def : Pat<(X86Movlhps VR128:$src1, (v4f32 (X86vzload64 addr:$src2))),
7440b57cec5SDimitry Andric            (MOVHPSrm VR128:$src1, addr:$src2)>;
7450b57cec5SDimitry Andric
  // X86vextractstore64 of X86Movhlps($src, $src) is selected as MOVHPSmr.
7460b57cec5SDimitry Andric  def : Pat<(X86vextractstore64 (v4f32 (X86Movhlps VR128:$src, VR128:$src)),
7470b57cec5SDimitry Andric                                addr:$dst),
7480b57cec5SDimitry Andric            (MOVHPSmr addr:$dst, VR128:$src)>;
7490b57cec5SDimitry Andric}
7500b57cec5SDimitry Andric
// [UseSSE2] extra selection patterns for the non-VEX MOVHPD / MOVLPD defs.
7510b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
7520b57cec5SDimitry Andric  // MOVHPD patterns
7530b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
7540b57cec5SDimitry Andric            (MOVHPDrm VR128:$src1, addr:$src2)>;
7550b57cec5SDimitry Andric
  // Store side: element 0 of X86Shufp($src, $src, 1) is stored with MOVHPDmr.
7560b57cec5SDimitry Andric  def : Pat<(store (f64 (extractelt
7570b57cec5SDimitry Andric                          (v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))),
7580b57cec5SDimitry Andric                          (iPTR 0))), addr:$dst),
7590b57cec5SDimitry Andric            (MOVHPDmr addr:$dst, VR128:$src)>;
7600b57cec5SDimitry Andric
7610b57cec5SDimitry Andric  // MOVLPD patterns
7620b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
7630b57cec5SDimitry Andric            (MOVLPDrm VR128:$src1, addr:$src2)>;
7640b57cec5SDimitry Andric}
7650b57cec5SDimitry Andric
// X86Movsd whose second operand is a full v2f64 simple_load is selected as
// MOVLPDrm, but only under [UseSSE2, NoSSE41_Or_OptForSize].
7660b57cec5SDimitry Andriclet Predicates = [UseSSE2, NoSSE41_Or_OptForSize] in {
7670b57cec5SDimitry Andric  // Use MOVLPD to load into the low bits from a full vector unless we can use
7680b57cec5SDimitry Andric  // BLENDPD.
7698bcb0991SDimitry Andric  def : Pat<(X86Movsd VR128:$src1, (v2f64 (simple_load addr:$src2))),
7700b57cec5SDimitry Andric            (MOVLPDrm VR128:$src1, addr:$src2)>;
7710b57cec5SDimitry Andric}
7720b57cec5SDimitry Andric
7730b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7740b57cec5SDimitry Andric// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
7750b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7760b57cec5SDimitry Andric
// VEX (three-operand, VEX_4V) register forms of MOVLHPS (opcode 0x16) and
// MOVHLPS (opcode 0x12), selected from X86Movlhps / X86Movhlps on v4f32.
7770b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
7780b57cec5SDimitry Andric  def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
7790b57cec5SDimitry Andric                                       (ins VR128:$src1, VR128:$src2),
7800b57cec5SDimitry Andric                      "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7810b57cec5SDimitry Andric                      [(set VR128:$dst,
7820b57cec5SDimitry Andric                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
7830b57cec5SDimitry Andric                      VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG;
  // Only the MOVHLPS form is marked commutable and NotMemoryFoldable.
  // NOTE(review): commuting presumably relies on custom handling in the
  // target's commute hook rather than a plain operand swap -- confirm.
7840b57cec5SDimitry Andric  let isCommutable = 1 in
7850b57cec5SDimitry Andric  def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
7860b57cec5SDimitry Andric                                       (ins VR128:$src1, VR128:$src2),
7870b57cec5SDimitry Andric                      "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7880b57cec5SDimitry Andric                      [(set VR128:$dst,
7890b57cec5SDimitry Andric                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
7900b57cec5SDimitry Andric                      VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG,
7910b57cec5SDimitry Andric                      NotMemoryFoldable;
7920b57cec5SDimitry Andric}
// Non-VEX (two-operand) register forms of MOVLHPS (opcode 0x16) and MOVHLPS
// (opcode 0x12). $src1 is tied to $dst, so the asm strings print only $src2
// and $dst.
7930b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
7940b57cec5SDimitry Andric  def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
7950b57cec5SDimitry Andric                                       (ins VR128:$src1, VR128:$src2),
7960b57cec5SDimitry Andric                      "movlhps\t{$src2, $dst|$dst, $src2}",
7970b57cec5SDimitry Andric                      [(set VR128:$dst,
7980b57cec5SDimitry Andric                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
7990b57cec5SDimitry Andric                      Sched<[SchedWriteFShuffle.XMM]>;
  // Only the MOVHLPS form is marked commutable and NotMemoryFoldable.
  // NOTE(review): commuting presumably relies on custom handling in the
  // target's commute hook rather than a plain operand swap -- confirm.
8000b57cec5SDimitry Andric  let isCommutable = 1 in
8010b57cec5SDimitry Andric  def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
8020b57cec5SDimitry Andric                                       (ins VR128:$src1, VR128:$src2),
8030b57cec5SDimitry Andric                      "movhlps\t{$src2, $dst|$dst, $src2}",
8040b57cec5SDimitry Andric                      [(set VR128:$dst,
8050b57cec5SDimitry Andric                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
8060b57cec5SDimitry Andric                      Sched<[SchedWriteFShuffle.XMM]>, NotMemoryFoldable;
8070b57cec5SDimitry Andric}
8080b57cec5SDimitry Andric
8090b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
8100b57cec5SDimitry Andric// SSE 1 & 2 - Conversion Instructions
8110b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
8120b57cec5SDimitry Andric
/// sse12_cvt_s - Scalar conversion with a register (rr) and a memory (rm)
/// source form, both selected from a single-operand OpNode.
///   opc       - opcode byte.
///   SrcRC     - source register class (rr form).
///   DstRC     - destination register class.
///   OpNode    - DAG operator applied to the source in both patterns.
///   x86memop  - memory operand type for the rm form.
///   ld_frag   - load fragment wrapped around addr:$src in the rm pattern.
///   asm       - mnemonic used by the rr form.
///   mem       - mnemonic used by the rm form.
///   sched     - scheduling class; rm uses sched.Folded.
///   d         - execution domain applied to both defs.
///   Int2Fpu   - extra SchedRead for the rr source (defaults to ReadDefault).
8130b57cec5SDimitry Andricmulticlass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
814*fe6060f1SDimitry Andric                     SDPatternOperator OpNode, X86MemOperand x86memop, PatFrag ld_frag,
8150b57cec5SDimitry Andric                     string asm, string mem, X86FoldableSchedWrite sched,
816480093f4SDimitry Andric                     Domain d,
8170b57cec5SDimitry Andric                     SchedRead Int2Fpu = ReadDefault> {
818480093f4SDimitry Andric  let ExeDomain = d in {
8190b57cec5SDimitry Andric  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
8200b57cec5SDimitry Andric              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
8210b57cec5SDimitry Andric              [(set DstRC:$dst, (OpNode SrcRC:$src))]>,
8220b57cec5SDimitry Andric              Sched<[sched, Int2Fpu]>;
8230b57cec5SDimitry Andric  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
8240b57cec5SDimitry Andric              mem#"\t{$src, $dst|$dst, $src}",
8250b57cec5SDimitry Andric              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
8260b57cec5SDimitry Andric              Sched<[sched.Folded]>;
8270b57cec5SDimitry Andric  }
828480093f4SDimitry Andric}
8290b57cec5SDimitry Andric
/// sse12_cvt_p - Packed signed-int-to-FP conversion with rr and rm forms; both
/// patterns are any_sint_to_fp from SrcTy to DstTy.
///   opc       - opcode byte.
///   RC        - register class used for both source and destination.
///   x86memop  - memory operand type for the rm form.
///   DstTy     - result value type.
///   SrcTy     - source value type.
///   ld_frag   - load fragment used in the rm pattern.
///   asm       - complete asm string (mnemonic and operands).
///   d         - execution domain.
///   sched     - scheduling class; rm uses sched.Folded.
/// Both defs read MXCSR and are marked mayRaiseFPException.
8300b57cec5SDimitry Andricmulticlass sse12_cvt_p<bits<8> opc, RegisterClass RC, X86MemOperand x86memop,
8310b57cec5SDimitry Andric                       ValueType DstTy, ValueType SrcTy, PatFrag ld_frag,
8320b57cec5SDimitry Andric                       string asm, Domain d, X86FoldableSchedWrite sched> {
833480093f4SDimitry Andriclet hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in {
8340b57cec5SDimitry Andric  def rr : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), asm,
835480093f4SDimitry Andric             [(set RC:$dst, (DstTy (any_sint_to_fp (SrcTy RC:$src))))], d>,
8360b57cec5SDimitry Andric             Sched<[sched]>;
8370b57cec5SDimitry Andric  let mayLoad = 1 in
8380b57cec5SDimitry Andric  def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), asm,
839480093f4SDimitry Andric             [(set RC:$dst, (DstTy (any_sint_to_fp
8400b57cec5SDimitry Andric                                    (SrcTy (ld_frag addr:$src)))))], d>,
8410b57cec5SDimitry Andric             Sched<[sched.Folded]>;
8420b57cec5SDimitry Andric}
8430b57cec5SDimitry Andric}
8440b57cec5SDimitry Andric
/// sse12_vcvt_avx - Pattern-less three-operand scalar conversion defs (rr and
/// rm) under [UseAVX]. Each takes an extra DstRC:$src1 input besides the
/// converted source.
///   opc       - opcode byte.
///   SrcRC     - register class of the converted source (rr form).
///   DstRC     - register class of $dst and $src1.
///   x86memop  - memory operand type for the rm form.
///   asm       - mnemonic.
///   mem       - suffix spliced into the rm asm string as "{" mem "}".
///   sched     - scheduling class; rm uses sched.Folded / sched.ReadAfterFold.
///   d         - execution domain.
8450b57cec5SDimitry Andricmulticlass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
8460b57cec5SDimitry Andric                          X86MemOperand x86memop, string asm, string mem,
847480093f4SDimitry Andric                          X86FoldableSchedWrite sched, Domain d> {
848480093f4SDimitry Andriclet hasSideEffects = 0, Predicates = [UseAVX], ExeDomain = d in {
8490b57cec5SDimitry Andric  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
8500b57cec5SDimitry Andric              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
8510b57cec5SDimitry Andric              Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  // No pattern, so the load is declared explicitly.
8520b57cec5SDimitry Andric  let mayLoad = 1 in
8530b57cec5SDimitry Andric  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
8540b57cec5SDimitry Andric              (ins DstRC:$src1, x86memop:$src),
8550b57cec5SDimitry Andric              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
8560b57cec5SDimitry Andric           Sched<[sched.Folded, sched.ReadAfterFold]>;
8570b57cec5SDimitry Andric} // hasSideEffects = 0
8580b57cec5SDimitry Andric}
8590b57cec5SDimitry Andric
// AVX codegen-only scalar FP -> integer conversions on FR32/FR64 operands.
// The 0x2C group passes any_fp_to_sint as its operator; the 0x2D group passes
// lrint for GR32 results and llrint for GR64 results.
let Predicates = [UseAVX], isCodeGenOnly = 1,
    mayRaiseFPException = 1, Uses = [MXCSR] in {
  // Opcode 0x2C.
  defm VCVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint,
                                  f32mem, loadf32,
                                  "cvttss2si", "cvttss2si",
                                  WriteCvtSS2I, SSEPackedSingle>,
                      XS, VEX, VEX_LIG;
  defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint,
                                  f32mem, loadf32,
                                  "cvttss2si", "cvttss2si",
                                  WriteCvtSS2I, SSEPackedSingle>,
                      XS, VEX, VEX_W, VEX_LIG;
  defm VCVTTSD2SI   : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint,
                                  f64mem, loadf64,
                                  "cvttsd2si", "cvttsd2si",
                                  WriteCvtSD2I, SSEPackedDouble>,
                      XD, VEX, VEX_LIG;
  defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint,
                                  f64mem, loadf64,
                                  "cvttsd2si", "cvttsd2si",
                                  WriteCvtSD2I, SSEPackedDouble>,
                      XD, VEX, VEX_W, VEX_LIG;

  // Opcode 0x2D.
  defm VCVTSS2SI    : sse12_cvt_s<0x2D, FR32, GR32, lrint,
                                  f32mem, loadf32,
                                  "cvtss2si", "cvtss2si",
                                  WriteCvtSS2I, SSEPackedSingle>,
                      XS, VEX, VEX_LIG;
  defm VCVTSS2SI64  : sse12_cvt_s<0x2D, FR32, GR64, llrint,
                                  f32mem, loadf32,
                                  "cvtss2si", "cvtss2si",
                                  WriteCvtSS2I, SSEPackedSingle>,
                      XS, VEX, VEX_W, VEX_LIG;
  defm VCVTSD2SI    : sse12_cvt_s<0x2D, FR64, GR32, lrint,
                                  f64mem, loadf64,
                                  "cvtsd2si", "cvtsd2si",
                                  WriteCvtSD2I, SSEPackedDouble>,
                      XD, VEX, VEX_LIG;
  defm VCVTSD2SI64  : sse12_cvt_s<0x2D, FR64, GR64, llrint,
                                  f64mem, loadf64,
                                  "cvtsd2si", "cvtsd2si",
                                  WriteCvtSD2I, SSEPackedDouble>,
                      XD, VEX, VEX_W, VEX_LIG;
} // Predicates, isCodeGenOnly, mayRaiseFPException, Uses
8950b57cec5SDimitry Andric
// With register operands the assembler can tell a 64-bit conversion apart by
// seeing an rxx register, but a memory operand gives no such hint. The memory
// forms therefore also provide explicit "l" and "q" assembly spellings where
// it is appropriate to do so.
let isCodeGenOnly = 1 in {
  defm VCVTSI2SS   : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem,
                                    "cvtsi2ss", "l",
                                    WriteCvtI2SS, SSEPackedSingle>,
                     XS, VEX_4V, VEX_LIG, SIMD_EXC;
  defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem,
                                    "cvtsi2ss", "q",
                                    WriteCvtI2SS, SSEPackedSingle>,
                     XS, VEX_4V, VEX_W, VEX_LIG, SIMD_EXC;
  // The GR32 -> FR64 entry is the only one of the four without SIMD_EXC.
  defm VCVTSI2SD   : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem,
                                    "cvtsi2sd", "l",
                                    WriteCvtI2SD, SSEPackedDouble>,
                     XD, VEX_4V, VEX_LIG;
  defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem,
                                    "cvtsi2sd", "q",
                                    WriteCvtI2SD, SSEPackedDouble>,
                     XD, VEX_4V, VEX_W, VEX_LIG, SIMD_EXC;
} // isCodeGenOnly = 1
9140b57cec5SDimitry Andric
// Selection patterns for the AVX codegen-only scalar conversions.
let Predicates = [UseAVX] in {
  // Signed integer -> FP with the integer loaded from memory. The extra
  // $src1 operand of the AVX instruction is supplied as IMPLICIT_DEF.
  def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SSrm   (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SDrm   (f64 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;

  // Signed integer -> FP with the integer in a general-purpose register.
  def : Pat<(f32 (any_sint_to_fp GR32:$src)),
            (VCVTSI2SSrr   (f32 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f32 (any_sint_to_fp GR64:$src)),
            (VCVTSI642SSrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
  def : Pat<(f64 (any_sint_to_fp GR32:$src)),
            (VCVTSI2SDrr   (f64 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f64 (any_sint_to_fp GR64:$src)),
            (VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

  // lrint with an i64 result maps onto the 64-bit instruction variants.
  def : Pat<(i64 (lrint FR32:$src)),
            (VCVTSS2SI64rr FR32:$src)>;
  def : Pat<(i64 (lrint (loadf32 addr:$src))),
            (VCVTSS2SI64rm addr:$src)>;

  def : Pat<(i64 (lrint FR64:$src)),
            (VCVTSD2SI64rr FR64:$src)>;
  def : Pat<(i64 (lrint (loadf64 addr:$src))),
            (VCVTSD2SI64rm addr:$src)>;
} // Predicates = [UseAVX]
9400b57cec5SDimitry Andric
// Codegen-only SSE scalar conversions (the non-VEX counterparts of the
// VCVT* definitions).
let isCodeGenOnly = 1 in {
  // FP -> integer, opcode 0x2C, using any_fp_to_sint.
  defm CVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint,
                                 f32mem, loadf32,
                                 "cvttss2si", "cvttss2si",
                                 WriteCvtSS2I, SSEPackedSingle>,
                     XS, SIMD_EXC;
  defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint,
                                 f32mem, loadf32,
                                 "cvttss2si", "cvttss2si",
                                 WriteCvtSS2I, SSEPackedSingle>,
                     XS, REX_W, SIMD_EXC;
  defm CVTTSD2SI   : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint,
                                 f64mem, loadf64,
                                 "cvttsd2si", "cvttsd2si",
                                 WriteCvtSD2I, SSEPackedDouble>,
                     XD, SIMD_EXC;
  defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint,
                                 f64mem, loadf64,
                                 "cvttsd2si", "cvttsd2si",
                                 WriteCvtSD2I, SSEPackedDouble>,
                     XD, REX_W, SIMD_EXC;

  // FP -> integer, opcode 0x2D, using lrint (GR32) / llrint (GR64).
  defm CVTSS2SI    : sse12_cvt_s<0x2D, FR32, GR32, lrint,
                                 f32mem, loadf32,
                                 "cvtss2si", "cvtss2si",
                                 WriteCvtSS2I, SSEPackedSingle>,
                     XS, SIMD_EXC;
  defm CVTSS2SI64  : sse12_cvt_s<0x2D, FR32, GR64, llrint,
                                 f32mem, loadf32,
                                 "cvtss2si", "cvtss2si",
                                 WriteCvtSS2I, SSEPackedSingle>,
                     XS, REX_W, SIMD_EXC;
  defm CVTSD2SI    : sse12_cvt_s<0x2D, FR64, GR32, lrint,
                                 f64mem, loadf64,
                                 "cvtsd2si", "cvtsd2si",
                                 WriteCvtSD2I, SSEPackedDouble>,
                     XD, SIMD_EXC;
  defm CVTSD2SI64  : sse12_cvt_s<0x2D, FR64, GR64, llrint,
                                 f64mem, loadf64,
                                 "cvtsd2si", "cvtsd2si",
                                 WriteCvtSD2I, SSEPackedDouble>,
                     XD, REX_W, SIMD_EXC;

  // Signed integer -> FP, opcode 0x2A. The memory mnemonic carries an
  // optional {l}/{q} variant, and ReadInt2Fpu is passed as an extra argument.
  defm CVTSI2SS    : sse12_cvt_s<0x2A, GR32, FR32, any_sint_to_fp,
                                 i32mem, loadi32,
                                 "cvtsi2ss", "cvtsi2ss{l}",
                                 WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>,
                     XS, SIMD_EXC;
  defm CVTSI642SS  : sse12_cvt_s<0x2A, GR64, FR32, any_sint_to_fp,
                                 i64mem, loadi64,
                                 "cvtsi2ss", "cvtsi2ss{q}",
                                 WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>,
                     XS, REX_W, SIMD_EXC;
  // The GR32 -> FR64 entry is the only one of the four without SIMD_EXC.
  defm CVTSI2SD    : sse12_cvt_s<0x2A, GR32, FR64, any_sint_to_fp,
                                 i32mem, loadi32,
                                 "cvtsi2sd", "cvtsi2sd{l}",
                                 WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>,
                     XD;
  defm CVTSI642SD  : sse12_cvt_s<0x2A, GR64, FR64, any_sint_to_fp,
                                 i64mem, loadi64,
                                 "cvtsi2sd", "cvtsi2sd{q}",
                                 WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>,
                     XD, REX_W, SIMD_EXC;
} // isCodeGenOnly = 1
9810b57cec5SDimitry Andric
// lrint with an i64 result from an f32 source, under UseSSE1.
let Predicates = [UseSSE1] in {
  def : Pat<(i64 (lrint FR32:$src)),
            (CVTSS2SI64rr FR32:$src)>;
  def : Pat<(i64 (lrint (loadf32 addr:$src))),
            (CVTSS2SI64rm addr:$src)>;
} // Predicates = [UseSSE1]

// lrint with an i64 result from an f64 source, under UseSSE2.
let Predicates = [UseSSE2] in {
  def : Pat<(i64 (lrint FR64:$src)),
            (CVTSD2SI64rr FR64:$src)>;
  def : Pat<(i64 (lrint (loadf64 addr:$src))),
            (CVTSD2SI64rm addr:$src)>;
} // Predicates = [UseSSE2]
9915ffd83dbSDimitry Andric
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).

/// sse12_cvt_sint - Two-operand conversions with selection patterns.
///
/// Defines rr_Int and rm_Int. Each pattern applies OpNode to the source,
/// viewed as SrcVT, and yields a DstVT value in DstRC. The memory form wraps
/// the address in `mem_frags`.
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          ValueType DstVT, ValueType SrcVT, SDNode OpNode,
                          Operand memop, PatFrags mem_frags, string asm,
                          X86FoldableSchedWrite sched, Domain d> {
  let ExeDomain = d in {
    def rr_Int : SI<opc, MRMSrcReg,
                    (outs DstRC:$dst), (ins SrcRC:$src),
                    asm # "\t{$src, $dst|$dst, $src}",
                    [(set DstRC:$dst,
                          (DstVT (OpNode (SrcVT SrcRC:$src))))]>,
                 Sched<[sched]>;

    def rm_Int : SI<opc, MRMSrcMem,
                    (outs DstRC:$dst), (ins memop:$src),
                    asm # "\t{$src, $dst|$dst, $src}",
                    [(set DstRC:$dst,
                          (DstVT (OpNode (SrcVT (mem_frags addr:$src)))))]>,
                 Sched<[sched.Folded]>;
  } // ExeDomain = d
}
10100b57cec5SDimitry Andric
/// sse12_cvt_sint_3addr - Pattern-less rr_Int/rm_Int conversions that take a
/// DstRC operand ($src1) next to the value being converted ($src2).
///
/// Is2Addr picks the assembly operand list: when set, only $src2 and $dst are
/// printed; when clear, $src1 is printed as well. The rm_Int mnemonic has
/// `mem` appended inside an optional "{...}" assembly variant.
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
                    RegisterClass DstRC, X86MemOperand x86memop,
                    string asm, string mem, X86FoldableSchedWrite sched,
                    Domain d, bit Is2Addr = 1> {
  let ExeDomain = d, hasSideEffects = 0 in {
    def rr_Int : SI<opc, MRMSrcReg,
                    (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
                    !strconcat(asm,
                      !if(Is2Addr,
                          "\t{$src2, $dst|$dst, $src2}",
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                    []>,
                 Sched<[sched, ReadDefault, ReadInt2Fpu]>;

    // There is no pattern to infer it from, so mayLoad is set explicitly.
    let mayLoad = 1 in
    def rm_Int : SI<opc, MRMSrcMem,
                    (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src2),
                    !strconcat(asm, "{", mem, "}",
                      !if(Is2Addr,
                          "\t{$src2, $dst|$dst, $src2}",
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                    []>,
                 Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // ExeDomain, hasSideEffects
}
10300b57cec5SDimitry Andric
// X86cvts2si on a v2f64 source held in VR128: AVX forms first, then SSE.
let mayRaiseFPException = 1, Uses = [MXCSR] in {
  let Predicates = [UseAVX] in {
    defm VCVTSD2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64,
                                      X86cvts2si, sdmem, sse_load_f64,
                                      "cvtsd2si", WriteCvtSD2I,
                                      SSEPackedDouble>,
                       XD, VEX, VEX_LIG;
    defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64,
                                      X86cvts2si, sdmem, sse_load_f64,
                                      "cvtsd2si", WriteCvtSD2I,
                                      SSEPackedDouble>,
                       XD, VEX, VEX_W, VEX_LIG;
  } // Predicates = [UseAVX]

  defm CVTSD2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64,
                                   X86cvts2si, sdmem, sse_load_f64,
                                   "cvtsd2si", WriteCvtSD2I,
                                   SSEPackedDouble>,
                    XD;
  defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64,
                                   X86cvts2si, sdmem, sse_load_f64,
                                   "cvtsd2si", WriteCvtSD2I,
                                   SSEPackedDouble>,
                    XD, REX_W;
} // mayRaiseFPException, Uses
10470b57cec5SDimitry Andric
// AVX integer -> FP forms on VR128. Is2Addr is passed as 0, so the assembly
// string lists $src1 as a separate operand.
let Predicates = [UseAVX] in {
  defm VCVTSI2SS   : sse12_cvt_sint_3addr<0x2A, GR32, VR128, i32mem,
                                          "cvtsi2ss", "l", WriteCvtI2SS,
                                          SSEPackedSingle, 0>,
                     XS, VEX_4V, VEX_LIG, SIMD_EXC;
  defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, i64mem,
                                          "cvtsi2ss", "q", WriteCvtI2SS,
                                          SSEPackedSingle, 0>,
                     XS, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC;
  // The GR32 -> double entry is the only one of the four without SIMD_EXC.
  defm VCVTSI2SD   : sse12_cvt_sint_3addr<0x2A, GR32, VR128, i32mem,
                                          "cvtsi2sd", "l", WriteCvtI2SD,
                                          SSEPackedDouble, 0>,
                     XD, VEX_4V, VEX_LIG;
  defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, i64mem,
                                          "cvtsi2sd", "q", WriteCvtI2SD,
                                          SSEPackedDouble, 0>,
                     XD, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC;
} // Predicates = [UseAVX]
// SSE integer -> FP forms on VR128. $src1 is tied to $dst and Is2Addr keeps
// its default, so only $src2 and $dst appear in the assembly string.
let Constraints = "$src1 = $dst" in {
  defm CVTSI2SS   : sse12_cvt_sint_3addr<0x2A, GR32, VR128, i32mem,
                                         "cvtsi2ss", "l", WriteCvtI2SS,
                                         SSEPackedSingle>,
                    XS, SIMD_EXC;
  defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, i64mem,
                                         "cvtsi2ss", "q", WriteCvtI2SS,
                                         SSEPackedSingle>,
                    XS, REX_W, SIMD_EXC;
  // The GR32 -> double entry is the only one of the four without SIMD_EXC.
  defm CVTSI2SD   : sse12_cvt_sint_3addr<0x2A, GR32, VR128, i32mem,
                                         "cvtsi2sd", "l", WriteCvtI2SD,
                                         SSEPackedDouble>,
                    XD;
  defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, i64mem,
                                         "cvtsi2sd", "q", WriteCvtI2SD,
                                         SSEPackedDouble>,
                    XD, REX_W, SIMD_EXC;
} // Constraints = "$src1 = $dst"
10760b57cec5SDimitry Andric
// AT&T-only aliases for the integer -> FP _Int instructions.

// AVX register forms with an optional {l}/{q} variant in the mnemonic.
def : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI2SSrr_Int VR128:$dst, VR128:$src1, GR32:$src2),
                0, "att">;
def : InstAlias<"vcvtsi2ss{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI642SSrr_Int VR128:$dst, VR128:$src1, GR64:$src2),
                0, "att">;
def : InstAlias<"vcvtsi2sd{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI2SDrr_Int VR128:$dst, VR128:$src1, GR32:$src2),
                0, "att">;
def : InstAlias<"vcvtsi2sd{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI642SDrr_Int VR128:$dst, VR128:$src1, GR64:$src2),
                0, "att">;

// AVX memory forms: the unsuffixed mnemonic maps to the i32mem instruction.
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SSrm_Int VR128:$dst, VR128:$src1, i32mem:$src),
                0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SDrm_Int VR128:$dst, VR128:$src1, i32mem:$src),
                0, "att">;

// SSE register forms with an optional {l}/{q} variant in the mnemonic.
def : InstAlias<"cvtsi2ss{l}\t{$src, $dst|$dst, $src}",
                (CVTSI2SSrr_Int VR128:$dst, GR32:$src),
                0, "att">;
def : InstAlias<"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
                (CVTSI642SSrr_Int VR128:$dst, GR64:$src),
                0, "att">;
def : InstAlias<"cvtsi2sd{l}\t{$src, $dst|$dst, $src}",
                (CVTSI2SDrr_Int VR128:$dst, GR32:$src),
                0, "att">;
def : InstAlias<"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
                (CVTSI642SDrr_Int VR128:$dst, GR64:$src),
                0, "att">;

// SSE memory forms: the unsuffixed mnemonic maps to the i32mem instruction.
def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
                (CVTSI2SSrm_Int VR128:$dst, i32mem:$src),
                0, "att">;
def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
                (CVTSI2SDrm_Int VR128:$dst, i32mem:$src),
                0, "att">;
11040b57cec5SDimitry Andric
/// SSE 1 Only

// Aliases for intrinsics: AVX forms applying X86cvtts2Int to a VR128 source.
let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Single-precision sources (v4f32) use the SS2I scheduling class.
  defm VCVTTSS2SI   : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32,
                                     X86cvtts2Int, ssmem, sse_load_f32,
                                     "cvttss2si", WriteCvtSS2I,
                                     SSEPackedSingle>,
                      XS, VEX, VEX_LIG;
  defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
                                     X86cvtts2Int, ssmem, sse_load_f32,
                                     "cvttss2si", WriteCvtSS2I,
                                     SSEPackedSingle>,
                      XS, VEX, VEX_LIG, VEX_W;

  // Double-precision sources (v2f64) use the SD2I scheduling class, the same
  // class used by the SSE CVTTSD2SI forms and the codegen-only VCVTTSD2SI.
  defm VCVTTSD2SI   : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64,
                                     X86cvtts2Int, sdmem, sse_load_f64,
                                     "cvttsd2si", WriteCvtSD2I,
                                     SSEPackedDouble>,
                      XD, VEX, VEX_LIG;
  defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
                                     X86cvtts2Int, sdmem, sse_load_f64,
                                     "cvttsd2si", WriteCvtSD2I,
                                     SSEPackedDouble>,
                      XD, VEX, VEX_LIG, VEX_W;
} // Predicates, Uses, mayRaiseFPException
// SSE forms applying X86cvtts2Int to a VR128 source.
let mayRaiseFPException = 1, Uses = [MXCSR] in {
  defm CVTTSS2SI   : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32,
                                    X86cvtts2Int, ssmem, sse_load_f32,
                                    "cvttss2si", WriteCvtSS2I,
                                    SSEPackedSingle>,
                     XS;
  defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
                                    X86cvtts2Int, ssmem, sse_load_f32,
                                    "cvttss2si", WriteCvtSS2I,
                                    SSEPackedSingle>,
                     XS, REX_W;
  defm CVTTSD2SI   : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64,
                                    X86cvtts2Int, sdmem, sse_load_f64,
                                    "cvttsd2si", WriteCvtSD2I,
                                    SSEPackedDouble>,
                     XD;
  defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
                                    X86cvtts2Int, sdmem, sse_load_f64,
                                    "cvttsd2si", WriteCvtSD2I,
                                    SSEPackedDouble>,
                     XD, REX_W;
} // mayRaiseFPException, Uses
11400b57cec5SDimitry Andric
// AT&T-only aliases giving the cvtt*2si _Int instructions an optional
// {l}/{q} variant in the mnemonic.

// AVX, GR32 destination.
def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SIrr_Int GR32:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SIrm_Int GR32:$dst, f32mem:$src),
                0, "att">;
def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SIrr_Int GR32:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SIrm_Int GR32:$dst, f64mem:$src),
                0, "att">;
// AVX, GR64 destination.
def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SI64rr_Int GR64:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SI64rm_Int GR64:$dst, f32mem:$src),
                0, "att">;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SI64rr_Int GR64:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SI64rm_Int GR64:$dst, f64mem:$src),
                0, "att">;

// SSE, GR32 destination.
def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SIrr_Int GR32:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SIrm_Int GR32:$dst, f32mem:$src),
                0, "att">;
def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SIrr_Int GR32:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SIrm_Int GR32:$dst, f64mem:$src),
                0, "att">;
// SSE, GR64 destination.
def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SI64rr_Int GR64:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SI64rm_Int GR64:$dst, f32mem:$src),
                0, "att">;
def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SI64rr_Int GR64:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SI64rm_Int GR64:$dst, f64mem:$src),
                0, "att">;
11740b57cec5SDimitry Andric
// AVX forms applying X86cvts2si to a v4f32 source held in VR128.
let Predicates = [UseAVX], mayRaiseFPException = 1, Uses = [MXCSR] in {
  defm VCVTSS2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32,
                                    X86cvts2si, ssmem, sse_load_f32,
                                    "cvtss2si", WriteCvtSS2I,
                                    SSEPackedSingle>,
                     XS, VEX, VEX_LIG;
  defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32,
                                    X86cvts2si, ssmem, sse_load_f32,
                                    "cvtss2si", WriteCvtSS2I,
                                    SSEPackedSingle>,
                     XS, VEX, VEX_W, VEX_LIG;
} // Predicates, mayRaiseFPException, Uses
// SSE X86cvts2si forms on a v4f32 source, followed by the packed
// integer -> single-precision conversions. Everything in this group is
// marked as using MXCSR and as able to raise an FP exception.
let mayRaiseFPException = 1, Uses = [MXCSR] in {
  defm CVTSS2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32,
                                   X86cvts2si, ssmem, sse_load_f32,
                                   "cvtss2si", WriteCvtSS2I,
                                   SSEPackedSingle>,
                    XS;
  defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32,
                                   X86cvts2si, ssmem, sse_load_f32,
                                   "cvtss2si", WriteCvtSS2I,
                                   SSEPackedSingle>,
                    XS, REX_W;

  // Packed forms: the AVX entries require HasAVX and NoVLX and use `load`.
  defm VCVTDQ2PS  : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load,
                                "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                                SSEPackedSingle, WriteCvtI2PS>,
                    PS, VEX, Requires<[HasAVX, NoVLX]>, VEX_WIG;
  defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, load,
                                "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                                SSEPackedSingle, WriteCvtI2PSY>,
                    PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, VEX_WIG;

  // The SSE entry requires UseSSE2 and uses `memop` instead of `load`.
  defm CVTDQ2PS   : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memop,
                                "cvtdq2ps\t{$src, $dst|$dst, $src}",
                                SSEPackedSingle, WriteCvtI2PS>,
                    PS, Requires<[UseSSE2]>;
} // mayRaiseFPException, Uses
12050b57cec5SDimitry Andric
// AT&T-only aliases giving the cvts*2si _Int instructions an optional
// {l}/{q} variant in the mnemonic.

// AVX aliases
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SIrr_Int GR32:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SIrm_Int GR32:$dst, ssmem:$src),
                0, "att">;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SIrr_Int GR32:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SIrm_Int GR32:$dst, sdmem:$src),
                0, "att">;
def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SI64rr_Int GR64:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SI64rm_Int GR64:$dst, ssmem:$src),
                0, "att">;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SI64rr_Int GR64:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SI64rm_Int GR64:$dst, sdmem:$src),
                0, "att">;

// SSE aliases
def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSS2SIrr_Int GR32:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSS2SIrm_Int GR32:$dst, ssmem:$src),
                0, "att">;
def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSD2SIrr_Int GR32:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSD2SIrm_Int GR32:$dst, sdmem:$src),
                0, "att">;
def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSS2SI64rr_Int GR64:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSS2SI64rm_Int GR64:$dst, ssmem:$src),
                0, "att">;
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSD2SI64rr_Int GR64:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSD2SI64rm_Int GR64:$dst, sdmem:$src),
                0, "att">;
12410b57cec5SDimitry Andric
/// SSE 2 Only

// Convert scalar double to scalar single (AVX, codegen-only, no patterns).
let Predicates = [UseAVX], ExeDomain = SSEPackedSingle,
    isCodeGenOnly = 1, hasSideEffects = 0 in {
  // NOTE(review): this mnemonic is spelled without the leading 'v' that the
  // rm form below has; presumably the VSDI class supplies it - confirm
  // against the VSDI definition.
  def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg,
                         (outs FR32:$dst), (ins FR32:$src1, FR64:$src2),
                         "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         []>,
                    VEX_4V, VEX_LIG, VEX_WIG,
                    Sched<[WriteCvtSD2SS]>, SIMD_EXC;

  // There is no pattern to infer it from, so mayLoad is set explicitly.
  let mayLoad = 1 in
  def VCVTSD2SSrm : I<0x5A, MRMSrcMem,
                      (outs FR32:$dst), (ins FR32:$src1, f64mem:$src2),
                      "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      []>,
                    XD, VEX_4V, VEX_LIG, VEX_WIG,
                    Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>,
                    SIMD_EXC;
} // Predicates, ExeDomain, isCodeGenOnly, hasSideEffects
12590b57cec5SDimitry Andric
// Under UseAVX, select an f64 -> f32 any_fpround as VCVTSD2SSrr. The extra
// first source operand is fed an IMPLICIT_DEF f32 because the pattern has no
// value to supply for it.
1260480093f4SDimitry Andricdef : Pat<(f32 (any_fpround FR64:$src)),
12610b57cec5SDimitry Andric            (VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>,
12620b57cec5SDimitry Andric          Requires<[UseAVX]>;
12630b57cec5SDimitry Andric
// Non-VEX scalar double -> single conversion on FR32/FR64. Unlike the AVX
// records these are two-operand and carry their any_fpround patterns inline.
1264e8d8bef9SDimitry Andriclet isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in {
// NOTE(review): no Requires<> is listed on the rr form; presumably the SDI
// class provides the SSE2 predicate and XD prefix -- confirm.
12650b57cec5SDimitry Andricdef CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
12660b57cec5SDimitry Andric                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
1267480093f4SDimitry Andric                      [(set FR32:$dst, (any_fpround FR64:$src))]>,
1268480093f4SDimitry Andric                      Sched<[WriteCvtSD2SS]>, SIMD_EXC;
// The load-folding form is only selectable together with OptForSize.
12690b57cec5SDimitry Andricdef CVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
12700b57cec5SDimitry Andric                    "cvtsd2ss\t{$src, $dst|$dst, $src}",
1271480093f4SDimitry Andric                    [(set FR32:$dst, (any_fpround (loadf64 addr:$src)))]>,
12720b57cec5SDimitry Andric                    XD, Requires<[UseSSE2, OptForSize]>,
1273480093f4SDimitry Andric                    Sched<[WriteCvtSD2SS.Folded]>, SIMD_EXC;
12740b57cec5SDimitry Andric} // isCodeGenOnly = 1, ExeDomain = SSEPackedSingle
12750b57cec5SDimitry Andric
// VR128 (_Int) forms of (v)cvtsd2ss, selected from the X86frounds node with
// $src1 as its first operand. Every record in this block uses MXCSR and is
// marked mayRaiseFPException via the enclosing let.
1276e8d8bef9SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = SSEPackedSingle in {
// AVX forms: three-operand, gated on UseAVX.
12770b57cec5SDimitry Andricdef VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
12780b57cec5SDimitry Andric                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
12790b57cec5SDimitry Andric                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
12800b57cec5SDimitry Andric                       [(set VR128:$dst,
12810b57cec5SDimitry Andric                         (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
12820b57cec5SDimitry Andric                       XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>,
12830b57cec5SDimitry Andric                       Sched<[WriteCvtSD2SS]>;
12840b57cec5SDimitry Andricdef VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
12850b57cec5SDimitry Andric                       (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
12860b57cec5SDimitry Andric                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
12870b57cec5SDimitry Andric                       [(set VR128:$dst,
12885ffd83dbSDimitry Andric                         (v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>,
12890b57cec5SDimitry Andric                       XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>,
12900b57cec5SDimitry Andric                       Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
// SSE2 forms: $src1 is tied to $dst, so only $src2 appears in the asm string.
12910b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
12920b57cec5SDimitry Andricdef CVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
12930b57cec5SDimitry Andric                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
12940b57cec5SDimitry Andric                       "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
12950b57cec5SDimitry Andric                       [(set VR128:$dst,
12960b57cec5SDimitry Andric                         (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
12970b57cec5SDimitry Andric                       XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>;
12980b57cec5SDimitry Andricdef CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
12990b57cec5SDimitry Andric                       (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
13000b57cec5SDimitry Andric                       "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
13010b57cec5SDimitry Andric                       [(set VR128:$dst,
13025ffd83dbSDimitry Andric                         (v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>,
13030b57cec5SDimitry Andric                       XD, Requires<[UseSSE2]>,
13040b57cec5SDimitry Andric                       Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
13050b57cec5SDimitry Andric} // Constraints = "$src1 = $dst"
1306480093f4SDimitry Andric} // Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = SSEPackedSingle
13070b57cec5SDimitry Andric
13080b57cec5SDimitry Andric// Convert scalar single to scalar double
13090b57cec5SDimitry Andric// SSE2 instructions with XS prefix
// AVX-encoded scalar single -> double conversion on FR64/FR32. Both records
// have an empty pattern list and an extra FR64 $src1 input; they are selected
// through explicit Pat records.
1310e8d8bef9SDimitry Andriclet isCodeGenOnly = 1, hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
13110b57cec5SDimitry Andricdef VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
13120b57cec5SDimitry Andric                    (ins FR64:$src1, FR32:$src2),
13130b57cec5SDimitry Andric                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
13140b57cec5SDimitry Andric                    XS, VEX_4V, VEX_LIG, VEX_WIG,
1315480093f4SDimitry Andric                    Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>, SIMD_EXC;
// No pattern is attached, so the load must be declared explicitly. The memory
// form additionally requires OptForSize.
13160b57cec5SDimitry Andriclet mayLoad = 1 in
13170b57cec5SDimitry Andricdef VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
13180b57cec5SDimitry Andric                    (ins FR64:$src1, f32mem:$src2),
13190b57cec5SDimitry Andric                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
13200b57cec5SDimitry Andric                    XS, VEX_4V, VEX_LIG, VEX_WIG,
13210b57cec5SDimitry Andric                    Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>,
1322480093f4SDimitry Andric                    Requires<[UseAVX, OptForSize]>, SIMD_EXC;
13230b57cec5SDimitry Andric} // isCodeGenOnly = 1, hasSideEffects = 0, ExeDomain = SSEPackedSingle
13240b57cec5SDimitry Andric
// AVX selection of f32 -> f64 any_fpextend. The extra first source operand of
// the VCVTSS2SD records is fed an IMPLICIT_DEF f64. The load-folding pattern
// is only enabled together with OptForSize.
1325480093f4SDimitry Andricdef : Pat<(f64 (any_fpextend FR32:$src)),
13260b57cec5SDimitry Andric    (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>, Requires<[UseAVX]>;
1327480093f4SDimitry Andricdef : Pat<(any_fpextend (loadf32 addr:$src)),
13280b57cec5SDimitry Andric    (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX, OptForSize]>;
13290b57cec5SDimitry Andric
// Non-VEX scalar single -> double conversion on FR64/FR32, with the
// any_fpextend patterns attached directly to the records.
1330e8d8bef9SDimitry Andriclet isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in {
13310b57cec5SDimitry Andricdef CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
13320b57cec5SDimitry Andric                   "cvtss2sd\t{$src, $dst|$dst, $src}",
1333480093f4SDimitry Andric                   [(set FR64:$dst, (any_fpextend FR32:$src))]>,
1334480093f4SDimitry Andric                   XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>, SIMD_EXC;
// The load-folding form is only selectable together with OptForSize.
13350b57cec5SDimitry Andricdef CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
13360b57cec5SDimitry Andric                   "cvtss2sd\t{$src, $dst|$dst, $src}",
1337480093f4SDimitry Andric                   [(set FR64:$dst, (any_fpextend (loadf32 addr:$src)))]>,
13380b57cec5SDimitry Andric                   XS, Requires<[UseSSE2, OptForSize]>,
1339480093f4SDimitry Andric                   Sched<[WriteCvtSS2SD.Folded]>, SIMD_EXC;
13400b57cec5SDimitry Andric} // isCodeGenOnly = 1, ExeDomain = SSEPackedSingle
13410b57cec5SDimitry Andric
// VR128 (_Int) forms of (v)cvtss2sd. None of these records carries a pattern
// ([]); they are referenced by the X86Movsd folding patterns later in the
// file. All use MXCSR and are marked mayRaiseFPException via the enclosing let.
1342e8d8bef9SDimitry Andriclet hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1,
1343e8d8bef9SDimitry Andric    ExeDomain = SSEPackedSingle in {
// AVX forms: three-operand, gated on HasAVX.
13440b57cec5SDimitry Andricdef VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
13450b57cec5SDimitry Andric                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
13460b57cec5SDimitry Andric                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
13470b57cec5SDimitry Andric                    []>, XS, VEX_4V, VEX_LIG, VEX_WIG,
13480b57cec5SDimitry Andric                    Requires<[HasAVX]>, Sched<[WriteCvtSS2SD]>;
// No pattern is attached, so the load must be declared explicitly.
13490b57cec5SDimitry Andriclet mayLoad = 1 in
13500b57cec5SDimitry Andricdef VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
13510b57cec5SDimitry Andric                      (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
13520b57cec5SDimitry Andric                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
13530b57cec5SDimitry Andric                    []>, XS, VEX_4V, VEX_LIG, VEX_WIG, Requires<[HasAVX]>,
13540b57cec5SDimitry Andric                    Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
// SSE2 forms: $src1 is tied to $dst, so only $src2 appears in the asm string.
13550b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
13560b57cec5SDimitry Andricdef CVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
13570b57cec5SDimitry Andric                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
13580b57cec5SDimitry Andric                    "cvtss2sd\t{$src2, $dst|$dst, $src2}",
13590b57cec5SDimitry Andric                    []>, XS, Requires<[UseSSE2]>,
13600b57cec5SDimitry Andric                    Sched<[WriteCvtSS2SD]>;
13610b57cec5SDimitry Andriclet mayLoad = 1 in
13620b57cec5SDimitry Andricdef CVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
13630b57cec5SDimitry Andric                      (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
13640b57cec5SDimitry Andric                    "cvtss2sd\t{$src2, $dst|$dst, $src2}",
13650b57cec5SDimitry Andric                    []>, XS, Requires<[UseSSE2]>,
13660b57cec5SDimitry Andric                    Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
13670b57cec5SDimitry Andric} // Constraints = "$src1 = $dst"
13680b57cec5SDimitry Andric} // hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain
13690b57cec5SDimitry Andric
13700b57cec5SDimitry Andric// Patterns used for matching (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss and
13710b57cec5SDimitry Andric// (v)cvtss2sd intrinsic sequences from clang, which would otherwise produce
13720b57cec5SDimitry Andric// unnecessary (v)movs{s,d} instructions.
// UseAVX patterns. Each matches an X86Movss/X86Movsd that places a scalar
// conversion result (wrapped in scalar_to_vector) into $dst, and selects the
// corresponding V*_Int instruction with $dst as its first source operand.
// Order: fpround, fpextend, then sint_to_fp to f32 (GR64, i64 load, GR32,
// i32 load) and sint_to_fp to f64 (same four sources).
13730b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
13740b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
13750b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
13760b57cec5SDimitry Andric                   (v4f32 (scalar_to_vector
1377480093f4SDimitry Andric                     (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
13780b57cec5SDimitry Andric          (VCVTSD2SSrr_Int VR128:$dst, VR128:$src)>;
13790b57cec5SDimitry Andric
13800b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
13810b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
13820b57cec5SDimitry Andric                   (v2f64 (scalar_to_vector
1383480093f4SDimitry Andric                     (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
13840b57cec5SDimitry Andric          (VCVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
13850b57cec5SDimitry Andric
13860b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
13870b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1388480093f4SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
13890b57cec5SDimitry Andric          (VCVTSI642SSrr_Int VR128:$dst, GR64:$src)>;
13900b57cec5SDimitry Andric
13910b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
13920b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1393480093f4SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
13940b57cec5SDimitry Andric          (VCVTSI642SSrm_Int VR128:$dst, addr:$src)>;
13950b57cec5SDimitry Andric
13960b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
13970b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1398480093f4SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
13990b57cec5SDimitry Andric          (VCVTSI2SSrr_Int VR128:$dst, GR32:$src)>;
14000b57cec5SDimitry Andric
14010b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
14020b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1403480093f4SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
14040b57cec5SDimitry Andric          (VCVTSI2SSrm_Int VR128:$dst, addr:$src)>;
14050b57cec5SDimitry Andric
14060b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
14070b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1408480093f4SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
14090b57cec5SDimitry Andric          (VCVTSI642SDrr_Int VR128:$dst, GR64:$src)>;
14100b57cec5SDimitry Andric
14110b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
14120b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1413480093f4SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
14140b57cec5SDimitry Andric          (VCVTSI642SDrm_Int VR128:$dst, addr:$src)>;
14150b57cec5SDimitry Andric
14160b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
14170b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1418480093f4SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
14190b57cec5SDimitry Andric          (VCVTSI2SDrr_Int VR128:$dst, GR32:$src)>;
14200b57cec5SDimitry Andric
14210b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
14220b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1423480093f4SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
14240b57cec5SDimitry Andric          (VCVTSI2SDrm_Int VR128:$dst, addr:$src)>;
14250b57cec5SDimitry Andric} // Predicates = [UseAVX]
14260b57cec5SDimitry Andric
// UseSSE2 patterns: the same X86Movss/X86Movsd-around-conversion shapes as
// the UseAVX patterns, selected to the non-VEX _Int instructions. Covers
// fpround, fpextend, and sint_to_fp to f64 (GR64, i64 load, GR32, i32 load).
14270b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
14280b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
14290b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
14300b57cec5SDimitry Andric                   (v4f32 (scalar_to_vector
1431480093f4SDimitry Andric                     (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
14320b57cec5SDimitry Andric          (CVTSD2SSrr_Int VR128:$dst, VR128:$src)>;
14330b57cec5SDimitry Andric
14340b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
14350b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
14360b57cec5SDimitry Andric                   (v2f64 (scalar_to_vector
1437480093f4SDimitry Andric                     (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
14380b57cec5SDimitry Andric          (CVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
14390b57cec5SDimitry Andric
14400b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
14410b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1442480093f4SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
14430b57cec5SDimitry Andric          (CVTSI642SDrr_Int VR128:$dst, GR64:$src)>;
14440b57cec5SDimitry Andric
14450b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
14460b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1447480093f4SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
14480b57cec5SDimitry Andric          (CVTSI642SDrm_Int VR128:$dst, addr:$src)>;
14490b57cec5SDimitry Andric
14500b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
14510b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1452480093f4SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
14530b57cec5SDimitry Andric          (CVTSI2SDrr_Int VR128:$dst, GR32:$src)>;
14540b57cec5SDimitry Andric
14550b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd
14560b57cec5SDimitry Andric                   (v2f64 VR128:$dst),
1457480093f4SDimitry Andric                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
14580b57cec5SDimitry Andric          (CVTSI2SDrm_Int VR128:$dst, addr:$src)>;
14590b57cec5SDimitry Andric} // Predicates = [UseSSE2]
14600b57cec5SDimitry Andric
// UseSSE1 patterns: X86Movss around an integer -> f32 any_sint_to_fp is
// selected to the non-VEX CVTSI2SS/CVTSI642SS _Int instructions, for GR64,
// i64 load, GR32 and i32 load sources.
14610b57cec5SDimitry Andriclet Predicates = [UseSSE1] in {
14620b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
14630b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1464480093f4SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
14650b57cec5SDimitry Andric          (CVTSI642SSrr_Int VR128:$dst, GR64:$src)>;
14660b57cec5SDimitry Andric
14670b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
14680b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1469480093f4SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
14700b57cec5SDimitry Andric          (CVTSI642SSrm_Int VR128:$dst, addr:$src)>;
14710b57cec5SDimitry Andric
14720b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
14730b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1474480093f4SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
14750b57cec5SDimitry Andric          (CVTSI2SSrr_Int VR128:$dst, GR32:$src)>;
14760b57cec5SDimitry Andric
14770b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss
14780b57cec5SDimitry Andric                   (v4f32 VR128:$dst),
1479480093f4SDimitry Andric                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
14800b57cec5SDimitry Andric          (CVTSI2SSrm_Int VR128:$dst, addr:$src)>;
14810b57cec5SDimitry Andric} // Predicates = [UseSSE1]
14820b57cec5SDimitry Andric
// AVX packed single -> doubleword conversion (X86cvtp2Int), in 128-bit
// (VR128, v4f32 -> v4i32) and 256-bit (VR256, v8f32 -> v8i32, VEX_L) register
// and load-folding forms. Enabled only under HasAVX together with NoVLX.
// NOTE(review): the asm strings omit the leading v; presumably the VPDI class
// prepends it -- confirm in the format classes.
14830b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
14840b57cec5SDimitry Andric// Convert packed single/double fp to doubleword
14850b57cec5SDimitry Andricdef VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
14860b57cec5SDimitry Andric                       "cvtps2dq\t{$src, $dst|$dst, $src}",
14870b57cec5SDimitry Andric                       [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>,
1488480093f4SDimitry Andric                       VEX, Sched<[WriteCvtPS2I]>, VEX_WIG, SIMD_EXC;
14890b57cec5SDimitry Andricdef VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
14900b57cec5SDimitry Andric                       "cvtps2dq\t{$src, $dst|$dst, $src}",
14910b57cec5SDimitry Andric                       [(set VR128:$dst,
14920b57cec5SDimitry Andric                         (v4i32 (X86cvtp2Int (loadv4f32 addr:$src))))]>,
1493480093f4SDimitry Andric                       VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG, SIMD_EXC;
14940b57cec5SDimitry Andricdef VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
14950b57cec5SDimitry Andric                        "cvtps2dq\t{$src, $dst|$dst, $src}",
14960b57cec5SDimitry Andric                        [(set VR256:$dst,
14970b57cec5SDimitry Andric                          (v8i32 (X86cvtp2Int (v8f32 VR256:$src))))]>,
1498480093f4SDimitry Andric                        VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG, SIMD_EXC;
14990b57cec5SDimitry Andricdef VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
15000b57cec5SDimitry Andric                        "cvtps2dq\t{$src, $dst|$dst, $src}",
15010b57cec5SDimitry Andric                        [(set VR256:$dst,
15020b57cec5SDimitry Andric                          (v8i32 (X86cvtp2Int (loadv8f32 addr:$src))))]>,
1503480093f4SDimitry Andric                        VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG, SIMD_EXC;
15040b57cec5SDimitry Andric} // Predicates = [HasAVX, NoVLX]
// Non-VEX packed single -> doubleword conversion (v4f32 -> v4i32 via
// X86cvtp2Int). The memory form matches memopv4f32, where the AVX form above
// matches loadv4f32.
// NOTE(review): no Requires<> is listed; presumably the PDI class supplies the
// SSE2 predicate -- confirm.
15050b57cec5SDimitry Andricdef CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
15060b57cec5SDimitry Andric                     "cvtps2dq\t{$src, $dst|$dst, $src}",
15070b57cec5SDimitry Andric                     [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>,
1508480093f4SDimitry Andric                     Sched<[WriteCvtPS2I]>, SIMD_EXC;
15090b57cec5SDimitry Andricdef CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
15100b57cec5SDimitry Andric                     "cvtps2dq\t{$src, $dst|$dst, $src}",
15110b57cec5SDimitry Andric                     [(set VR128:$dst,
15120b57cec5SDimitry Andric                       (v4i32 (X86cvtp2Int (memopv4f32 addr:$src))))]>,
1513480093f4SDimitry Andric                     Sched<[WriteCvtPS2ILd]>, SIMD_EXC;
15140b57cec5SDimitry Andric
15150b57cec5SDimitry Andric
15160b57cec5SDimitry Andric// Convert Packed Double FP to Packed DW Integers
// AVX packed double -> doubleword conversion (X86cvtp2Int). The destination
// is VR128 for both the 128-bit (v2f64) and 256-bit (v4f64, VEX_L) sources.
// Every record here uses MXCSR and is marked mayRaiseFPException via the
// enclosing let. The memory forms spell the mnemonic with {x} / {y}.
1517480093f4SDimitry Andriclet Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
15180b57cec5SDimitry Andric// The assembler can recognize rr 256-bit instructions by seeing a ymm
15190b57cec5SDimitry Andric// register, but the same isn't true when using memory operands instead.
15200b57cec5SDimitry Andric// Provide other assembly rr and rm forms to address this explicitly.
15210b57cec5SDimitry Andricdef VCVTPD2DQrr  : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
15220b57cec5SDimitry Andric                       "vcvtpd2dq\t{$src, $dst|$dst, $src}",
15230b57cec5SDimitry Andric                       [(set VR128:$dst,
15240b57cec5SDimitry Andric                         (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>,
15250b57cec5SDimitry Andric                       VEX, Sched<[WriteCvtPD2I]>, VEX_WIG;
15260b57cec5SDimitry Andric
15270b57cec5SDimitry Andric// XMM only
15280b57cec5SDimitry Andricdef VCVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
15290b57cec5SDimitry Andric                      "vcvtpd2dq{x}\t{$src, $dst|$dst, $src}",
15300b57cec5SDimitry Andric                      [(set VR128:$dst,
15310b57cec5SDimitry Andric                        (v4i32 (X86cvtp2Int (loadv2f64 addr:$src))))]>, VEX,
15320b57cec5SDimitry Andric                      Sched<[WriteCvtPD2ILd]>, VEX_WIG;
15330b57cec5SDimitry Andric
15340b57cec5SDimitry Andric// YMM only
15350b57cec5SDimitry Andricdef VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
15360b57cec5SDimitry Andric                       "vcvtpd2dq\t{$src, $dst|$dst, $src}",
15370b57cec5SDimitry Andric                       [(set VR128:$dst,
15380b57cec5SDimitry Andric                         (v4i32 (X86cvtp2Int (v4f64 VR256:$src))))]>,
15390b57cec5SDimitry Andric                       VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG;
15400b57cec5SDimitry Andricdef VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
15410b57cec5SDimitry Andric                       "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
15420b57cec5SDimitry Andric                       [(set VR128:$dst,
15430b57cec5SDimitry Andric                         (v4i32 (X86cvtp2Int (loadv4f64 addr:$src))))]>,
15440b57cec5SDimitry Andric                       VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG;
15450b57cec5SDimitry Andric} // Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1
15460b57cec5SDimitry Andric
// Aliases restricted to the att assembler variant that accept the explicitly
// suffixed vcvtpd2dqx / vcvtpd2dqy mnemonics for the register-register forms
// (VR128 source and VR256 source respectively).
// NOTE(review): the literal 0 presumably marks the alias as parse-only --
// confirm against the InstAlias class definition.
15470b57cec5SDimitry Andricdef : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
15480b57cec5SDimitry Andric                (VCVTPD2DQrr VR128:$dst, VR128:$src), 0, "att">;
15490b57cec5SDimitry Andricdef : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}",
15500b57cec5SDimitry Andric                (VCVTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">;
15510b57cec5SDimitry Andric
// Non-VEX packed double -> doubleword conversion (v2f64 -> v4i32 via
// X86cvtp2Int), memory form first (matching memopv2f64), then register form.
15520b57cec5SDimitry Andricdef CVTPD2DQrm  : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
15530b57cec5SDimitry Andric                      "cvtpd2dq\t{$src, $dst|$dst, $src}",
15540b57cec5SDimitry Andric                      [(set VR128:$dst,
15550b57cec5SDimitry Andric                        (v4i32 (X86cvtp2Int (memopv2f64 addr:$src))))]>,
1556480093f4SDimitry Andric                      Sched<[WriteCvtPD2ILd]>, SIMD_EXC;
15570b57cec5SDimitry Andricdef CVTPD2DQrr  : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
15580b57cec5SDimitry Andric                      "cvtpd2dq\t{$src, $dst|$dst, $src}",
15590b57cec5SDimitry Andric                      [(set VR128:$dst,
15600b57cec5SDimitry Andric                        (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>,
1561480093f4SDimitry Andric                      Sched<[WriteCvtPD2I]>, SIMD_EXC;
15620b57cec5SDimitry Andric
15630b57cec5SDimitry Andric// Convert with truncation packed single/double fp to doubleword
15640b57cec5SDimitry Andric// SSE2 packed instructions with XS prefix
// Truncating packed single -> doubleword conversion (X86any_cvttp2si). The
// outer let marks every record, AVX and non-VEX alike, as using MXCSR and as
// mayRaiseFPException.
1565480093f4SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1 in {
// AVX forms (128-bit and 256-bit), enabled under HasAVX together with NoVLX.
// NOTE(review): the asm strings omit the leading v; presumably the VS2SI class
// prepends it -- confirm in the format classes.
15660b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
15670b57cec5SDimitry Andricdef VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
15680b57cec5SDimitry Andric                         "cvttps2dq\t{$src, $dst|$dst, $src}",
15690b57cec5SDimitry Andric                         [(set VR128:$dst,
1570480093f4SDimitry Andric                           (v4i32 (X86any_cvttp2si (v4f32 VR128:$src))))]>,
15710b57cec5SDimitry Andric                         VEX, Sched<[WriteCvtPS2I]>, VEX_WIG;
15720b57cec5SDimitry Andricdef VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
15730b57cec5SDimitry Andric                         "cvttps2dq\t{$src, $dst|$dst, $src}",
15740b57cec5SDimitry Andric                         [(set VR128:$dst,
1575480093f4SDimitry Andric                           (v4i32 (X86any_cvttp2si (loadv4f32 addr:$src))))]>,
15760b57cec5SDimitry Andric                         VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG;
15770b57cec5SDimitry Andricdef VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
15780b57cec5SDimitry Andric                          "cvttps2dq\t{$src, $dst|$dst, $src}",
15790b57cec5SDimitry Andric                          [(set VR256:$dst,
1580480093f4SDimitry Andric                            (v8i32 (X86any_cvttp2si (v8f32 VR256:$src))))]>,
15810b57cec5SDimitry Andric                          VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG;
15820b57cec5SDimitry Andricdef VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
15830b57cec5SDimitry Andric                          "cvttps2dq\t{$src, $dst|$dst, $src}",
15840b57cec5SDimitry Andric                          [(set VR256:$dst,
1585480093f4SDimitry Andric                            (v8i32 (X86any_cvttp2si (loadv8f32 addr:$src))))]>,
15860b57cec5SDimitry Andric                          VEX, VEX_L,
15870b57cec5SDimitry Andric                          Sched<[WriteCvtPS2IYLd]>, VEX_WIG;
15880b57cec5SDimitry Andric} // Predicates = [HasAVX, NoVLX]
15890b57cec5SDimitry Andric
// Non-VEX forms; the memory form matches memopv4f32 rather than loadv4f32.
15900b57cec5SDimitry Andricdef CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
15910b57cec5SDimitry Andric                       "cvttps2dq\t{$src, $dst|$dst, $src}",
15920b57cec5SDimitry Andric                       [(set VR128:$dst,
1593480093f4SDimitry Andric                         (v4i32 (X86any_cvttp2si (v4f32 VR128:$src))))]>,
15940b57cec5SDimitry Andric                       Sched<[WriteCvtPS2I]>;
15950b57cec5SDimitry Andricdef CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
15960b57cec5SDimitry Andric                       "cvttps2dq\t{$src, $dst|$dst, $src}",
15970b57cec5SDimitry Andric                       [(set VR128:$dst,
1598480093f4SDimitry Andric                         (v4i32 (X86any_cvttp2si (memopv4f32 addr:$src))))]>,
15990b57cec5SDimitry Andric                       Sched<[WriteCvtPS2ILd]>;
1600480093f4SDimitry Andric} // Uses = [MXCSR], mayRaiseFPException = 1
16010b57cec5SDimitry Andric
16020b57cec5SDimitry Andric// The assembler can recognize rr 256-bit instructions by seeing a ymm
16030b57cec5SDimitry Andric// register, but the same isn't true when using memory operands instead.
16040b57cec5SDimitry Andric// Provide other assembly rr and rm forms to address this explicitly.
// AVX truncating packed double -> doubleword conversion (X86any_cvttp2si).
// The destination is VR128 for both the v2f64 and the v4f64 (VEX_L) sources.
// The memory forms spell the mnemonic with {x} / {y}.
// NOTE(review): the asm strings omit the leading v; presumably the VPDI class
// prepends it -- confirm in the format classes.
1605480093f4SDimitry Andriclet Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
16060b57cec5SDimitry Andric// XMM only
16070b57cec5SDimitry Andricdef VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
16080b57cec5SDimitry Andric                        "cvttpd2dq\t{$src, $dst|$dst, $src}",
16090b57cec5SDimitry Andric                        [(set VR128:$dst,
1610480093f4SDimitry Andric                          (v4i32 (X86any_cvttp2si (v2f64 VR128:$src))))]>,
16110b57cec5SDimitry Andric                        VEX, Sched<[WriteCvtPD2I]>, VEX_WIG;
16120b57cec5SDimitry Andricdef VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
16130b57cec5SDimitry Andric                        "cvttpd2dq{x}\t{$src, $dst|$dst, $src}",
16140b57cec5SDimitry Andric                        [(set VR128:$dst,
1615480093f4SDimitry Andric                          (v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))))]>,
16160b57cec5SDimitry Andric                        VEX, Sched<[WriteCvtPD2ILd]>, VEX_WIG;
16170b57cec5SDimitry Andric
16180b57cec5SDimitry Andric// YMM only
16190b57cec5SDimitry Andricdef VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
16200b57cec5SDimitry Andric                         "cvttpd2dq\t{$src, $dst|$dst, $src}",
16210b57cec5SDimitry Andric                         [(set VR128:$dst,
1622480093f4SDimitry Andric                           (v4i32 (X86any_cvttp2si (v4f64 VR256:$src))))]>,
16230b57cec5SDimitry Andric                         VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG;
16240b57cec5SDimitry Andricdef VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
16250b57cec5SDimitry Andric                         "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
16260b57cec5SDimitry Andric                         [(set VR128:$dst,
1627480093f4SDimitry Andric                           (v4i32 (X86any_cvttp2si (loadv4f64 addr:$src))))]>,
16280b57cec5SDimitry Andric                         VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG;
16290b57cec5SDimitry Andric} // Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1
16300b57cec5SDimitry Andric
// Aliases restricted to the att assembler variant that accept the explicitly
// suffixed vcvttpd2dqx / vcvttpd2dqy mnemonics for the register-register forms
// (VR128 source and VR256 source respectively).
// NOTE(review): the literal 0 presumably marks the alias as parse-only --
// confirm against the InstAlias class definition.
16310b57cec5SDimitry Andricdef : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
16320b57cec5SDimitry Andric                (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0, "att">;
16330b57cec5SDimitry Andricdef : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}",
16340b57cec5SDimitry Andric                (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">;
16350b57cec5SDimitry Andric
// Select the generic v4f64 -> v4i32 any_fp_to_sint node (register and
// loadv4f64 sources) as the 256-bit-source VCVTTPD2DQY instructions.
16360b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
1637480093f4SDimitry Andric  def : Pat<(v4i32 (any_fp_to_sint (v4f64 VR256:$src))),
16380b57cec5SDimitry Andric            (VCVTTPD2DQYrr VR256:$src)>;
1639480093f4SDimitry Andric  def : Pat<(v4i32 (any_fp_to_sint (loadv4f64 addr:$src))),
16400b57cec5SDimitry Andric            (VCVTTPD2DQYrm addr:$src)>;
16410b57cec5SDimitry Andric} // Predicates = [HasAVX, NoVLX]
16420b57cec5SDimitry Andric
// Non-VEX truncating packed double -> doubleword conversion (v2f64 -> v4i32
// via X86any_cvttp2si); the memory form matches memopv2f64.
16430b57cec5SDimitry Andricdef CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
16440b57cec5SDimitry Andric                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
16450b57cec5SDimitry Andric                      [(set VR128:$dst,
1646480093f4SDimitry Andric                        (v4i32 (X86any_cvttp2si (v2f64 VR128:$src))))]>,
1647480093f4SDimitry Andric                      Sched<[WriteCvtPD2I]>, SIMD_EXC;
16480b57cec5SDimitry Andricdef CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
16490b57cec5SDimitry Andric                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
16500b57cec5SDimitry Andric                      [(set VR128:$dst,
1651480093f4SDimitry Andric                        (v4i32 (X86any_cvttp2si (memopv2f64 addr:$src))))]>,
1652480093f4SDimitry Andric                      Sched<[WriteCvtPD2ILd]>, SIMD_EXC;
16530b57cec5SDimitry Andric
16540b57cec5SDimitry Andric// Convert packed single to packed double
// AVX packed single -> double conversion. The 128-bit register form matches
// X86any_vfpext (v4f32 -> v2f64) and its memory form an extloadv2f32 from an
// f64mem operand. The 256-bit forms (VEX_L) match any_fpextend
// (v4f32 -> v4f64) and an extloadv4f32 from an f128mem operand.
1655480093f4SDimitry Andriclet Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
16560b57cec5SDimitry Andric                  // SSE2 instructions without OpSize prefix
16570b57cec5SDimitry Andricdef VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
16580b57cec5SDimitry Andric                    "vcvtps2pd\t{$src, $dst|$dst, $src}",
1659480093f4SDimitry Andric                    [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>,
16600b57cec5SDimitry Andric                    PS, VEX, Sched<[WriteCvtPS2PD]>, VEX_WIG;
16610b57cec5SDimitry Andricdef VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
16620b57cec5SDimitry Andric                    "vcvtps2pd\t{$src, $dst|$dst, $src}",
16630b57cec5SDimitry Andric                    [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
16640b57cec5SDimitry Andric                    PS, VEX, Sched<[WriteCvtPS2PD.Folded]>, VEX_WIG;
16650b57cec5SDimitry Andricdef VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
16660b57cec5SDimitry Andric                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
1667480093f4SDimitry Andric                     [(set VR256:$dst, (v4f64 (any_fpextend (v4f32 VR128:$src))))]>,
16680b57cec5SDimitry Andric                     PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, VEX_WIG;
16690b57cec5SDimitry Andricdef VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
16700b57cec5SDimitry Andric                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
16710b57cec5SDimitry Andric                     [(set VR256:$dst, (v4f64 (extloadv4f32 addr:$src)))]>,
16720b57cec5SDimitry Andric                     PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, VEX_WIG;
16730b57cec5SDimitry Andric} // Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1
16740b57cec5SDimitry Andric
// Non-VEX cvtps2pd (opcode 0x5A, PS prefix class), guarded by [UseSSE2].
// Both defs read MXCSR and are flagged mayRaiseFPException.
//   rr : VR128 -> VR128, selects X86any_vfpext (v4f32 -> v2f64).
//   rm : f64mem -> VR128, selects extloadv2f32 (v2f64 result).
1675480093f4SDimitry Andriclet Predicates = [UseSSE2], Uses = [MXCSR], mayRaiseFPException = 1 in {
16760b57cec5SDimitry Andricdef CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
16770b57cec5SDimitry Andric                   "cvtps2pd\t{$src, $dst|$dst, $src}",
1678480093f4SDimitry Andric                   [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>,
16790b57cec5SDimitry Andric                   PS, Sched<[WriteCvtPS2PD]>;
16800b57cec5SDimitry Andricdef CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
16810b57cec5SDimitry Andric                   "cvtps2pd\t{$src, $dst|$dst, $src}",
16820b57cec5SDimitry Andric                   [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
16830b57cec5SDimitry Andric                   PS, Sched<[WriteCvtPS2PD.Folded]>;
16840b57cec5SDimitry Andric}
16850b57cec5SDimitry Andric
16860b57cec5SDimitry Andric// Convert Packed DW Integers to Packed Double FP
// VEX-encoded vcvtdq2pd (opcode 0xE6), guarded by [HasAVX, NoVLX].
//   rm  : i64mem -> VR128. The pattern matches a loadi64 placed in a v2i64
//         via scalar_to_vector, bitcast with bc_v4i32, then X86any_VSintToFP
//         to v2f64.
//   rr  : VR128 -> VR128, X86any_VSintToFP (v4i32 -> v2f64).
//   Yrm : i128mem -> VR256, any_sint_to_fp of loadv4i32 (v4f64), VEX_L.
//   Yrr : VR128 -> VR256, any_sint_to_fp (v4i32 -> v4f64), VEX_L.
// Unlike the FP<->FP conversions in this file section, this let-block does
// not list MXCSR in Uses or set mayRaiseFPException.
16870b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
// The brace-less "let ... in" below applies only to VCVTDQ2PDrm.
16880b57cec5SDimitry Andriclet hasSideEffects = 0, mayLoad = 1 in
16890b57cec5SDimitry Andricdef VCVTDQ2PDrm  : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
16900b57cec5SDimitry Andric                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
16910b57cec5SDimitry Andric                        [(set VR128:$dst,
1692480093f4SDimitry Andric                          (v2f64 (X86any_VSintToFP
16930b57cec5SDimitry Andric                                  (bc_v4i32
16940b57cec5SDimitry Andric                                   (v2i64 (scalar_to_vector
16950b57cec5SDimitry Andric                                           (loadi64 addr:$src)))))))]>,
16960b57cec5SDimitry Andric                        VEX, Sched<[WriteCvtI2PDLd]>, VEX_WIG;
16970b57cec5SDimitry Andricdef VCVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
16980b57cec5SDimitry Andric                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
16990b57cec5SDimitry Andric                        [(set VR128:$dst,
1700480093f4SDimitry Andric                          (v2f64 (X86any_VSintToFP (v4i32 VR128:$src))))]>,
17010b57cec5SDimitry Andric                        VEX, Sched<[WriteCvtI2PD]>, VEX_WIG;
17020b57cec5SDimitry Andricdef VCVTDQ2PDYrm  : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
17030b57cec5SDimitry Andric                         "vcvtdq2pd\t{$src, $dst|$dst, $src}",
17040b57cec5SDimitry Andric                         [(set VR256:$dst,
1705480093f4SDimitry Andric                           (v4f64 (any_sint_to_fp (loadv4i32 addr:$src))))]>,
17060b57cec5SDimitry Andric                         VEX, VEX_L, Sched<[WriteCvtI2PDYLd]>,
17070b57cec5SDimitry Andric                         VEX_WIG;
17080b57cec5SDimitry Andricdef VCVTDQ2PDYrr  : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
17090b57cec5SDimitry Andric                         "vcvtdq2pd\t{$src, $dst|$dst, $src}",
17100b57cec5SDimitry Andric                         [(set VR256:$dst,
1711480093f4SDimitry Andric                           (v4f64 (any_sint_to_fp (v4i32 VR128:$src))))]>,
17120b57cec5SDimitry Andric                         VEX, VEX_L, Sched<[WriteCvtI2PDY]>, VEX_WIG;
17130b57cec5SDimitry Andric}
17140b57cec5SDimitry Andric
// Non-VEX cvtdq2pd (opcode 0xE6), memory and register forms.
//   rm : i64mem -> VR128; same loadi64 / scalar_to_vector / bc_v4i32 /
//        X86any_VSintToFP pattern shape as the VEX form.
//   rr : VR128 -> VR128, X86any_VSintToFP (v4i32 -> v2f64).
// No Predicates are set here.
// NOTE(review): presumably the S2SI class supplies the SSE2 requirement --
// confirm against its definition (not visible in this chunk).
// The brace-less "let ... in" below applies only to CVTDQ2PDrm.
17150b57cec5SDimitry Andriclet hasSideEffects = 0, mayLoad = 1 in
17160b57cec5SDimitry Andricdef CVTDQ2PDrm  : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
17170b57cec5SDimitry Andric                       "cvtdq2pd\t{$src, $dst|$dst, $src}",
17180b57cec5SDimitry Andric                       [(set VR128:$dst,
1719480093f4SDimitry Andric                         (v2f64 (X86any_VSintToFP
17200b57cec5SDimitry Andric                                 (bc_v4i32
17210b57cec5SDimitry Andric                                  (v2i64 (scalar_to_vector
17220b57cec5SDimitry Andric                                          (loadi64 addr:$src)))))))]>,
17230b57cec5SDimitry Andric                       Sched<[WriteCvtI2PDLd]>;
17240b57cec5SDimitry Andricdef CVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
17250b57cec5SDimitry Andric                       "cvtdq2pd\t{$src, $dst|$dst, $src}",
17260b57cec5SDimitry Andric                       [(set VR128:$dst,
1727480093f4SDimitry Andric                         (v2f64 (X86any_VSintToFP (v4i32 VR128:$src))))]>,
17280b57cec5SDimitry Andric                       Sched<[WriteCvtI2PD]>;
17290b57cec5SDimitry Andric
17300b57cec5SDimitry Andric// AVX register conversion intrinsics
// Extra selection pattern: when the v4i32 source of X86any_VSintToFP is a
// bitcast of an X86vzload64 (v2i64), select the memory form VCVTDQ2PDrm
// directly on the load address.
17310b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
1732480093f4SDimitry Andric  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
17330b57cec5SDimitry Andric            (VCVTDQ2PDrm addr:$src)>;
17340b57cec5SDimitry Andric} // Predicates = [HasAVX, NoVLX]
17350b57cec5SDimitry Andric
17360b57cec5SDimitry Andric// SSE2 register conversion intrinsics
// Extra selection pattern: when the v4i32 source of X86any_VSintToFP is a
// bitcast of an X86vzload64 (v2i64), select the memory form CVTDQ2PDrm
// directly on the load address.
17370b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
1738480093f4SDimitry Andric  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
17390b57cec5SDimitry Andric            (CVTDQ2PDrm addr:$src)>;
17400b57cec5SDimitry Andric} // Predicates = [UseSSE2]
17410b57cec5SDimitry Andric
17420b57cec5SDimitry Andric// Convert packed double to packed single
17430b57cec5SDimitry Andric// The assembler can recognize rr 256-bit instructions by seeing a ymm
17440b57cec5SDimitry Andric// register, but the same isn't true when using memory operands instead.
17450b57cec5SDimitry Andric// Provide other assembly rr and rm forms to address this explicitly.
// All four defs use opcode 0x5A, read MXCSR, are flagged
// mayRaiseFPException, and write a VR128 result via X86any_vfpround.
// The memory forms embed "{x}" / "{y}" in the mnemonic string so the two
// source widths (f128mem vs f256mem) get distinct spellings.
// NOTE(review): the asm strings here start with "cvtpd2ps" (no leading
// "v"); presumably the VPDI class prepends it -- confirm in the format defs.
1746480093f4SDimitry Andriclet Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
17470b57cec5SDimitry Andric// XMM only
17480b57cec5SDimitry Andricdef VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
17490b57cec5SDimitry Andric                       "cvtpd2ps\t{$src, $dst|$dst, $src}",
1750480093f4SDimitry Andric                       [(set VR128:$dst, (X86any_vfpround (v2f64 VR128:$src)))]>,
17510b57cec5SDimitry Andric                       VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG;
17520b57cec5SDimitry Andricdef VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
17530b57cec5SDimitry Andric                       "cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
1754480093f4SDimitry Andric                       [(set VR128:$dst, (X86any_vfpround (loadv2f64 addr:$src)))]>,
17550b57cec5SDimitry Andric                       VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG;
17560b57cec5SDimitry Andric
// 256-bit source forms (VEX_L): VR256 / f256mem in, VR128 out.
17570b57cec5SDimitry Andricdef VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
17580b57cec5SDimitry Andric                        "cvtpd2ps\t{$src, $dst|$dst, $src}",
1759480093f4SDimitry Andric                        [(set VR128:$dst, (X86any_vfpround VR256:$src))]>,
17600b57cec5SDimitry Andric                        VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG;
17610b57cec5SDimitry Andricdef VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
17620b57cec5SDimitry Andric                        "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
1763480093f4SDimitry Andric                        [(set VR128:$dst, (X86any_vfpround (loadv4f64 addr:$src)))]>,
17640b57cec5SDimitry Andric                        VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG;
17650b57cec5SDimitry Andric} // Predicates = [HasAVX, NoVLX]
17660b57cec5SDimitry Andric
// Assembly aliases giving the register forms explicit "x" / "y" suffixed
// spellings: vcvtpd2psx -> VCVTPD2PSrr (VR128 source) and
// vcvtpd2psy -> VCVTPD2PSYrr (VR256 source).
// NOTE(review): the trailing arguments (0, "att") presumably make these
// parse-only aliases restricted to the AT&T variant -- confirm against the
// InstAlias class definition.
17670b57cec5SDimitry Andricdef : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
17680b57cec5SDimitry Andric                (VCVTPD2PSrr VR128:$dst, VR128:$src), 0, "att">;
17690b57cec5SDimitry Andricdef : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
17700b57cec5SDimitry Andric                (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0, "att">;
17710b57cec5SDimitry Andric
// Non-VEX cvtpd2ps (opcode 0x5A), register and memory forms. Both select
// X86any_vfpround on a v2f64 source and write VR128; the memory form reads
// through memopv2f64 and uses the ".Folded" sched class.
// NOTE(review): SIMD_EXC is defined outside this chunk; presumably it adds
// the MXCSR use / FP-exception flags -- confirm against its definition.
17720b57cec5SDimitry Andricdef CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
17730b57cec5SDimitry Andric                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
1774480093f4SDimitry Andric                     [(set VR128:$dst, (X86any_vfpround (v2f64 VR128:$src)))]>,
1775480093f4SDimitry Andric                     Sched<[WriteCvtPD2PS]>, SIMD_EXC;
17760b57cec5SDimitry Andricdef CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
17770b57cec5SDimitry Andric                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
1778480093f4SDimitry Andric                     [(set VR128:$dst, (X86any_vfpround (memopv2f64 addr:$src)))]>,
1779480093f4SDimitry Andric                     Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXC;
17800b57cec5SDimitry Andric
17810b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
17820b57cec5SDimitry Andric// SSE 1 & 2 - Compare Instructions
17830b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
17840b57cec5SDimitry Andric
17850b57cec5SDimitry Andric// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
// Emits four records per instantiation, all with opcode 0xC2 and a trailing
// u8imm:$cc condition-code operand matched as timm:$cc:
//   rr_Int / rm_Int : operate on VR128; the first source is typed as VT.
//                     rm_Int reads $src2 through `memop` / `mem_frags`.
//   rr / rm         : isCodeGenOnly forms on register class RC; rm reads
//                     $src2 through `x86memop` / `ld_frag`. Only rr is
//                     marked isCommutable.
// Parameters:
//   RC, x86memop  - register class and memory operand for the rr/rm forms.
//   memop         - memory operand for rm_Int.
//   OpNode, VT    - DAG node to select and the vector type used by *_Int.
//   ld_frag       - load fragment for rm.
//   asm           - complete assembly string (supplied by the caller).
//   sched         - scheduling class; memory forms use sched.Folded and
//                   sched.ReadAfterFold.
//   mem_frags     - load fragments for rm_Int.
17860b57cec5SDimitry Andricmulticlass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
17875ffd83dbSDimitry Andric                            Operand memop, SDNode OpNode, ValueType VT,
17880b57cec5SDimitry Andric                            PatFrag ld_frag, string asm,
17895ffd83dbSDimitry Andric                            X86FoldableSchedWrite sched,
17905ffd83dbSDimitry Andric                            PatFrags mem_frags> {
17915ffd83dbSDimitry Andric  def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
17925ffd83dbSDimitry Andric                    (ins VR128:$src1, VR128:$src2, u8imm:$cc), asm,
17935ffd83dbSDimitry Andric                    [(set VR128:$dst, (OpNode (VT VR128:$src1),
17945ffd83dbSDimitry Andric                                              VR128:$src2, timm:$cc))]>,
17955ffd83dbSDimitry Andric           Sched<[sched]>, SIMD_EXC;
17965ffd83dbSDimitry Andric  let mayLoad = 1 in
17975ffd83dbSDimitry Andric  def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
17985ffd83dbSDimitry Andric                    (ins VR128:$src1, memop:$src2, u8imm:$cc), asm,
17995ffd83dbSDimitry Andric                    [(set VR128:$dst, (OpNode (VT VR128:$src1),
18005ffd83dbSDimitry Andric                                              (mem_frags addr:$src2), timm:$cc))]>,
18015ffd83dbSDimitry Andric           Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
18025ffd83dbSDimitry Andric
18035ffd83dbSDimitry Andric  let isCodeGenOnly = 1 in {
18040b57cec5SDimitry Andric    let isCommutable = 1 in
18050b57cec5SDimitry Andric    def rr : SIi8<0xC2, MRMSrcReg,
18060b57cec5SDimitry Andric                  (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
18075ffd83dbSDimitry Andric                  [(set RC:$dst, (OpNode RC:$src1, RC:$src2, timm:$cc))]>,
18085ffd83dbSDimitry Andric                  Sched<[sched]>, SIMD_EXC;
18090b57cec5SDimitry Andric    def rm : SIi8<0xC2, MRMSrcMem,
18100b57cec5SDimitry Andric                  (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
18115ffd83dbSDimitry Andric                  [(set RC:$dst, (OpNode RC:$src1,
18128bcb0991SDimitry Andric                                         (ld_frag addr:$src2), timm:$cc))]>,
18135ffd83dbSDimitry Andric                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
18140b57cec5SDimitry Andric  }
1815480093f4SDimitry Andric}
18160b57cec5SDimitry Andric
// VEX-encoded scalar compares built from sse12_cmp_scalar with X86cmps.
// VCMPSS: FR32 / f32mem / ssmem / v4f32, XS prefix, SSEPackedSingle domain.
// VCMPSD: FR64 / f64mem / sdmem / v2f64, XD prefix, SSEPackedDouble domain.
// The asm strings use the three-operand form ($src1 and $dst are separate).
18170b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in
18185ffd83dbSDimitry Andricdefm VCMPSS : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32,
18190b57cec5SDimitry Andric                 "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
18200b57cec5SDimitry Andric                 SchedWriteFCmpSizes.PS.Scl, sse_load_f32>,
18210b57cec5SDimitry Andric                 XS, VEX_4V, VEX_LIG, VEX_WIG;
18220b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in
18235ffd83dbSDimitry Andricdefm VCMPSD : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64,
18245ffd83dbSDimitry Andric                 "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
18250b57cec5SDimitry Andric                 SchedWriteFCmpSizes.PD.Scl, sse_load_f64>,
18260b57cec5SDimitry Andric                 XD, VEX_4V, VEX_LIG, VEX_WIG;
18275ffd83dbSDimitry Andric
// Non-VEX scalar compares built from sse12_cmp_scalar with X86cmps.
// $src1 is tied to $dst, so the asm strings list only $src2 and $cc after
// the destination.
// CMPSS: FR32 / f32mem / ssmem / v4f32, XS prefix, SSEPackedSingle domain.
// CMPSD: FR64 / f64mem / sdmem / v2f64, XD prefix, SSEPackedDouble domain.
18280b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
18290b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in
18305ffd83dbSDimitry Andric  defm CMPSS : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32,
18315ffd83dbSDimitry Andric                  "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}",
18320b57cec5SDimitry Andric                  SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS;
18330b57cec5SDimitry Andric  let ExeDomain = SSEPackedDouble in
18345ffd83dbSDimitry Andric  defm CMPSD : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64,
18355ffd83dbSDimitry Andric                  "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
18360b57cec5SDimitry Andric                  SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, XD;
18370b57cec5SDimitry Andric}
18380b57cec5SDimitry Andric
18390b57cec5SDimitry Andric// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
// Emits `rr` and `rm` records. Neither has an output operand; the pattern
// result is `(set EFLAGS, (OpNode (vt $src1), $src2))`.
// Parameters:
//   opc        - opcode byte.
//   RC, vt     - register class and value type of the compared operands.
//   OpNode     - compare node to select.
//   x86memop   - memory operand for rm; read through ld_frag.
//   OpcodeStr  - mnemonic; the operand list is appended here.
//   d          - execution domain applied to both records.
//   sched      - scheduling class, default WriteFComX; rm uses
//                sched.Folded and sched.ReadAfterFold.
1840*fe6060f1SDimitry Andricmulticlass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDPatternOperator OpNode,
18410b57cec5SDimitry Andric                         ValueType vt, X86MemOperand x86memop,
1842480093f4SDimitry Andric                         PatFrag ld_frag, string OpcodeStr, Domain d,
18435ffd83dbSDimitry Andric                         X86FoldableSchedWrite sched = WriteFComX> {
18445ffd83dbSDimitry Andric  let ExeDomain = d in {
18450b57cec5SDimitry Andric  def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
18460b57cec5SDimitry Andric                     !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
18470b57cec5SDimitry Andric                     [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
18485ffd83dbSDimitry Andric          Sched<[sched]>, SIMD_EXC;
18490b57cec5SDimitry Andric  let mayLoad = 1 in
18500b57cec5SDimitry Andric  def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
18510b57cec5SDimitry Andric                     !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
18520b57cec5SDimitry Andric                     [(set EFLAGS, (OpNode (vt RC:$src1),
18530b57cec5SDimitry Andric                                           (ld_frag addr:$src2)))]>,
18545ffd83dbSDimitry Andric          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
18550b57cec5SDimitry Andric}
18560b57cec5SDimitry Andric}
18570b57cec5SDimitry Andric
18580b57cec5SDimitry Andric// sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp
// Emits `rr_Int` and `rm_Int` records with the same EFLAGS-setting pattern
// shape as sse12_ord_cmp. Differences visible here: OpNode is an SDNode,
// and the memory form takes an `Operand memop` read through `mem_frags`
// (PatFrags) rather than an X86MemOperand / single PatFrag.
// `sched` defaults to WriteFComX; `d` sets ExeDomain for both records.
18590b57cec5SDimitry Andricmulticlass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
18600b57cec5SDimitry Andric                             ValueType vt, Operand memop,
18615ffd83dbSDimitry Andric                             PatFrags mem_frags, string OpcodeStr,
1862480093f4SDimitry Andric                             Domain d,
18635ffd83dbSDimitry Andric                             X86FoldableSchedWrite sched = WriteFComX> {
18645ffd83dbSDimitry Andriclet ExeDomain = d in {
18650b57cec5SDimitry Andric  def rr_Int: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
18660b57cec5SDimitry Andric                     !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
18670b57cec5SDimitry Andric                     [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
18685ffd83dbSDimitry Andric          Sched<[sched]>, SIMD_EXC;
18690b57cec5SDimitry Andriclet mayLoad = 1 in
18700b57cec5SDimitry Andric  def rm_Int: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2),
18710b57cec5SDimitry Andric                     !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
18720b57cec5SDimitry Andric                     [(set EFLAGS, (OpNode (vt RC:$src1),
18735ffd83dbSDimitry Andric                                           (mem_frags addr:$src2)))]>,
18745ffd83dbSDimitry Andric          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
18750b57cec5SDimitry Andric}
1876480093f4SDimitry Andric}
18770b57cec5SDimitry Andric
// Instantiates the EFLAGS-writing scalar compares. Each of the eight names
// is expanded twice: once via sse12_ord_cmp on FR32/FR64 (giving rr/rm) and
// once, under isCodeGenOnly, via sse12_ord_cmp_int on VR128 (giving
// rr_Int/rm_Int).
//   [V]UCOMISS/[V]UCOMISD : opcode 0x2E; X86any_fcmp for the FR forms,
//                           X86ucomi for the VR128 forms.
//   [V]COMISS/[V]COMISD   : opcode 0x2F; X86strict_fcmps for the FR forms,
//                           X86comi for the VR128 forms.
// The V-prefixed instantiations add VEX, VEX_LIG and VEX_WIG; the others
// carry only the PS / PD prefix class.
// NOTE(review): the mnemonic strings passed for the V-prefixed forms have
// no leading "v"; presumably the format classes add it -- confirm.
18780b57cec5SDimitry Andriclet Defs = [EFLAGS] in {
1879480093f4SDimitry Andric  defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32,
1880480093f4SDimitry Andric                               "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG;
1881480093f4SDimitry Andric  defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64,
1882480093f4SDimitry Andric                               "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG;
1883480093f4SDimitry Andric  defm VCOMISS  : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32,
1884480093f4SDimitry Andric                               "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG;
1885480093f4SDimitry Andric  defm VCOMISD  : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64,
1886480093f4SDimitry Andric                               "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG;
18870b57cec5SDimitry Andric
18880b57cec5SDimitry Andric  let isCodeGenOnly = 1 in {
18890b57cec5SDimitry Andric    defm VUCOMISS  : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
1890480093f4SDimitry Andric                      sse_load_f32, "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG;
18910b57cec5SDimitry Andric    defm VUCOMISD  : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
1892480093f4SDimitry Andric                      sse_load_f64, "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG;
18930b57cec5SDimitry Andric
18940b57cec5SDimitry Andric    defm VCOMISS  : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
1895480093f4SDimitry Andric                       sse_load_f32, "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG;
18960b57cec5SDimitry Andric    defm VCOMISD  : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
1897480093f4SDimitry Andric                       sse_load_f64, "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG;
18980b57cec5SDimitry Andric  }
1899480093f4SDimitry Andric  defm UCOMISS  : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32,
1900480093f4SDimitry Andric                                  "ucomiss", SSEPackedSingle>, PS;
1901480093f4SDimitry Andric  defm UCOMISD  : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64,
1902480093f4SDimitry Andric                                  "ucomisd", SSEPackedDouble>, PD;
1903480093f4SDimitry Andric  defm COMISS   : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32,
1904480093f4SDimitry Andric                                  "comiss", SSEPackedSingle>, PS;
1905480093f4SDimitry Andric  defm COMISD   : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64,
1906480093f4SDimitry Andric                                  "comisd", SSEPackedDouble>, PD;
19070b57cec5SDimitry Andric
19080b57cec5SDimitry Andric  let isCodeGenOnly = 1 in {
19090b57cec5SDimitry Andric    defm UCOMISS  : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
1910480093f4SDimitry Andric                            sse_load_f32, "ucomiss", SSEPackedSingle>, PS;
19110b57cec5SDimitry Andric    defm UCOMISD  : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
1912480093f4SDimitry Andric                            sse_load_f64, "ucomisd", SSEPackedDouble>, PD;
19130b57cec5SDimitry Andric
19140b57cec5SDimitry Andric    defm COMISS  : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
1915480093f4SDimitry Andric                                sse_load_f32, "comiss", SSEPackedSingle>, PS;
19160b57cec5SDimitry Andric    defm COMISD  : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
1917480093f4SDimitry Andric                                    sse_load_f64, "comisd", SSEPackedDouble>, PD;
19180b57cec5SDimitry Andric  }
19190b57cec5SDimitry Andric} // Defs = [EFLAGS]
19200b57cec5SDimitry Andric
19210b57cec5SDimitry Andric// sse12_cmp_packed - sse 1 & 2 compare packed instructions
// Emits `rri` and `rmi` records (opcode 0xC2) that select X86any_cmpp with
// a timm:$cc condition code and produce a VT result in RC.
// Parameters:
//   RC, x86memop - register class and memory operand.
//   VT           - result vector type wrapped around X86any_cmpp.
//   asm          - complete assembly string (supplied by the caller).
//   sched        - scheduling class; rmi uses sched.Folded and
//                  sched.ReadAfterFold.
//   d            - execution domain passed to PIi8.
//   ld_frag      - load fragment used for $src2 in rmi.
// Only rri is marked isCommutable (the brace-less let covers one def).
19220b57cec5SDimitry Andricmulticlass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
19230b57cec5SDimitry Andric                            ValueType VT, string asm,
19240b57cec5SDimitry Andric                            X86FoldableSchedWrite sched,
19250b57cec5SDimitry Andric                            Domain d, PatFrag ld_frag> {
19260b57cec5SDimitry Andric  let isCommutable = 1 in
19270b57cec5SDimitry Andric  def rri : PIi8<0xC2, MRMSrcReg,
19280b57cec5SDimitry Andric             (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
1929480093f4SDimitry Andric             [(set RC:$dst, (VT (X86any_cmpp RC:$src1, RC:$src2, timm:$cc)))], d>,
19305ffd83dbSDimitry Andric            Sched<[sched]>, SIMD_EXC;
19310b57cec5SDimitry Andric  def rmi : PIi8<0xC2, MRMSrcMem,
19320b57cec5SDimitry Andric             (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
19330b57cec5SDimitry Andric             [(set RC:$dst,
1934480093f4SDimitry Andric               (VT (X86any_cmpp RC:$src1, (ld_frag addr:$src2), timm:$cc)))], d>,
19355ffd83dbSDimitry Andric            Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
1936480093f4SDimitry Andric}
19370b57cec5SDimitry Andric
// VEX-encoded packed compares built from sse12_cmp_packed, three-operand
// asm form. 128-bit variants use VR128/f128mem with loadv4f32/loadv2f64;
// the Y variants use VR256/f256mem with loadv8f32/loadv4f64 and add VEX_L.
19380b57cec5SDimitry Andricdefm VCMPPS : sse12_cmp_packed<VR128, f128mem, v4f32,
19390b57cec5SDimitry Andric               "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
19400b57cec5SDimitry Andric               SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG;
19410b57cec5SDimitry Andricdefm VCMPPD : sse12_cmp_packed<VR128, f128mem, v2f64,
19420b57cec5SDimitry Andric               "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
19430b57cec5SDimitry Andric               SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG;
19440b57cec5SDimitry Andricdefm VCMPPSY : sse12_cmp_packed<VR256, f256mem, v8f32,
19450b57cec5SDimitry Andric               "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
19460b57cec5SDimitry Andric               SchedWriteFCmpSizes.PS.YMM, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L, VEX_WIG;
19470b57cec5SDimitry Andricdefm VCMPPDY : sse12_cmp_packed<VR256, f256mem, v4f64,
19480b57cec5SDimitry Andric               "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
19490b57cec5SDimitry Andric               SchedWriteFCmpSizes.PD.YMM, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L, VEX_WIG;
// Non-VEX packed compares built from sse12_cmp_packed. $src1 is tied to
// $dst, and the memory forms use the memopv4f32 / memopv2f64 fragments
// (the VEX variants use loadv* instead).
19500b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
19510b57cec5SDimitry Andric  defm CMPPS : sse12_cmp_packed<VR128, f128mem, v4f32,
19520b57cec5SDimitry Andric                 "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
19530b57cec5SDimitry Andric                 SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, memopv4f32>, PS;
19540b57cec5SDimitry Andric  defm CMPPD : sse12_cmp_packed<VR128, f128mem, v2f64,
19550b57cec5SDimitry Andric                 "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
19560b57cec5SDimitry Andric                 SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD;
19570b57cec5SDimitry Andric}
19580b57cec5SDimitry Andric
// Predicate on a target-immediate condition code: keeps only the low three
// bits of the value and matches when they equal 0x00, 0x03, 0x04 or 0x07.
// It is used by the patterns in this file that swap compare operands.
// NOTE(review): these values presumably correspond to the EQ, UNORD, NEQ
// and ORD predicates of the cmpps/cmppd immediate -- confirm against the
// Intel SDM encoding table.
19598bcb0991SDimitry Andricdef CommutableCMPCC : PatLeaf<(timm), [{
19600b57cec5SDimitry Andric  uint64_t Imm = N->getZExtValue() & 0x7;
19610b57cec5SDimitry Andric  return (Imm == 0x00 || Imm == 0x03 || Imm == 0x04 || Imm == 0x07);
19620b57cec5SDimitry Andric}]>;
19630b57cec5SDimitry Andric
19640b57cec5SDimitry Andric// Patterns to select compares with loads in first operand.
// Each pattern matches a compare whose FIRST operand is the load and whose
// condition code satisfies CommutableCMPCC, then emits the memory form with
// the operands swapped (register as $src1, address as $src2). Covers the
// 256-bit and 128-bit packed forms (X86any_cmpp) and the scalar f64/f32
// forms (X86cmps), all mapped to the VEX instructions.
19650b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
1966480093f4SDimitry Andric  def : Pat<(v4f64 (X86any_cmpp (loadv4f64 addr:$src2), VR256:$src1,
19670b57cec5SDimitry Andric                                CommutableCMPCC:$cc)),
19688bcb0991SDimitry Andric            (VCMPPDYrmi VR256:$src1, addr:$src2, timm:$cc)>;
19690b57cec5SDimitry Andric
1970480093f4SDimitry Andric  def : Pat<(v8f32 (X86any_cmpp (loadv8f32 addr:$src2), VR256:$src1,
19710b57cec5SDimitry Andric                                CommutableCMPCC:$cc)),
19728bcb0991SDimitry Andric            (VCMPPSYrmi VR256:$src1, addr:$src2, timm:$cc)>;
19730b57cec5SDimitry Andric
1974480093f4SDimitry Andric  def : Pat<(v2f64 (X86any_cmpp (loadv2f64 addr:$src2), VR128:$src1,
19750b57cec5SDimitry Andric                                CommutableCMPCC:$cc)),
19768bcb0991SDimitry Andric            (VCMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>;
19770b57cec5SDimitry Andric
1978480093f4SDimitry Andric  def : Pat<(v4f32 (X86any_cmpp (loadv4f32 addr:$src2), VR128:$src1,
19790b57cec5SDimitry Andric                                CommutableCMPCC:$cc)),
19808bcb0991SDimitry Andric            (VCMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>;
19810b57cec5SDimitry Andric
19820b57cec5SDimitry Andric  def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
19830b57cec5SDimitry Andric                          CommutableCMPCC:$cc)),
19848bcb0991SDimitry Andric            (VCMPSDrm FR64:$src1, addr:$src2, timm:$cc)>;
19850b57cec5SDimitry Andric
19860b57cec5SDimitry Andric  def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
19870b57cec5SDimitry Andric                          CommutableCMPCC:$cc)),
19888bcb0991SDimitry Andric            (VCMPSSrm FR32:$src1, addr:$src2, timm:$cc)>;
19890b57cec5SDimitry Andric}
19900b57cec5SDimitry Andric
// Load-in-first-operand compare patterns for the non-VEX double-precision
// instructions. When the condition code satisfies CommutableCMPCC the
// operands are swapped so the load folds into CMPPDrmi / CMPSDrm. The
// packed pattern matches memopv2f64; the scalar pattern matches loadf64.
19910b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
1992480093f4SDimitry Andric  def : Pat<(v2f64 (X86any_cmpp (memopv2f64 addr:$src2), VR128:$src1,
19930b57cec5SDimitry Andric                                CommutableCMPCC:$cc)),
19948bcb0991SDimitry Andric            (CMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>;
19950b57cec5SDimitry Andric
19960b57cec5SDimitry Andric  def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
19970b57cec5SDimitry Andric                          CommutableCMPCC:$cc)),
19988bcb0991SDimitry Andric            (CMPSDrm FR64:$src1, addr:$src2, timm:$cc)>;
19990b57cec5SDimitry Andric}
20000b57cec5SDimitry Andric
// Load-in-first-operand compare patterns for the non-VEX single-precision
// instructions. When the condition code satisfies CommutableCMPCC the
// operands are swapped so the load folds into CMPPSrmi / CMPSSrm. The
// packed pattern matches memopv4f32; the scalar pattern matches loadf32.
20010b57cec5SDimitry Andriclet Predicates = [UseSSE1] in {
2002480093f4SDimitry Andric  def : Pat<(v4f32 (X86any_cmpp (memopv4f32 addr:$src2), VR128:$src1,
20030b57cec5SDimitry Andric                                CommutableCMPCC:$cc)),
20048bcb0991SDimitry Andric            (CMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>;
20050b57cec5SDimitry Andric
20060b57cec5SDimitry Andric  def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
20070b57cec5SDimitry Andric                          CommutableCMPCC:$cc)),
20088bcb0991SDimitry Andric            (CMPSSrm FR32:$src1, addr:$src2, timm:$cc)>;
20090b57cec5SDimitry Andric}
20100b57cec5SDimitry Andric
20110b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
20120b57cec5SDimitry Andric// SSE 1 & 2 - Shuffle Instructions
20130b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
20140b57cec5SDimitry Andric
20150b57cec5SDimitry Andric/// sse12_shuffle - sse 1 & 2 fp shuffle instructions
/// Emits `rmi` and `rri` records (opcode 0xC6) selecting X86Shufp with an
/// i8 target-immediate control operand ($src3).
///   RC, x86memop - register class and memory operand.
///   vt           - result vector type.
///   asm          - complete assembly string (supplied by the caller).
///   mem_frag     - load fragment used for $src2 in rmi.
///   sched        - scheduling class; rmi uses sched.Folded and
///                  sched.ReadAfterFold.
///   d            - execution domain passed to PIi8.
///   IsCommutable - applied to rri only (default 0).
20160b57cec5SDimitry Andricmulticlass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
20170b57cec5SDimitry Andric                         ValueType vt, string asm, PatFrag mem_frag,
20180b57cec5SDimitry Andric                         X86FoldableSchedWrite sched, Domain d,
20190b57cec5SDimitry Andric                         bit IsCommutable = 0> {
20200b57cec5SDimitry Andric  def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
20210b57cec5SDimitry Andric                   (ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
20220b57cec5SDimitry Andric                   [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
20238bcb0991SDimitry Andric                                       (i8 timm:$src3))))], d>,
20240b57cec5SDimitry Andric            Sched<[sched.Folded, sched.ReadAfterFold]>;
20250b57cec5SDimitry Andric  let isCommutable = IsCommutable in
20260b57cec5SDimitry Andric  def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
20270b57cec5SDimitry Andric                 (ins RC:$src1, RC:$src2, u8imm:$src3), asm,
20280b57cec5SDimitry Andric                 [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
20298bcb0991SDimitry Andric                                     (i8 timm:$src3))))], d>,
20300b57cec5SDimitry Andric            Sched<[sched]>;
20310b57cec5SDimitry Andric}
20320b57cec5SDimitry Andric
// VEX-encoded shufps/shufpd built from sse12_shuffle, guarded by
// [HasAVX, NoVLX]. 128-bit variants use VR128/f128mem and
// SchedWriteFShuffle.XMM; the Y variants use VR256/f256mem,
// SchedWriteFShuffle.YMM and add VEX_L. None passes IsCommutable, so the
// multiclass default (0) applies.
20330b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
20340b57cec5SDimitry Andric  defm VSHUFPS  : sse12_shuffle<VR128, f128mem, v4f32,
20350b57cec5SDimitry Andric           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
20360b57cec5SDimitry Andric           loadv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>,
20370b57cec5SDimitry Andric           PS, VEX_4V, VEX_WIG;
20380b57cec5SDimitry Andric  defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
20390b57cec5SDimitry Andric           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
20400b57cec5SDimitry Andric           loadv8f32, SchedWriteFShuffle.YMM, SSEPackedSingle>,
20410b57cec5SDimitry Andric           PS, VEX_4V, VEX_L, VEX_WIG;
20420b57cec5SDimitry Andric  defm VSHUFPD  : sse12_shuffle<VR128, f128mem, v2f64,
20430b57cec5SDimitry Andric           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
20440b57cec5SDimitry Andric           loadv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>,
20450b57cec5SDimitry Andric           PD, VEX_4V, VEX_WIG;
20460b57cec5SDimitry Andric  defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
20470b57cec5SDimitry Andric           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
20480b57cec5SDimitry Andric           loadv4f64, SchedWriteFShuffle.YMM, SSEPackedDouble>,
20490b57cec5SDimitry Andric           PD, VEX_4V, VEX_L, VEX_WIG;
20500b57cec5SDimitry Andric}
// Non-VEX shufps/shufpd built from sse12_shuffle. $src1 is tied to $dst and
// the memory forms use memopv4f32 / memopv2f64. SHUFPD passes
// IsCommutable = 1 (marking its rri form commutable); SHUFPS leaves the
// default of 0.
20510b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
20520b57cec5SDimitry Andric  defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
20530b57cec5SDimitry Andric                    "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
20540b57cec5SDimitry Andric                    memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
20550b57cec5SDimitry Andric  defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
20560b57cec5SDimitry Andric                    "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
20570b57cec5SDimitry Andric                    memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
20580b57cec5SDimitry Andric}
20590b57cec5SDimitry Andric
20600b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
20610b57cec5SDimitry Andric// SSE 1 & 2 - Unpack FP Instructions
20620b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
20630b57cec5SDimitry Andric
20640b57cec5SDimitry Andric/// sse12_unpack_interleave - sse 1 & 2 fp unpack and interleave
/// Emits `rr` and `rm` records that select `OpNode` on two RC operands and
/// produce a `vt` result.
///   opc          - opcode byte.
///   OpNode, vt   - DAG node to select and its result type.
///   mem_frag     - load fragment used for $src2 in rm.
///   RC, x86memop - register class and memory operand.
///   asm          - complete assembly string (supplied by the caller).
///   sched        - scheduling class; rm uses sched.Folded and
///                  sched.ReadAfterFold.
///   d            - execution domain passed to PI.
///   IsCommutable - applied to rr only (default 0).
20650b57cec5SDimitry Andricmulticlass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
20660b57cec5SDimitry Andric                                   PatFrag mem_frag, RegisterClass RC,
20670b57cec5SDimitry Andric                                   X86MemOperand x86memop, string asm,
20680b57cec5SDimitry Andric                                   X86FoldableSchedWrite sched, Domain d,
20690b57cec5SDimitry Andric                                   bit IsCommutable = 0> {
20700b57cec5SDimitry Andric    let isCommutable = IsCommutable in
20710b57cec5SDimitry Andric    def rr : PI<opc, MRMSrcReg,
20720b57cec5SDimitry Andric                (outs RC:$dst), (ins RC:$src1, RC:$src2),
20730b57cec5SDimitry Andric                asm, [(set RC:$dst,
20740b57cec5SDimitry Andric                           (vt (OpNode RC:$src1, RC:$src2)))], d>,
20750b57cec5SDimitry Andric                Sched<[sched]>;
20760b57cec5SDimitry Andric    def rm : PI<opc, MRMSrcMem,
20770b57cec5SDimitry Andric                (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
20780b57cec5SDimitry Andric                asm, [(set RC:$dst,
20790b57cec5SDimitry Andric                           (vt (OpNode RC:$src1,
20800b57cec5SDimitry Andric                                       (mem_frag addr:$src2))))], d>,
20810b57cec5SDimitry Andric             Sched<[sched.Folded, sched.ReadAfterFold]>;
20820b57cec5SDimitry Andric}
20830b57cec5SDimitry Andric
// VEX-encoded three-operand unpack instructions, gated on HasAVX and NoVLX.
// Opcode 0x15 pairs with X86Unpckh (high unpack), 0x14 with X86Unpckl (low).
// The first group is 128-bit (VR128 / f128mem); the second group is 256-bit
// (VR256 / f256mem) and additionally carries VEX_L. All use the plain load
// fragment. Only VUNPCKHPD passes IsCommutable = 1.
20840b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
20850b57cec5SDimitry Andricdefm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, load,
20860b57cec5SDimitry Andric      VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
20870b57cec5SDimitry Andric                     SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
20880b57cec5SDimitry Andricdefm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, load,
20890b57cec5SDimitry Andric      VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
20900b57cec5SDimitry Andric                     SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD, VEX_4V, VEX_WIG;
20910b57cec5SDimitry Andricdefm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, load,
20920b57cec5SDimitry Andric      VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
20930b57cec5SDimitry Andric                     SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
20940b57cec5SDimitry Andricdefm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, load,
20950b57cec5SDimitry Andric      VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
20960b57cec5SDimitry Andric                     SchedWriteFShuffle.XMM, SSEPackedDouble>, PD, VEX_4V, VEX_WIG;
20970b57cec5SDimitry Andric
20980b57cec5SDimitry Andricdefm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, load,
20990b57cec5SDimitry Andric      VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
21000b57cec5SDimitry Andric                     SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
21010b57cec5SDimitry Andricdefm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, load,
21020b57cec5SDimitry Andric      VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
21030b57cec5SDimitry Andric                     SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
21040b57cec5SDimitry Andricdefm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, load,
21050b57cec5SDimitry Andric      VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
21060b57cec5SDimitry Andric                     SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
21070b57cec5SDimitry Andricdefm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, load,
21080b57cec5SDimitry Andric      VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
21090b57cec5SDimitry Andric                     SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
21100b57cec5SDimitry Andric}// Predicates = [HasAVX, NoVLX]
21110b57cec5SDimitry Andric
// Legacy (non-VEX) two-operand unpack instructions. $src1 is tied to $dst, so
// the asm strings name only $src2 and $dst. These use the memop fragment
// instead of load. Only UNPCKHPD passes IsCommutable = 1.
21120b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
21130b57cec5SDimitry Andric  defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memop,
21140b57cec5SDimitry Andric        VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
21150b57cec5SDimitry Andric                       SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
21160b57cec5SDimitry Andric  defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memop,
21170b57cec5SDimitry Andric        VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
21180b57cec5SDimitry Andric                       SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
21190b57cec5SDimitry Andric  defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memop,
21200b57cec5SDimitry Andric        VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
21210b57cec5SDimitry Andric                       SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
21220b57cec5SDimitry Andric  defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memop,
21230b57cec5SDimitry Andric        VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
21240b57cec5SDimitry Andric                       SchedWriteFShuffle.XMM, SSEPackedDouble>, PD;
21250b57cec5SDimitry Andric} // Constraints = "$src1 = $dst"
21260b57cec5SDimitry Andric
// Under HasAVX1Only, 256-bit integer unpack nodes are selected onto the FP
// unpack instructions: v8i32 maps to VUNPCK{L,H}PSY and v4i64 maps to
// VUNPCK{L,H}PDY. Each node gets a folded-load (rm) and a register (rr)
// pattern.
21270b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in {
21280b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Unpckl VR256:$src1, (loadv8i32 addr:$src2))),
21290b57cec5SDimitry Andric            (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
21300b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
21310b57cec5SDimitry Andric            (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
21320b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Unpckh VR256:$src1, (loadv8i32 addr:$src2))),
21330b57cec5SDimitry Andric            (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
21340b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
21350b57cec5SDimitry Andric            (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
21360b57cec5SDimitry Andric
21370b57cec5SDimitry Andric  def : Pat<(v4i64 (X86Unpckl VR256:$src1, (loadv4i64 addr:$src2))),
21380b57cec5SDimitry Andric            (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
21390b57cec5SDimitry Andric  def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
21400b57cec5SDimitry Andric            (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
21410b57cec5SDimitry Andric  def : Pat<(v4i64 (X86Unpckh VR256:$src1, (loadv4i64 addr:$src2))),
21420b57cec5SDimitry Andric            (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
21430b57cec5SDimitry Andric  def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
21440b57cec5SDimitry Andric            (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
21450b57cec5SDimitry Andric}
21460b57cec5SDimitry Andric
// SSE2-only pattern: a v2f64 X86Unpckl whose second operand is a simple_load
// is selected as MOVHPDrm rather than as an UNPCKLPD memory form.
21470b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
21480b57cec5SDimitry Andric  // Use MOVHPD if the load isn't aligned enough for UNPCKLPD.
21490b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
21508bcb0991SDimitry Andric                              (v2f64 (simple_load addr:$src2)))),
21510b57cec5SDimitry Andric            (MOVHPDrm VR128:$src1, addr:$src2)>;
21520b57cec5SDimitry Andric}
21530b57cec5SDimitry Andric
21540b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
21550b57cec5SDimitry Andric// SSE 1 & 2 - Extract Floating-Point Sign mask
21560b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
21570b57cec5SDimitry Andric
21580b57cec5SDimitry Andric/// sse12_extr_sign_mask - sse 1 & 2 extract floating-point sign mask
///
/// Defines a single register/register record, rr, with opcode 0x50. It takes
/// a vector of type vt in RC and selects (X86movmsk src) into a GR32orGR64
/// destination. The asm string is the mnemonic passed in asm followed by the
/// two-operand src/dst operand list; scheduling class is WriteFMOVMSK.
21590b57cec5SDimitry Andricmulticlass sse12_extr_sign_mask<RegisterClass RC, ValueType vt,
21600b57cec5SDimitry Andric                                string asm, Domain d> {
21610b57cec5SDimitry Andric  def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src),
21620b57cec5SDimitry Andric              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
21630b57cec5SDimitry Andric              [(set GR32orGR64:$dst, (X86movmsk (vt RC:$src)))], d>,
21640b57cec5SDimitry Andric              Sched<[WriteFMOVMSK]>;
21650b57cec5SDimitry Andric}
21660b57cec5SDimitry Andric
// VEX-encoded MOVMSK instructions, gated on HasAVX: 128-bit PS/PD forms on
// VR128 and 256-bit PS/PD forms on VR256 (the latter carry VEX_L).
// The trailing patterns map integer vector types onto the same records:
// 32-bit-element types (v4i32, v8i32) use the PS form and 64-bit-element
// types (v2i64, v4i64) use the PD form.
21670b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
21680b57cec5SDimitry Andric  defm VMOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
21690b57cec5SDimitry Andric                                        SSEPackedSingle>, PS, VEX, VEX_WIG;
21700b57cec5SDimitry Andric  defm VMOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
21710b57cec5SDimitry Andric                                        SSEPackedDouble>, PD, VEX, VEX_WIG;
21720b57cec5SDimitry Andric  defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, v8f32, "movmskps",
21730b57cec5SDimitry Andric                                         SSEPackedSingle>, PS, VEX, VEX_L, VEX_WIG;
21740b57cec5SDimitry Andric  defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, v4f64, "movmskpd",
21750b57cec5SDimitry Andric                                         SSEPackedDouble>, PD, VEX, VEX_L, VEX_WIG;
21760b57cec5SDimitry Andric
21770b57cec5SDimitry Andric  // Also support integer VTs to avoid a int->fp bitcast in the DAG.
21780b57cec5SDimitry Andric  def : Pat<(X86movmsk (v4i32 VR128:$src)),
21790b57cec5SDimitry Andric            (VMOVMSKPSrr VR128:$src)>;
21800b57cec5SDimitry Andric  def : Pat<(X86movmsk (v2i64 VR128:$src)),
21810b57cec5SDimitry Andric            (VMOVMSKPDrr VR128:$src)>;
21820b57cec5SDimitry Andric  def : Pat<(X86movmsk (v8i32 VR256:$src)),
21830b57cec5SDimitry Andric            (VMOVMSKPSYrr VR256:$src)>;
21840b57cec5SDimitry Andric  def : Pat<(X86movmsk (v4i64 VR256:$src)),
21850b57cec5SDimitry Andric            (VMOVMSKPDYrr VR256:$src)>;
21860b57cec5SDimitry Andric}
21870b57cec5SDimitry Andric
// Legacy (non-VEX) MOVMSKPS / MOVMSKPD on VR128. No Predicates let directly
// wraps these two defm lines in this section.
21880b57cec5SDimitry Andricdefm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
21890b57cec5SDimitry Andric                                     SSEPackedSingle>, PS;
21900b57cec5SDimitry Andricdefm MOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
21910b57cec5SDimitry Andric                                     SSEPackedDouble>, PD;
21920b57cec5SDimitry Andric
// SSE2 counterparts of the integer movmsk patterns: v4i32 selects MOVMSKPSrr
// and v2i64 selects MOVMSKPDrr.
21930b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
21940b57cec5SDimitry Andric  // Also support integer VTs to avoid a int->fp bitcast in the DAG.
21950b57cec5SDimitry Andric  def : Pat<(X86movmsk (v4i32 VR128:$src)),
21960b57cec5SDimitry Andric            (MOVMSKPSrr VR128:$src)>;
21970b57cec5SDimitry Andric  def : Pat<(X86movmsk (v2i64 VR128:$src)),
21980b57cec5SDimitry Andric            (MOVMSKPDrr VR128:$src)>;
21990b57cec5SDimitry Andric}
22000b57cec5SDimitry Andric
22010b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
22020b57cec5SDimitry Andric// SSE2 - Packed Integer Logical Instructions
22030b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
22040b57cec5SDimitry Andric
22050b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in { // SSE integer instructions
22060b57cec5SDimitry Andric
22070b57cec5SDimitry Andric/// PDI_binop_rm - Simple SSE2 binary operator.
///
/// Emits rr (MRMSrcReg) and rm (MRMSrcMem) records that select
/// (OpVT (OpNode src1, src2)); in rm the second operand is loaded from
/// x86memop through memop_frag.
/// Is2Addr picks the asm operand list: when set, the two-operand form that
/// names only src2 and dst; otherwise the three-operand form with src1.
/// IsCommutable is applied to rr only (brace-less let covers one def).
/// The rm form is scheduled with sched.Folded and sched.ReadAfterFold.
22080b57cec5SDimitry Andricmulticlass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
22090b57cec5SDimitry Andric                        ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
22100b57cec5SDimitry Andric                        X86MemOperand x86memop, X86FoldableSchedWrite sched,
22110b57cec5SDimitry Andric                        bit IsCommutable, bit Is2Addr> {
22120b57cec5SDimitry Andric  let isCommutable = IsCommutable in
22130b57cec5SDimitry Andric  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
22140b57cec5SDimitry Andric       (ins RC:$src1, RC:$src2),
22150b57cec5SDimitry Andric       !if(Is2Addr,
22160b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
22170b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
22180b57cec5SDimitry Andric       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
22190b57cec5SDimitry Andric       Sched<[sched]>;
22200b57cec5SDimitry Andric  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
22210b57cec5SDimitry Andric       (ins RC:$src1, x86memop:$src2),
22220b57cec5SDimitry Andric       !if(Is2Addr,
22230b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
22240b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
22250b57cec5SDimitry Andric       [(set RC:$dst, (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
22260b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
22270b57cec5SDimitry Andric}
22280b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
22290b57cec5SDimitry Andric
/// PDI_binop_all - instantiate PDI_binop_rm three times for one opcode:
///   V#NAME   - VEX 128-bit form (VR128 / i128mem, load fragment, Is2Addr = 0),
///              predicated on HasAVX plus prd.
///   NAME     - legacy 128-bit form (memop fragment, Is2Addr = 1) with src1
///              tied to dst; no Predicates let is applied here.
///   V#NAME#Y - VEX 256-bit form (VR256 / i256mem, VEX_L, Is2Addr = 0),
///              predicated on HasAVX2 plus prd.
/// OpVT128 / OpVT256 are the value types used in the 128-bit and 256-bit
/// patterns; sched.XMM / sched.YMM supply the matching scheduling classes.
22300b57cec5SDimitry Andricmulticlass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
22310b57cec5SDimitry Andric                         ValueType OpVT128, ValueType OpVT256,
22320b57cec5SDimitry Andric                         X86SchedWriteWidths sched, bit IsCommutable,
22330b57cec5SDimitry Andric                         Predicate prd> {
22340b57cec5SDimitry Andriclet Predicates = [HasAVX, prd] in
22350b57cec5SDimitry Andric  defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
22360b57cec5SDimitry Andric                             VR128, load, i128mem, sched.XMM,
22370b57cec5SDimitry Andric                             IsCommutable, 0>, VEX_4V, VEX_WIG;
22380b57cec5SDimitry Andric
22390b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in
22400b57cec5SDimitry Andric  defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
22410b57cec5SDimitry Andric                           memop, i128mem, sched.XMM, IsCommutable, 1>;
22420b57cec5SDimitry Andric
22430b57cec5SDimitry Andriclet Predicates = [HasAVX2, prd] in
22440b57cec5SDimitry Andric  defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
22450b57cec5SDimitry Andric                               OpVT256, VR256, load, i256mem, sched.YMM,
22460b57cec5SDimitry Andric                               IsCommutable, 0>, VEX_4V, VEX_L, VEX_WIG;
22470b57cec5SDimitry Andric}
22480b57cec5SDimitry Andric
22490b57cec5SDimitry Andric// These are ordered here for pattern ordering requirements with the fp versions
// Packed integer logic ops, instantiated with v2i64 / v4i64 as the pattern
// types and NoVLX as the extra predicate. and / or / xor are marked
// commutable (1); X86andnp is not (0).
22500b57cec5SDimitry Andric
22510b57cec5SDimitry Andricdefm PAND  : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64,
22520b57cec5SDimitry Andric                           SchedWriteVecLogic, 1, NoVLX>;
22530b57cec5SDimitry Andricdefm POR   : PDI_binop_all<0xEB, "por", or, v2i64, v4i64,
22540b57cec5SDimitry Andric                           SchedWriteVecLogic, 1, NoVLX>;
22550b57cec5SDimitry Andricdefm PXOR  : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64,
22560b57cec5SDimitry Andric                           SchedWriteVecLogic, 1, NoVLX>;
22570b57cec5SDimitry Andricdefm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
22580b57cec5SDimitry Andric                           SchedWriteVecLogic, 0, NoVLX>;
22590b57cec5SDimitry Andric
22600b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
22610b57cec5SDimitry Andric// SSE 1 & 2 - Logical Instructions
22620b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
22630b57cec5SDimitry Andric
22640b57cec5SDimitry Andric/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
22650b57cec5SDimitry Andric///
22660b57cec5SDimitry Andric/// There are no patterns here because isel prefers integer versions for SSE2
22670b57cec5SDimitry Andric/// and later. There are SSE1 v4f32 patterns later.
///
/// Produces six instruction families per opcode: four VEX forms (256-bit and
/// 128-bit, PS and PD) under HasAVX and NoVLX, and two legacy 128-bit forms
/// (PS and PD) with src1 tied to dst. The mnemonic is OpcodeStr with a ps or
/// pd suffix appended.
/// Both pattern lists handed to sse12_fp_packed_logical_rm are empty, and
/// OpNode is not referenced anywhere in this body.
/// NOTE(review): the trailing 0 passed in the VEX forms goes to
/// sse12_fp_packed_logical_rm, defined outside this view -- presumably its
/// Is2Addr flag; confirm.
22680b57cec5SDimitry Andricmulticlass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
22690b57cec5SDimitry Andric                                   SDNode OpNode, X86SchedWriteWidths sched> {
22700b57cec5SDimitry Andric  let Predicates = [HasAVX, NoVLX] in {
22710b57cec5SDimitry Andric  defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
22720b57cec5SDimitry Andric        !strconcat(OpcodeStr, "ps"), f256mem, sched.YMM,
22730b57cec5SDimitry Andric        [], [], 0>, PS, VEX_4V, VEX_L, VEX_WIG;
22740b57cec5SDimitry Andric
22750b57cec5SDimitry Andric  defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
22760b57cec5SDimitry Andric        !strconcat(OpcodeStr, "pd"), f256mem, sched.YMM,
22770b57cec5SDimitry Andric        [], [], 0>, PD, VEX_4V, VEX_L, VEX_WIG;
22780b57cec5SDimitry Andric
22790b57cec5SDimitry Andric  defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
22800b57cec5SDimitry Andric       !strconcat(OpcodeStr, "ps"), f128mem, sched.XMM,
22810b57cec5SDimitry Andric       [], [], 0>, PS, VEX_4V, VEX_WIG;
22820b57cec5SDimitry Andric
22830b57cec5SDimitry Andric  defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
22840b57cec5SDimitry Andric       !strconcat(OpcodeStr, "pd"), f128mem, sched.XMM,
22850b57cec5SDimitry Andric       [], [], 0>, PD, VEX_4V, VEX_WIG;
22860b57cec5SDimitry Andric  }
22870b57cec5SDimitry Andric
22880b57cec5SDimitry Andric  let Constraints = "$src1 = $dst" in {
22890b57cec5SDimitry Andric    defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
22900b57cec5SDimitry Andric         !strconcat(OpcodeStr, "ps"), f128mem, sched.XMM,
22910b57cec5SDimitry Andric         [], []>, PS;
22920b57cec5SDimitry Andric
22930b57cec5SDimitry Andric    defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
22940b57cec5SDimitry Andric         !strconcat(OpcodeStr, "pd"), f128mem, sched.XMM,
22950b57cec5SDimitry Andric         [], []>, PD;
22960b57cec5SDimitry Andric  }
22970b57cec5SDimitry Andric}
22980b57cec5SDimitry Andric
// Packed FP logic instruction families (opcodes 0x54-0x57), all scheduled
// with SchedWriteFLogic. ANDN is instantiated under an explicit
// isCommutable = 0 override; the other three are not.
22990b57cec5SDimitry Andricdefm AND  : sse12_fp_packed_logical<0x54, "and", and, SchedWriteFLogic>;
23000b57cec5SDimitry Andricdefm OR   : sse12_fp_packed_logical<0x56, "or", or, SchedWriteFLogic>;
23010b57cec5SDimitry Andricdefm XOR  : sse12_fp_packed_logical<0x57, "xor", xor, SchedWriteFLogic>;
23020b57cec5SDimitry Andriclet isCommutable = 0 in
23030b57cec5SDimitry Andric  defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp, SchedWriteFLogic>;
23040b57cec5SDimitry Andric
// 256-bit integer logic patterns for AVX2 without VLX. The VP*Y records are
// instantiated by PDI_binop_all with v4i64 only, so the remaining 256-bit
// integer types (v32i8, v16i16, v8i32) are mapped here onto the same
// VPANDY / VPORY / VPXORY / VPANDNY records. The first half covers the
// register (rr) forms, the second half the folded-load (rm) forms.
23050b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
23060b57cec5SDimitry Andric  def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
23070b57cec5SDimitry Andric            (VPANDYrr VR256:$src1, VR256:$src2)>;
23080b57cec5SDimitry Andric  def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
23090b57cec5SDimitry Andric            (VPANDYrr VR256:$src1, VR256:$src2)>;
23100b57cec5SDimitry Andric  def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
23110b57cec5SDimitry Andric            (VPANDYrr VR256:$src1, VR256:$src2)>;
23120b57cec5SDimitry Andric
23130b57cec5SDimitry Andric  def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
23140b57cec5SDimitry Andric            (VPORYrr VR256:$src1, VR256:$src2)>;
23150b57cec5SDimitry Andric  def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
23160b57cec5SDimitry Andric            (VPORYrr VR256:$src1, VR256:$src2)>;
23170b57cec5SDimitry Andric  def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
23180b57cec5SDimitry Andric            (VPORYrr VR256:$src1, VR256:$src2)>;
23190b57cec5SDimitry Andric
23200b57cec5SDimitry Andric  def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
23210b57cec5SDimitry Andric            (VPXORYrr VR256:$src1, VR256:$src2)>;
23220b57cec5SDimitry Andric  def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
23230b57cec5SDimitry Andric            (VPXORYrr VR256:$src1, VR256:$src2)>;
23240b57cec5SDimitry Andric  def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
23250b57cec5SDimitry Andric            (VPXORYrr VR256:$src1, VR256:$src2)>;
23260b57cec5SDimitry Andric
23270b57cec5SDimitry Andric  def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
23280b57cec5SDimitry Andric            (VPANDNYrr VR256:$src1, VR256:$src2)>;
23290b57cec5SDimitry Andric  def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
23300b57cec5SDimitry Andric            (VPANDNYrr VR256:$src1, VR256:$src2)>;
23310b57cec5SDimitry Andric  def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
23320b57cec5SDimitry Andric            (VPANDNYrr VR256:$src1, VR256:$src2)>;
23330b57cec5SDimitry Andric
23340b57cec5SDimitry Andric  def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)),
23350b57cec5SDimitry Andric            (VPANDYrm VR256:$src1, addr:$src2)>;
23360b57cec5SDimitry Andric  def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)),
23370b57cec5SDimitry Andric            (VPANDYrm VR256:$src1, addr:$src2)>;
23380b57cec5SDimitry Andric  def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)),
23390b57cec5SDimitry Andric            (VPANDYrm VR256:$src1, addr:$src2)>;
23400b57cec5SDimitry Andric
23410b57cec5SDimitry Andric  def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)),
23420b57cec5SDimitry Andric            (VPORYrm VR256:$src1, addr:$src2)>;
23430b57cec5SDimitry Andric  def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)),
23440b57cec5SDimitry Andric            (VPORYrm VR256:$src1, addr:$src2)>;
23450b57cec5SDimitry Andric  def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)),
23460b57cec5SDimitry Andric            (VPORYrm VR256:$src1, addr:$src2)>;
23470b57cec5SDimitry Andric
23480b57cec5SDimitry Andric  def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)),
23490b57cec5SDimitry Andric            (VPXORYrm VR256:$src1, addr:$src2)>;
23500b57cec5SDimitry Andric  def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)),
23510b57cec5SDimitry Andric            (VPXORYrm VR256:$src1, addr:$src2)>;
23520b57cec5SDimitry Andric  def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)),
23530b57cec5SDimitry Andric            (VPXORYrm VR256:$src1, addr:$src2)>;
23540b57cec5SDimitry Andric
23550b57cec5SDimitry Andric  def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)),
23560b57cec5SDimitry Andric            (VPANDNYrm VR256:$src1, addr:$src2)>;
23570b57cec5SDimitry Andric  def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)),
23580b57cec5SDimitry Andric            (VPANDNYrm VR256:$src1, addr:$src2)>;
23590b57cec5SDimitry Andric  def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)),
23600b57cec5SDimitry Andric            (VPANDNYrm VR256:$src1, addr:$src2)>;
23610b57cec5SDimitry Andric}
23620b57cec5SDimitry Andric
23630b57cec5SDimitry Andric// If only AVX1 is supported, we need to handle integer operations with
23640b57cec5SDimitry Andric// floating point instructions since the integer versions aren't available.
// All four 256-bit integer types (v32i8, v16i16, v8i32, v4i64) are mapped
// onto the PS-suffixed FP records VANDPSY / VORPSY / VXORPSY / VANDNPSY.
// The first half covers the register (rr) forms, the second half the
// folded-load (rm) forms.
23650b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in {
23660b57cec5SDimitry Andric  def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
23670b57cec5SDimitry Andric            (VANDPSYrr VR256:$src1, VR256:$src2)>;
23680b57cec5SDimitry Andric  def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
23690b57cec5SDimitry Andric            (VANDPSYrr VR256:$src1, VR256:$src2)>;
23700b57cec5SDimitry Andric  def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
23710b57cec5SDimitry Andric            (VANDPSYrr VR256:$src1, VR256:$src2)>;
23720b57cec5SDimitry Andric  def : Pat<(v4i64 (and VR256:$src1, VR256:$src2)),
23730b57cec5SDimitry Andric            (VANDPSYrr VR256:$src1, VR256:$src2)>;
23740b57cec5SDimitry Andric
23750b57cec5SDimitry Andric  def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
23760b57cec5SDimitry Andric            (VORPSYrr VR256:$src1, VR256:$src2)>;
23770b57cec5SDimitry Andric  def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
23780b57cec5SDimitry Andric            (VORPSYrr VR256:$src1, VR256:$src2)>;
23790b57cec5SDimitry Andric  def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
23800b57cec5SDimitry Andric            (VORPSYrr VR256:$src1, VR256:$src2)>;
23810b57cec5SDimitry Andric  def : Pat<(v4i64 (or VR256:$src1, VR256:$src2)),
23820b57cec5SDimitry Andric            (VORPSYrr VR256:$src1, VR256:$src2)>;
23830b57cec5SDimitry Andric
23840b57cec5SDimitry Andric  def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
23850b57cec5SDimitry Andric            (VXORPSYrr VR256:$src1, VR256:$src2)>;
23860b57cec5SDimitry Andric  def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
23870b57cec5SDimitry Andric            (VXORPSYrr VR256:$src1, VR256:$src2)>;
23880b57cec5SDimitry Andric  def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
23890b57cec5SDimitry Andric            (VXORPSYrr VR256:$src1, VR256:$src2)>;
23900b57cec5SDimitry Andric  def : Pat<(v4i64 (xor VR256:$src1, VR256:$src2)),
23910b57cec5SDimitry Andric            (VXORPSYrr VR256:$src1, VR256:$src2)>;
23920b57cec5SDimitry Andric
23930b57cec5SDimitry Andric  def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
23940b57cec5SDimitry Andric            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
23950b57cec5SDimitry Andric  def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
23960b57cec5SDimitry Andric            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
23970b57cec5SDimitry Andric  def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
23980b57cec5SDimitry Andric            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
23990b57cec5SDimitry Andric  def : Pat<(v4i64 (X86andnp VR256:$src1, VR256:$src2)),
24000b57cec5SDimitry Andric            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
24010b57cec5SDimitry Andric
24020b57cec5SDimitry Andric  def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)),
24030b57cec5SDimitry Andric            (VANDPSYrm VR256:$src1, addr:$src2)>;
24040b57cec5SDimitry Andric  def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)),
24050b57cec5SDimitry Andric            (VANDPSYrm VR256:$src1, addr:$src2)>;
24060b57cec5SDimitry Andric  def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)),
24070b57cec5SDimitry Andric            (VANDPSYrm VR256:$src1, addr:$src2)>;
24080b57cec5SDimitry Andric  def : Pat<(and VR256:$src1, (loadv4i64 addr:$src2)),
24090b57cec5SDimitry Andric            (VANDPSYrm VR256:$src1, addr:$src2)>;
24100b57cec5SDimitry Andric
24110b57cec5SDimitry Andric  def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)),
24120b57cec5SDimitry Andric            (VORPSYrm VR256:$src1, addr:$src2)>;
24130b57cec5SDimitry Andric  def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)),
24140b57cec5SDimitry Andric            (VORPSYrm VR256:$src1, addr:$src2)>;
24150b57cec5SDimitry Andric  def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)),
24160b57cec5SDimitry Andric            (VORPSYrm VR256:$src1, addr:$src2)>;
24170b57cec5SDimitry Andric  def : Pat<(or VR256:$src1, (loadv4i64 addr:$src2)),
24180b57cec5SDimitry Andric            (VORPSYrm VR256:$src1, addr:$src2)>;
24190b57cec5SDimitry Andric
24200b57cec5SDimitry Andric  def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)),
24210b57cec5SDimitry Andric            (VXORPSYrm VR256:$src1, addr:$src2)>;
24220b57cec5SDimitry Andric  def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)),
24230b57cec5SDimitry Andric            (VXORPSYrm VR256:$src1, addr:$src2)>;
24240b57cec5SDimitry Andric  def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)),
24250b57cec5SDimitry Andric            (VXORPSYrm VR256:$src1, addr:$src2)>;
24260b57cec5SDimitry Andric  def : Pat<(xor VR256:$src1, (loadv4i64 addr:$src2)),
24270b57cec5SDimitry Andric            (VXORPSYrm VR256:$src1, addr:$src2)>;
24280b57cec5SDimitry Andric
24290b57cec5SDimitry Andric  def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)),
24300b57cec5SDimitry Andric            (VANDNPSYrm VR256:$src1, addr:$src2)>;
24310b57cec5SDimitry Andric  def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)),
24320b57cec5SDimitry Andric            (VANDNPSYrm VR256:$src1, addr:$src2)>;
24330b57cec5SDimitry Andric  def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)),
24340b57cec5SDimitry Andric            (VANDNPSYrm VR256:$src1, addr:$src2)>;
24350b57cec5SDimitry Andric  def : Pat<(X86andnp VR256:$src1, (loadv4i64 addr:$src2)),
24360b57cec5SDimitry Andric            (VANDNPSYrm VR256:$src1, addr:$src2)>;
24370b57cec5SDimitry Andric}
24380b57cec5SDimitry Andric
// 128-bit integer logic patterns for AVX without VLX. The VP* records are
// instantiated by PDI_binop_all with v2i64 only, so the remaining 128-bit
// integer types (v16i8, v8i16, v4i32) are mapped here onto the same
// VPAND / VPOR / VPXOR / VPANDN records. The first half covers the register
// (rr) forms, the second half the folded-load (rm) forms.
24390b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
24400b57cec5SDimitry Andric  def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
24410b57cec5SDimitry Andric            (VPANDrr VR128:$src1, VR128:$src2)>;
24420b57cec5SDimitry Andric  def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
24430b57cec5SDimitry Andric            (VPANDrr VR128:$src1, VR128:$src2)>;
24440b57cec5SDimitry Andric  def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
24450b57cec5SDimitry Andric            (VPANDrr VR128:$src1, VR128:$src2)>;
24460b57cec5SDimitry Andric
24470b57cec5SDimitry Andric  def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
24480b57cec5SDimitry Andric            (VPORrr VR128:$src1, VR128:$src2)>;
24490b57cec5SDimitry Andric  def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
24500b57cec5SDimitry Andric            (VPORrr VR128:$src1, VR128:$src2)>;
24510b57cec5SDimitry Andric  def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
24520b57cec5SDimitry Andric            (VPORrr VR128:$src1, VR128:$src2)>;
24530b57cec5SDimitry Andric
24540b57cec5SDimitry Andric  def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
24550b57cec5SDimitry Andric            (VPXORrr VR128:$src1, VR128:$src2)>;
24560b57cec5SDimitry Andric  def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
24570b57cec5SDimitry Andric            (VPXORrr VR128:$src1, VR128:$src2)>;
24580b57cec5SDimitry Andric  def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
24590b57cec5SDimitry Andric            (VPXORrr VR128:$src1, VR128:$src2)>;
24600b57cec5SDimitry Andric
24610b57cec5SDimitry Andric  def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
24620b57cec5SDimitry Andric            (VPANDNrr VR128:$src1, VR128:$src2)>;
24630b57cec5SDimitry Andric  def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
24640b57cec5SDimitry Andric            (VPANDNrr VR128:$src1, VR128:$src2)>;
24650b57cec5SDimitry Andric  def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
24660b57cec5SDimitry Andric            (VPANDNrr VR128:$src1, VR128:$src2)>;
24670b57cec5SDimitry Andric
24680b57cec5SDimitry Andric  def : Pat<(and VR128:$src1, (loadv16i8 addr:$src2)),
24690b57cec5SDimitry Andric            (VPANDrm VR128:$src1, addr:$src2)>;
24700b57cec5SDimitry Andric  def : Pat<(and VR128:$src1, (loadv8i16 addr:$src2)),
24710b57cec5SDimitry Andric            (VPANDrm VR128:$src1, addr:$src2)>;
24720b57cec5SDimitry Andric  def : Pat<(and VR128:$src1, (loadv4i32 addr:$src2)),
24730b57cec5SDimitry Andric            (VPANDrm VR128:$src1, addr:$src2)>;
24740b57cec5SDimitry Andric
24750b57cec5SDimitry Andric  def : Pat<(or VR128:$src1, (loadv16i8 addr:$src2)),
24760b57cec5SDimitry Andric            (VPORrm VR128:$src1, addr:$src2)>;
24770b57cec5SDimitry Andric  def : Pat<(or VR128:$src1, (loadv8i16 addr:$src2)),
24780b57cec5SDimitry Andric            (VPORrm VR128:$src1, addr:$src2)>;
24790b57cec5SDimitry Andric  def : Pat<(or VR128:$src1, (loadv4i32 addr:$src2)),
24800b57cec5SDimitry Andric            (VPORrm VR128:$src1, addr:$src2)>;
24810b57cec5SDimitry Andric
24820b57cec5SDimitry Andric  def : Pat<(xor VR128:$src1, (loadv16i8 addr:$src2)),
24830b57cec5SDimitry Andric            (VPXORrm VR128:$src1, addr:$src2)>;
24840b57cec5SDimitry Andric  def : Pat<(xor VR128:$src1, (loadv8i16 addr:$src2)),
24850b57cec5SDimitry Andric            (VPXORrm VR128:$src1, addr:$src2)>;
24860b57cec5SDimitry Andric  def : Pat<(xor VR128:$src1, (loadv4i32 addr:$src2)),
24870b57cec5SDimitry Andric            (VPXORrm VR128:$src1, addr:$src2)>;
24880b57cec5SDimitry Andric
24890b57cec5SDimitry Andric  def : Pat<(X86andnp VR128:$src1, (loadv16i8 addr:$src2)),
24900b57cec5SDimitry Andric            (VPANDNrm VR128:$src1, addr:$src2)>;
24910b57cec5SDimitry Andric  def : Pat<(X86andnp VR128:$src1, (loadv8i16 addr:$src2)),
24920b57cec5SDimitry Andric            (VPANDNrm VR128:$src1, addr:$src2)>;
24930b57cec5SDimitry Andric  def : Pat<(X86andnp VR128:$src1, (loadv4i32 addr:$src2)),
24940b57cec5SDimitry Andric            (VPANDNrm VR128:$src1, addr:$src2)>;
24950b57cec5SDimitry Andric}
24960b57cec5SDimitry Andric
// Select the non-V-prefixed PAND / POR / PXOR / PANDN instructions for the
// v16i8, v8i16 and v4i32 forms of and / or / xor / X86andnp.
// NOTE(review): no v2i64 patterns appear in this block; presumably that type
// is matched by the instruction definitions themselves -- confirm.
24970b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
  // Register-register forms: the result type is spelled out on each pattern.
24980b57cec5SDimitry Andric  def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
24990b57cec5SDimitry Andric            (PANDrr VR128:$src1, VR128:$src2)>;
25000b57cec5SDimitry Andric  def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
25010b57cec5SDimitry Andric            (PANDrr VR128:$src1, VR128:$src2)>;
25020b57cec5SDimitry Andric  def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
25030b57cec5SDimitry Andric            (PANDrr VR128:$src1, VR128:$src2)>;
25040b57cec5SDimitry Andric
25050b57cec5SDimitry Andric  def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
25060b57cec5SDimitry Andric            (PORrr VR128:$src1, VR128:$src2)>;
25070b57cec5SDimitry Andric  def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
25080b57cec5SDimitry Andric            (PORrr VR128:$src1, VR128:$src2)>;
25090b57cec5SDimitry Andric  def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
25100b57cec5SDimitry Andric            (PORrr VR128:$src1, VR128:$src2)>;
25110b57cec5SDimitry Andric
25120b57cec5SDimitry Andric  def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
25130b57cec5SDimitry Andric            (PXORrr VR128:$src1, VR128:$src2)>;
25140b57cec5SDimitry Andric  def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
25150b57cec5SDimitry Andric            (PXORrr VR128:$src1, VR128:$src2)>;
25160b57cec5SDimitry Andric  def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
25170b57cec5SDimitry Andric            (PXORrr VR128:$src1, VR128:$src2)>;
25180b57cec5SDimitry Andric
25190b57cec5SDimitry Andric  def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
25200b57cec5SDimitry Andric            (PANDNrr VR128:$src1, VR128:$src2)>;
25210b57cec5SDimitry Andric  def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
25220b57cec5SDimitry Andric            (PANDNrr VR128:$src1, VR128:$src2)>;
25230b57cec5SDimitry Andric  def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
25240b57cec5SDimitry Andric            (PANDNrr VR128:$src1, VR128:$src2)>;
25250b57cec5SDimitry Andric
  // Register-memory forms: the second operand is a load matched through the
  // per-type memop fragment, which also fixes the vector type of the pattern.
25260b57cec5SDimitry Andric  def : Pat<(and VR128:$src1, (memopv16i8 addr:$src2)),
25270b57cec5SDimitry Andric            (PANDrm VR128:$src1, addr:$src2)>;
25280b57cec5SDimitry Andric  def : Pat<(and VR128:$src1, (memopv8i16 addr:$src2)),
25290b57cec5SDimitry Andric            (PANDrm VR128:$src1, addr:$src2)>;
25300b57cec5SDimitry Andric  def : Pat<(and VR128:$src1, (memopv4i32 addr:$src2)),
25310b57cec5SDimitry Andric            (PANDrm VR128:$src1, addr:$src2)>;
25320b57cec5SDimitry Andric
25330b57cec5SDimitry Andric  def : Pat<(or VR128:$src1, (memopv16i8 addr:$src2)),
25340b57cec5SDimitry Andric            (PORrm VR128:$src1, addr:$src2)>;
25350b57cec5SDimitry Andric  def : Pat<(or VR128:$src1, (memopv8i16 addr:$src2)),
25360b57cec5SDimitry Andric            (PORrm VR128:$src1, addr:$src2)>;
25370b57cec5SDimitry Andric  def : Pat<(or VR128:$src1, (memopv4i32 addr:$src2)),
25380b57cec5SDimitry Andric            (PORrm VR128:$src1, addr:$src2)>;
25390b57cec5SDimitry Andric
25400b57cec5SDimitry Andric  def : Pat<(xor VR128:$src1, (memopv16i8 addr:$src2)),
25410b57cec5SDimitry Andric            (PXORrm VR128:$src1, addr:$src2)>;
25420b57cec5SDimitry Andric  def : Pat<(xor VR128:$src1, (memopv8i16 addr:$src2)),
25430b57cec5SDimitry Andric            (PXORrm VR128:$src1, addr:$src2)>;
25440b57cec5SDimitry Andric  def : Pat<(xor VR128:$src1, (memopv4i32 addr:$src2)),
25450b57cec5SDimitry Andric            (PXORrm VR128:$src1, addr:$src2)>;
25460b57cec5SDimitry Andric
25470b57cec5SDimitry Andric  def : Pat<(X86andnp VR128:$src1, (memopv16i8 addr:$src2)),
25480b57cec5SDimitry Andric            (PANDNrm VR128:$src1, addr:$src2)>;
25490b57cec5SDimitry Andric  def : Pat<(X86andnp VR128:$src1, (memopv8i16 addr:$src2)),
25500b57cec5SDimitry Andric            (PANDNrm VR128:$src1, addr:$src2)>;
25510b57cec5SDimitry Andric  def : Pat<(X86andnp VR128:$src1, (memopv4i32 addr:$src2)),
25520b57cec5SDimitry Andric            (PANDNrm VR128:$src1, addr:$src2)>;
25530b57cec5SDimitry Andric}
25540b57cec5SDimitry Andric
25550b57cec5SDimitry Andric// Patterns for packed operations when we don't have integer type available.
// The X86fand / X86for / X86fxor / X86fandn nodes on v4f32 are mapped onto the
// packed-single ANDPS / ORPS / XORPS / ANDNPS instructions.
// Register-register forms:
25560b57cec5SDimitry Andricdef : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
25570b57cec5SDimitry Andric          (ANDPSrr VR128:$src1, VR128:$src2)>;
25580b57cec5SDimitry Andricdef : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)),
25590b57cec5SDimitry Andric          (ORPSrr VR128:$src1, VR128:$src2)>;
25600b57cec5SDimitry Andricdef : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)),
25610b57cec5SDimitry Andric          (XORPSrr VR128:$src1, VR128:$src2)>;
25620b57cec5SDimitry Andricdef : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)),
25630b57cec5SDimitry Andric          (ANDNPSrr VR128:$src1, VR128:$src2)>;
25640b57cec5SDimitry Andric
// Register-memory forms: the second operand is a load matched by memopv4f32.
25650b57cec5SDimitry Andricdef : Pat<(X86fand VR128:$src1, (memopv4f32 addr:$src2)),
25660b57cec5SDimitry Andric          (ANDPSrm VR128:$src1, addr:$src2)>;
25670b57cec5SDimitry Andricdef : Pat<(X86for VR128:$src1, (memopv4f32 addr:$src2)),
25680b57cec5SDimitry Andric          (ORPSrm VR128:$src1, addr:$src2)>;
25690b57cec5SDimitry Andricdef : Pat<(X86fxor VR128:$src1, (memopv4f32 addr:$src2)),
25700b57cec5SDimitry Andric          (XORPSrm VR128:$src1, addr:$src2)>;
25710b57cec5SDimitry Andricdef : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)),
25720b57cec5SDimitry Andric          (ANDNPSrm VR128:$src1, addr:$src2)>;
25730b57cec5SDimitry Andric
25740b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
25750b57cec5SDimitry Andric// SSE 1 & 2 - Arithmetic Instructions
25760b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
25770b57cec5SDimitry Andric
25780b57cec5SDimitry Andric/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and
25790b57cec5SDimitry Andric/// vector forms.
25800b57cec5SDimitry Andric///
25810b57cec5SDimitry Andric/// In addition, we also have a special variant of the scalar form here to
25820b57cec5SDimitry Andric/// represent the associated intrinsic operation.  This form is unlike the
25830b57cec5SDimitry Andric/// plain scalar form, in that it takes an entire vector (instead of a scalar)
25840b57cec5SDimitry Andric/// and leaves the top elements unmodified (therefore these cannot be commuted).
25850b57cec5SDimitry Andric///
25860b57cec5SDimitry Andric/// These three forms can each be reg+reg or reg+mem.
25870b57cec5SDimitry Andric///
25880b57cec5SDimitry Andric
25890b57cec5SDimitry Andric/// FIXME: once all 256-bit intrinsics are matched, clean up and refactor the
25900b57cec5SDimitry Andric/// classes below.
// basic_sse12_fp_binop_p - packed ("ps" / "pd") forms of an FP binop.
//   opc       - opcode byte shared by every variant defined here.
//   OpcodeStr - mnemonic stem; "ps" or "pd" is appended with !strconcat.
//   OpNode    - DAG operator forwarded to sse12_fp_packed.
//   sched     - scheduling info; the PS/PD members' XMM and YMM entries are used.
// Defines V<NAME>PS / V<NAME>PD (VR128) and V<NAME>PSY / V<NAME>PDY (VR256)
// under [HasAVX, NoVLX], plus <NAME>PS / <NAME>PD with $src1 tied to $dst.
25910b57cec5SDimitry Andricmulticlass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
2592*fe6060f1SDimitry Andric                                  SDPatternOperator OpNode, X86SchedWriteSizes sched> {
// Every definition below reads MXCSR and is flagged as possibly raising an
// FP exception.
2593480093f4SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1 in {
  // VEX-encoded forms; memory operands use the loadv* fragments.
  // NOTE(review): the trailing 0 argument is presumably Is2Addr = 0
  // (three-operand form) -- confirm against sse12_fp_packed.
25940b57cec5SDimitry Andric  let Predicates = [HasAVX, NoVLX] in {
25950b57cec5SDimitry Andric  defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
25960b57cec5SDimitry Andric                               VR128, v4f32, f128mem, loadv4f32,
25970b57cec5SDimitry Andric                               SSEPackedSingle, sched.PS.XMM, 0>, PS, VEX_4V, VEX_WIG;
25980b57cec5SDimitry Andric  defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
25990b57cec5SDimitry Andric                               VR128, v2f64, f128mem, loadv2f64,
26000b57cec5SDimitry Andric                               SSEPackedDouble, sched.PD.XMM, 0>, PD, VEX_4V, VEX_WIG;
26010b57cec5SDimitry Andric
26020b57cec5SDimitry Andric  defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
26030b57cec5SDimitry Andric                        OpNode, VR256, v8f32, f256mem, loadv8f32,
26040b57cec5SDimitry Andric                        SSEPackedSingle, sched.PS.YMM, 0>, PS, VEX_4V, VEX_L, VEX_WIG;
26050b57cec5SDimitry Andric  defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
26060b57cec5SDimitry Andric                        OpNode, VR256, v4f64, f256mem, loadv4f64,
26070b57cec5SDimitry Andric                        SSEPackedDouble, sched.PD.YMM, 0>, PD, VEX_4V, VEX_L, VEX_WIG;
26080b57cec5SDimitry Andric  }
26090b57cec5SDimitry Andric
  // Non-VEX forms: destination tied to the first source; memory operands use
  // the memopv* fragments. No Predicates list is set at this level.
26100b57cec5SDimitry Andric  let Constraints = "$src1 = $dst" in {
26110b57cec5SDimitry Andric    defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
26120b57cec5SDimitry Andric                              v4f32, f128mem, memopv4f32, SSEPackedSingle,
26130b57cec5SDimitry Andric                              sched.PS.XMM>, PS;
26140b57cec5SDimitry Andric    defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
26150b57cec5SDimitry Andric                              v2f64, f128mem, memopv2f64, SSEPackedDouble,
26160b57cec5SDimitry Andric                              sched.PD.XMM>, PD;
26170b57cec5SDimitry Andric  }
26180b57cec5SDimitry Andric}
2619480093f4SDimitry Andric}
26200b57cec5SDimitry Andric
// basic_sse12_fp_binop_s - scalar ("ss" / "sd") forms of an FP binop on the
// FR32 / FR64 register classes.
//   opc       - opcode byte shared by every variant defined here.
//   OpcodeStr - mnemonic stem; "ss" or "sd" is appended with !strconcat.
//   OpNode    - DAG operator forwarded to sse12_fp_scalar.
//   sched     - scheduling info; the PS.Scl and PD.Scl entries are used.
2621*fe6060f1SDimitry Andricmulticlass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
26220b57cec5SDimitry Andric                                  X86SchedWriteSizes sched> {
// Every definition below reads MXCSR and is flagged as possibly raising an
// FP exception.
2623480093f4SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1 in {
  // VEX-encoded forms: the trailing 0 overrides sse12_fp_scalar's
  // Is2Addr = 1 default.
26240b57cec5SDimitry Andric  defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
26250b57cec5SDimitry Andric                         OpNode, FR32, f32mem, SSEPackedSingle, sched.PS.Scl, 0>,
26260b57cec5SDimitry Andric                         XS, VEX_4V, VEX_LIG, VEX_WIG;
26270b57cec5SDimitry Andric  defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
26280b57cec5SDimitry Andric                         OpNode, FR64, f64mem, SSEPackedDouble, sched.PD.Scl, 0>,
26290b57cec5SDimitry Andric                         XD, VEX_4V, VEX_LIG, VEX_WIG;
26300b57cec5SDimitry Andric
  // Non-VEX forms: Is2Addr keeps its default and $src1 is tied to $dst.
26310b57cec5SDimitry Andric  let Constraints = "$src1 = $dst" in {
26320b57cec5SDimitry Andric    defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
26330b57cec5SDimitry Andric                              OpNode, FR32, f32mem, SSEPackedSingle,
26340b57cec5SDimitry Andric                              sched.PS.Scl>, XS;
26350b57cec5SDimitry Andric    defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
26360b57cec5SDimitry Andric                              OpNode, FR64, f64mem, SSEPackedDouble,
26370b57cec5SDimitry Andric                              sched.PD.Scl>, XD;
26380b57cec5SDimitry Andric  }
26390b57cec5SDimitry Andric}
2640480093f4SDimitry Andric}
26410b57cec5SDimitry Andric
// basic_sse12_fp_binop_s_int - scalar ("ss" / "sd") forms that operate on
// whole VR128 vectors (v4f32 / v2f64), built from sse12_fp_scalar_int.
//   opc       - opcode byte shared by every variant defined here.
//   OpcodeStr - mnemonic stem; passed both as-is and with "ss"/"sd" appended.
//   OpNode    - DAG operator forwarded to sse12_fp_scalar_int.
//   sched     - scheduling info; the PS.Scl and PD.Scl entries are used.
// Memory operands are ssmem / sdmem matched via sse_load_f32 / sse_load_f64.
26420b57cec5SDimitry Andricmulticlass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
26430b57cec5SDimitry Andric                                      SDPatternOperator OpNode,
26440b57cec5SDimitry Andric                                      X86SchedWriteSizes sched> {
// Every definition below reads MXCSR and is flagged as possibly raising an
// FP exception.
2645480093f4SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1 in {
  // VEX-encoded forms.
  // NOTE(review): the trailing 0 is presumably Is2Addr = 0 -- confirm against
  // sse12_fp_scalar_int.
26460b57cec5SDimitry Andric  defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
26470b57cec5SDimitry Andric                   !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
26480b57cec5SDimitry Andric                   SSEPackedSingle, sched.PS.Scl, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG;
26490b57cec5SDimitry Andric  defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
26500b57cec5SDimitry Andric                   !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
26510b57cec5SDimitry Andric                   SSEPackedDouble, sched.PD.Scl, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG;
26520b57cec5SDimitry Andric
  // Non-VEX forms: $src1 is tied to $dst.
26530b57cec5SDimitry Andric  let Constraints = "$src1 = $dst" in {
26540b57cec5SDimitry Andric    defm SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
26550b57cec5SDimitry Andric                   !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
26560b57cec5SDimitry Andric                   SSEPackedSingle, sched.PS.Scl>, XS;
26570b57cec5SDimitry Andric    defm SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
26580b57cec5SDimitry Andric                   !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
26590b57cec5SDimitry Andric                   SSEPackedDouble, sched.PD.Scl>, XD;
26600b57cec5SDimitry Andric  }
26610b57cec5SDimitry Andric}
2662480093f4SDimitry Andric}
26630b57cec5SDimitry Andric
26640b57cec5SDimitry Andric// Binary Arithmetic instructions
// ADD and MUL each combine the packed, scalar and scalar-intrinsic
// multiclasses. They are instantiated outside any `isCommutable = 0` scope.
// The packed and scalar forms match any_fadd / any_fmul; the _s_int forms are
// given null_frag as their operator.
// NOTE(review): presumably null_frag means the _Int definitions get no
// selection pattern of their own -- confirm.
2665480093f4SDimitry Andricdefm ADD : basic_sse12_fp_binop_p<0x58, "add", any_fadd, SchedWriteFAddSizes>,
2666480093f4SDimitry Andric           basic_sse12_fp_binop_s<0x58, "add", any_fadd, SchedWriteFAddSizes>,
26670b57cec5SDimitry Andric           basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAddSizes>;
2668480093f4SDimitry Andricdefm MUL : basic_sse12_fp_binop_p<0x59, "mul", any_fmul, SchedWriteFMulSizes>,
2669480093f4SDimitry Andric           basic_sse12_fp_binop_s<0x59, "mul", any_fmul, SchedWriteFMulSizes>,
26700b57cec5SDimitry Andric           basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMulSizes>;
// SUB, DIV, MAX and MIN are all instantiated with isCommutable forced to 0.
// SUB shares SchedWriteFAddSizes with ADD; MAX and MIN use
// SchedWriteFCmpSizes. For MAX / MIN the _s_int forms match X86fmaxs /
// X86fmins rather than null_frag.
26710b57cec5SDimitry Andriclet isCommutable = 0 in {
2672480093f4SDimitry Andric  defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", any_fsub, SchedWriteFAddSizes>,
2673480093f4SDimitry Andric             basic_sse12_fp_binop_s<0x5C, "sub", any_fsub, SchedWriteFAddSizes>,
26740b57cec5SDimitry Andric             basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAddSizes>;
2675480093f4SDimitry Andric  defm DIV : basic_sse12_fp_binop_p<0x5E, "div", any_fdiv, SchedWriteFDivSizes>,
2676480093f4SDimitry Andric             basic_sse12_fp_binop_s<0x5E, "div", any_fdiv, SchedWriteFDivSizes>,
26770b57cec5SDimitry Andric             basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDivSizes>;
26780b57cec5SDimitry Andric  defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
26790b57cec5SDimitry Andric             basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
26800b57cec5SDimitry Andric             basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SchedWriteFCmpSizes>;
26810b57cec5SDimitry Andric  defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
26820b57cec5SDimitry Andric             basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
26830b57cec5SDimitry Andric             basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SchedWriteFCmpSizes>;
26840b57cec5SDimitry Andric}
26850b57cec5SDimitry Andric
// MAXC / MINC reuse the MAX / MIN opcodes (0x5F / 0x5D) and mnemonics but
// match the X86fmaxc / X86fminc nodes. They are codegen-only, have no _s_int
// variant, and are not wrapped in `isCommutable = 0`.
// NOTE(review): these look like the commutable counterparts of MAX / MIN --
// confirm against the X86fmaxc / X86fminc node definitions.
26860b57cec5SDimitry Andriclet isCodeGenOnly = 1 in {
26870b57cec5SDimitry Andric  defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>,
26880b57cec5SDimitry Andric             basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>;
26890b57cec5SDimitry Andric  defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SchedWriteFCmpSizes>,
26900b57cec5SDimitry Andric             basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SchedWriteFCmpSizes>;
26910b57cec5SDimitry Andric}
26920b57cec5SDimitry Andric
26930b57cec5SDimitry Andric// Patterns used to select SSE scalar fp arithmetic instructions from
26940b57cec5SDimitry Andric// either:
26950b57cec5SDimitry Andric//
26960b57cec5SDimitry Andric// (1) a scalar fp operation followed by a blend
26970b57cec5SDimitry Andric//
26980b57cec5SDimitry Andric// The effect is that the backend no longer emits unnecessary vector
26990b57cec5SDimitry Andric// insert instructions immediately after SSE scalar fp instructions
27000b57cec5SDimitry Andric// like addss or mulss.
27010b57cec5SDimitry Andric//
27020b57cec5SDimitry Andric// For example, given the following code:
27030b57cec5SDimitry Andric//   __m128 foo(__m128 A, __m128 B) {
27040b57cec5SDimitry Andric//     A[0] += B[0];
27050b57cec5SDimitry Andric//     return A;
27060b57cec5SDimitry Andric//   }
27070b57cec5SDimitry Andric//
27080b57cec5SDimitry Andric// Previously we generated:
27090b57cec5SDimitry Andric//   addss %xmm0, %xmm1
27100b57cec5SDimitry Andric//   movss %xmm1, %xmm0
27110b57cec5SDimitry Andric//
27120b57cec5SDimitry Andric// We now generate:
27130b57cec5SDimitry Andric//   addss %xmm1, %xmm0
27140b57cec5SDimitry Andric//
27150b57cec5SDimitry Andric// (2) a vector packed single/double fp operation followed by a vector insert
27160b57cec5SDimitry Andric//
27170b57cec5SDimitry Andric// The effect is that the backend converts the packed fp instruction
27180b57cec5SDimitry Andric// followed by a vector insert into a single SSE scalar fp instruction.
27190b57cec5SDimitry Andric//
27200b57cec5SDimitry Andric// For example, given the following code:
27210b57cec5SDimitry Andric//   __m128 foo(__m128 A, __m128 B) {
27220b57cec5SDimitry Andric//     __m128 C = A + B;
27230b57cec5SDimitry Andric//     return (__m128) {C[0], A[1], A[2], A[3]};
27240b57cec5SDimitry Andric//   }
27250b57cec5SDimitry Andric//
27260b57cec5SDimitry Andric// Previously we generated:
27270b57cec5SDimitry Andric//   addps %xmm0, %xmm1
27280b57cec5SDimitry Andric//   movss %xmm1, %xmm0
27290b57cec5SDimitry Andric//
27300b57cec5SDimitry Andric// We now generate:
27310b57cec5SDimitry Andric//   addss %xmm1, %xmm0
27320b57cec5SDimitry Andric
27330b57cec5SDimitry Andric// TODO: Some canonicalization in lowering would simplify the number of
27340b57cec5SDimitry Andric// patterns we have to try to match.
// scalar_math_patterns - match "extract element 0, apply a scalar op, move the
// result back into the vector" and select the corresponding *_Int instruction.
//   Op            - scalar operator applied to element 0 of $dst and $src.
//   OpcPrefix     - instruction name stem; "rr_Int" / "rm_Int" are appended,
//                   and "V" is prepended for the AVX patterns.
//   Move          - node that merges the scalar result back into $dst.
//   VT            - vector type of $dst and of the result.
//   EltTy         - element type produced by the extractelt.
//   RC            - register class of the scalar register operand $src.
//   ld_frag       - load fragment used for the memory-operand patterns.
//   BasePredicate - predicate guarding the non-AVX patterns.
2735*fe6060f1SDimitry Andricmulticlass scalar_math_patterns<SDPatternOperator Op, string OpcPrefix, SDNode Move,
27360b57cec5SDimitry Andric                                ValueType VT, ValueType EltTy,
27370b57cec5SDimitry Andric                                RegisterClass RC, PatFrag ld_frag,
27380b57cec5SDimitry Andric                                Predicate BasePredicate> {
27390b57cec5SDimitry Andric  let Predicates = [BasePredicate] in {
27400b57cec5SDimitry Andric    // extracted scalar math op with insert via movss/movsd
    // Register form: the scalar $src is copied into VR128 so it can feed the
    // vector-typed _Int instruction.
27410b57cec5SDimitry Andric    def : Pat<(VT (Move (VT VR128:$dst),
27420b57cec5SDimitry Andric                        (VT (scalar_to_vector
27430b57cec5SDimitry Andric                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
27440b57cec5SDimitry Andric                                 RC:$src))))),
27450b57cec5SDimitry Andric              (!cast<Instruction>(OpcPrefix#rr_Int) VT:$dst,
27460b57cec5SDimitry Andric               (VT (COPY_TO_REGCLASS RC:$src, VR128)))>;
    // Memory form: the second operand is a load matched by ld_frag.
27470b57cec5SDimitry Andric    def : Pat<(VT (Move (VT VR128:$dst),
27480b57cec5SDimitry Andric                        (VT (scalar_to_vector
27490b57cec5SDimitry Andric                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
27500b57cec5SDimitry Andric                                 (ld_frag addr:$src)))))),
27510b57cec5SDimitry Andric              (!cast<Instruction>(OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
27520b57cec5SDimitry Andric  }
27530b57cec5SDimitry Andric
27540b57cec5SDimitry Andric  // Repeat for AVX versions of the instructions.
  // These are guarded by UseAVX regardless of BasePredicate.
27550b57cec5SDimitry Andric  let Predicates = [UseAVX] in {
27560b57cec5SDimitry Andric    // extracted scalar math op with insert via movss/movsd
27570b57cec5SDimitry Andric    def : Pat<(VT (Move (VT VR128:$dst),
27580b57cec5SDimitry Andric                        (VT (scalar_to_vector
27590b57cec5SDimitry Andric                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
27600b57cec5SDimitry Andric                                 RC:$src))))),
27610b57cec5SDimitry Andric              (!cast<Instruction>("V"#OpcPrefix#rr_Int) VT:$dst,
27620b57cec5SDimitry Andric               (VT (COPY_TO_REGCLASS RC:$src, VR128)))>;
27630b57cec5SDimitry Andric    def : Pat<(VT (Move (VT VR128:$dst),
27640b57cec5SDimitry Andric                        (VT (scalar_to_vector
27650b57cec5SDimitry Andric                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
27660b57cec5SDimitry Andric                                 (ld_frag addr:$src)))))),
27670b57cec5SDimitry Andric              (!cast<Instruction>("V"#OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
27680b57cec5SDimitry Andric  }
27690b57cec5SDimitry Andric}
27700b57cec5SDimitry Andric
// Instantiate scalar_math_patterns for add / sub / mul / div.
// Single precision: X86Movss on v4f32 / f32 / FR32, non-AVX patterns under UseSSE1.
2771480093f4SDimitry Andricdefm : scalar_math_patterns<any_fadd, "ADDSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
2772480093f4SDimitry Andricdefm : scalar_math_patterns<any_fsub, "SUBSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
2773480093f4SDimitry Andricdefm : scalar_math_patterns<any_fmul, "MULSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
2774480093f4SDimitry Andricdefm : scalar_math_patterns<any_fdiv, "DIVSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
27750b57cec5SDimitry Andric
// Double precision: X86Movsd on v2f64 / f64 / FR64, non-AVX patterns under UseSSE2.
2776480093f4SDimitry Andricdefm : scalar_math_patterns<any_fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
2777480093f4SDimitry Andricdefm : scalar_math_patterns<any_fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
2778480093f4SDimitry Andricdefm : scalar_math_patterns<any_fmul, "MULSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
2779480093f4SDimitry Andricdefm : scalar_math_patterns<any_fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
27800b57cec5SDimitry Andric
27810b57cec5SDimitry Andric/// Unop Arithmetic
27820b57cec5SDimitry Andric/// In addition, we also have a special variant of the scalar form here to
27830b57cec5SDimitry Andric/// represent the associated intrinsic operation.  This form is unlike the
27840b57cec5SDimitry Andric/// plain scalar form, in that it takes an entire vector (instead of a
27850b57cec5SDimitry Andric/// scalar) and leaves the top elements undefined.
27860b57cec5SDimitry Andric///
27870b57cec5SDimitry Andric/// We also have a special variant for the full-vector intrinsic form.
27880b57cec5SDimitry Andric
27890b57cec5SDimitry Andric/// sse_fp_unop_s - SSE1 unops in scalar form
27900b57cec5SDimitry Andric/// For the non-AVX defs, we need $src1 to be tied to $dst because
27910b57cec5SDimitry Andric/// the HW instructions are 2 operand / destructive.
//   opc       - opcode byte for all four definitions.
//   OpcodeStr - full mnemonic (used as-is, nothing is appended here).
//   RC        - scalar register class for the r / m forms.
//   ScalarVT  - not referenced anywhere in this multiclass body.
//   x86memop  - memory operand type for the m form.
//   intmemop  - memory operand type for the m_Int form.
//   OpNode    - DAG operator matched by the r / m forms.
//   d         - execution domain.
//   sched     - scheduling class (sched.Folded for the memory forms).
//   target    - predicate required by the r / m forms.
27920b57cec5SDimitry Andricmulticlass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
27930b57cec5SDimitry Andric                          ValueType ScalarVT, X86MemOperand x86memop,
2794*fe6060f1SDimitry Andric                          Operand intmemop, SDPatternOperator OpNode, Domain d,
27950b57cec5SDimitry Andric                          X86FoldableSchedWrite sched, Predicate target> {
  // Codegen-only scalar forms with selection patterns attached. The memory
  // form additionally requires OptForSize.
27960b57cec5SDimitry Andric  let isCodeGenOnly = 1, hasSideEffects = 0 in {
27970b57cec5SDimitry Andric  def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1),
27980b57cec5SDimitry Andric              !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
27990b57cec5SDimitry Andric            [(set RC:$dst, (OpNode RC:$src1))], d>, Sched<[sched]>,
28000b57cec5SDimitry Andric            Requires<[target]>;
28010b57cec5SDimitry Andric  let mayLoad = 1 in
28020b57cec5SDimitry Andric  def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1),
28030b57cec5SDimitry Andric            !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
28040b57cec5SDimitry Andric            [(set RC:$dst, (OpNode (load addr:$src1)))], d>,
28050b57cec5SDimitry Andric            Sched<[sched.Folded]>,
28060b57cec5SDimitry Andric            Requires<[target, OptForSize]>;
28070b57cec5SDimitry Andric  }
28080b57cec5SDimitry Andric
  // VR128 forms with $src1 tied to $dst. Only $src2 appears in the asm string,
  // and the pattern lists are empty, so nothing is selected from these
  // definitions directly.
28090b57cec5SDimitry Andric  let hasSideEffects = 0, Constraints = "$src1 = $dst", ExeDomain = d in {
28100b57cec5SDimitry Andric  def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
28110b57cec5SDimitry Andric                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>,
28120b57cec5SDimitry Andric                Sched<[sched]>;
28130b57cec5SDimitry Andric  let mayLoad = 1 in
28140b57cec5SDimitry Andric  def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, intmemop:$src2),
28150b57cec5SDimitry Andric                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>,
28160b57cec5SDimitry Andric                Sched<[sched.Folded, sched.ReadAfterFold]>;
28170b57cec5SDimitry Andric  }
28180b57cec5SDimitry Andric
28190b57cec5SDimitry Andric}
28200b57cec5SDimitry Andric
// sse_fp_unop_s_intr - selection patterns that map an intrinsic onto the
// NAME#r_Int / NAME#m_Int instructions.
//   vt        - vector type used for the IMPLICIT_DEF operand of the memory form.
//   mem_frags - load fragments matched for the memory form.
//   Intr      - the intrinsic being selected.
//   target    - predicate guarding both patterns.
// RC and Suffix are not referenced anywhere in this multiclass body.
28210b57cec5SDimitry Andricmulticlass sse_fp_unop_s_intr<RegisterClass RC, ValueType vt,
28225ffd83dbSDimitry Andric                              PatFrags mem_frags, Intrinsic Intr,
28230b57cec5SDimitry Andric                              Predicate target, string Suffix> {
28240b57cec5SDimitry Andric  let Predicates = [target] in {
28250b57cec5SDimitry Andric  // These are unary operations, but they are modeled as having 2 source operands
28260b57cec5SDimitry Andric  // because the high elements of the destination are unchanged in SSE.
  // The same register is therefore passed for both source operands.
28270b57cec5SDimitry Andric  def : Pat<(Intr VR128:$src),
28280b57cec5SDimitry Andric            (!cast<Instruction>(NAME#r_Int) VR128:$src, VR128:$src)>;
28290b57cec5SDimitry Andric  }
28300b57cec5SDimitry Andric  // We don't want to fold scalar loads into these instructions unless
28310b57cec5SDimitry Andric  // optimizing for size. This is because the folded instruction will have a
28320b57cec5SDimitry Andric  // partial register update, while the unfolded sequence will not, e.g.
28330b57cec5SDimitry Andric  // movss mem, %xmm0
28340b57cec5SDimitry Andric  // rcpss %xmm0, %xmm0
28350b57cec5SDimitry Andric  // which has a clobber before the rcp, vs.
28360b57cec5SDimitry Andric  // rcpss mem, %xmm0
28370b57cec5SDimitry Andric  let Predicates = [target, OptForSize] in {
    // The tied first operand is an IMPLICIT_DEF of type vt.
28385ffd83dbSDimitry Andric    def : Pat<(Intr (mem_frags addr:$src2)),
28390b57cec5SDimitry Andric               (!cast<Instruction>(NAME#m_Int)
28400b57cec5SDimitry Andric                      (vt (IMPLICIT_DEF)), addr:$src2)>;
28410b57cec5SDimitry Andric  }
28420b57cec5SDimitry Andric}
28430b57cec5SDimitry Andric
// avx_fp_unop_s_intr - selection patterns that map an intrinsic onto the
// NAME#r_Int / NAME#m_Int instructions.
//   vt        - vector type used for the IMPLICIT_DEF operand of the memory form.
//   mem_frags - load fragments matched for the memory form.
//   Intr      - the intrinsic being selected.
//   target    - predicate guarding both patterns.
// RC is not referenced anywhere in this multiclass body.
28445ffd83dbSDimitry Andricmulticlass avx_fp_unop_s_intr<RegisterClass RC, ValueType vt, PatFrags mem_frags,
28450b57cec5SDimitry Andric                              Intrinsic Intr, Predicate target> {
  // Register form: the single intrinsic operand is passed as both sources.
28460b57cec5SDimitry Andric  let Predicates = [target] in {
28470b57cec5SDimitry Andric   def : Pat<(Intr VR128:$src),
28480b57cec5SDimitry Andric             (!cast<Instruction>(NAME#r_Int) VR128:$src,
28490b57cec5SDimitry Andric                                 VR128:$src)>;
28500b57cec5SDimitry Andric  }
  // Memory form: only selected under OptForSize; the first source operand is
  // an IMPLICIT_DEF of type vt.
28510b57cec5SDimitry Andric  let Predicates = [target, OptForSize] in {
28525ffd83dbSDimitry Andric    def : Pat<(Intr (mem_frags addr:$src2)),
28530b57cec5SDimitry Andric              (!cast<Instruction>(NAME#m_Int)
28540b57cec5SDimitry Andric                    (vt (IMPLICIT_DEF)), addr:$src2)>;
28550b57cec5SDimitry Andric  }
28560b57cec5SDimitry Andric}
28570b57cec5SDimitry Andric
// avx_fp_unop_s - scalar unops in three-operand form ($dst, $src1, $src2).
//   opc       - opcode byte for all four definitions.
//   OpcodeStr - full mnemonic (used as-is, nothing is appended here).
//   RC        - scalar register class for the r / m forms.
//   ScalarVT  - scalar value type used in the selection patterns below.
//   x86memop  - memory operand type for the m form.
//   intmemop  - memory operand type for the m_Int form.
//   OpNode    - DAG operator matched by the trailing patterns.
//   d         - execution domain.
//   sched     - scheduling class (Folded / ReadAfterFold for memory forms).
//   target    - predicate guarding the trailing patterns.
// All four instruction definitions have empty pattern lists; selection for
// the r / m forms is done by the separate Pat definitions at the end.
28580b57cec5SDimitry Andricmulticlass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
28590b57cec5SDimitry Andric                          ValueType ScalarVT, X86MemOperand x86memop,
2860*fe6060f1SDimitry Andric                          Operand intmemop, SDPatternOperator OpNode, Domain d,
28610b57cec5SDimitry Andric                          X86FoldableSchedWrite sched, Predicate target> {
  // Codegen-only forms on the scalar register class RC.
28620b57cec5SDimitry Andric  let isCodeGenOnly = 1, hasSideEffects = 0 in {
28630b57cec5SDimitry Andric  def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
28640b57cec5SDimitry Andric            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
28650b57cec5SDimitry Andric            [], d>, Sched<[sched]>;
28660b57cec5SDimitry Andric  let mayLoad = 1 in
28670b57cec5SDimitry Andric  def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
28680b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
28690b57cec5SDimitry Andric            [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>;
28700b57cec5SDimitry Andric  }
  // VR128 forms. No "$src1 = $dst" constraint is applied here.
28710b57cec5SDimitry Andric  let hasSideEffects = 0, ExeDomain = d in {
28720b57cec5SDimitry Andric  def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
28730b57cec5SDimitry Andric                (ins VR128:$src1, VR128:$src2),
28740b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
28750b57cec5SDimitry Andric             []>, Sched<[sched]>;
28760b57cec5SDimitry Andric  let mayLoad = 1 in
28770b57cec5SDimitry Andric  def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
28780b57cec5SDimitry Andric                (ins VR128:$src1, intmemop:$src2),
28790b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
28800b57cec5SDimitry Andric             []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
28810b57cec5SDimitry Andric  }
28820b57cec5SDimitry Andric
28830b57cec5SDimitry Andric  // We don't want to fold scalar loads into these instructions unless
28840b57cec5SDimitry Andric  // optimizing for size. This is because the folded instruction will have a
28850b57cec5SDimitry Andric  // partial register update, while the unfolded sequence will not, e.g.
28860b57cec5SDimitry Andric  // vmovss mem, %xmm0
28870b57cec5SDimitry Andric  // vrcpss %xmm0, %xmm0, %xmm0
28880b57cec5SDimitry Andric  // which has a clobber before the rcp, vs.
28890b57cec5SDimitry Andric  // vrcpss mem, %xmm0, %xmm0
28900b57cec5SDimitry Andric  // TODO: In theory, we could fold the load, and avoid the stall caused by
28910b57cec5SDimitry Andric  // the partial register store, either in BreakFalseDeps or with smarter RA.
  // Register pattern: $src1 is an IMPLICIT_DEF of ScalarVT; the real operand
  // goes in $src2.
28920b57cec5SDimitry Andric  let Predicates = [target] in {
28930b57cec5SDimitry Andric   def : Pat<(OpNode RC:$src),  (!cast<Instruction>(NAME#r)
28940b57cec5SDimitry Andric                                (ScalarVT (IMPLICIT_DEF)), RC:$src)>;
28950b57cec5SDimitry Andric  }
  // Load-folding pattern: only enabled together with OptForSize.
28960b57cec5SDimitry Andric  let Predicates = [target, OptForSize] in {
28970b57cec5SDimitry Andric    def : Pat<(ScalarVT (OpNode (load addr:$src))),
28980b57cec5SDimitry Andric              (!cast<Instruction>(NAME#m) (ScalarVT (IMPLICIT_DEF)),
28990b57cec5SDimitry Andric            addr:$src)>;
29000b57cec5SDimitry Andric  }
29010b57cec5SDimitry Andric}
29020b57cec5SDimitry Andric
29030b57cec5SDimitry Andric/// sse1_fp_unop_p - SSE1 unops in packed form.
//   opc       - opcode byte for every definition.
//   OpcodeStr - mnemonic stem; "ps" is appended, and "v" is prepended for the
//               VEX forms.
//   OpNode    - DAG operator matched by each instruction's pattern.
//   sched     - scheduling info; the XMM / YMM entries (and their Folded
//               variants for memory forms) are used.
//   prds      - predicate list applied to the VEX-encoded definitions only.
2904*fe6060f1SDimitry Andricmulticlass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
29050b57cec5SDimitry Andric                          X86SchedWriteWidths sched, list<Predicate> prds> {
// VEX-encoded 128-bit (v4f32) and 256-bit (v8f32) forms; memory operands use
// the loadv* fragments.
29060b57cec5SDimitry Andriclet Predicates = prds in {
29070b57cec5SDimitry Andric  def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
29080b57cec5SDimitry Andric                       !strconcat("v", OpcodeStr,
29090b57cec5SDimitry Andric                                  "ps\t{$src, $dst|$dst, $src}"),
29100b57cec5SDimitry Andric                       [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>,
29110b57cec5SDimitry Andric                       VEX, Sched<[sched.XMM]>, VEX_WIG;
29120b57cec5SDimitry Andric  def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
29130b57cec5SDimitry Andric                       !strconcat("v", OpcodeStr,
29140b57cec5SDimitry Andric                                  "ps\t{$src, $dst|$dst, $src}"),
29150b57cec5SDimitry Andric                       [(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))]>,
29160b57cec5SDimitry Andric                       VEX, Sched<[sched.XMM.Folded]>, VEX_WIG;
29170b57cec5SDimitry Andric  def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
29180b57cec5SDimitry Andric                        !strconcat("v", OpcodeStr,
29190b57cec5SDimitry Andric                                   "ps\t{$src, $dst|$dst, $src}"),
29200b57cec5SDimitry Andric                        [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>,
29210b57cec5SDimitry Andric                        VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
29220b57cec5SDimitry Andric  def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
29230b57cec5SDimitry Andric                        !strconcat("v", OpcodeStr,
29240b57cec5SDimitry Andric                                   "ps\t{$src, $dst|$dst, $src}"),
29250b57cec5SDimitry Andric                        [(set VR256:$dst, (OpNode (loadv8f32 addr:$src)))]>,
29260b57cec5SDimitry Andric                        VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG;
29270b57cec5SDimitry Andric}
29280b57cec5SDimitry Andric
  // Non-VEX 128-bit forms. These sit outside the `prds` scope, and the memory
  // form uses memopv4f32 rather than loadv4f32.
29290b57cec5SDimitry Andric  def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
29300b57cec5SDimitry Andric                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
29310b57cec5SDimitry Andric                [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>,
29320b57cec5SDimitry Andric                Sched<[sched.XMM]>;
29330b57cec5SDimitry Andric  def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
29340b57cec5SDimitry Andric                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
29350b57cec5SDimitry Andric                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>,
29360b57cec5SDimitry Andric                Sched<[sched.XMM.Folded]>;
29370b57cec5SDimitry Andric}
29380b57cec5SDimitry Andric
29390b57cec5SDimitry Andric/// sse2_fp_unop_p - SSE2 unops in vector forms.
/// Packed double-precision ("pd") counterpart of the packed-single multiclass:
/// same set of forms, built on PDI with v2f64/v4f64 patterns.
///   opc       - opcode byte shared by every form defined here.
///   OpcodeStr - mnemonic stem; "pd" is appended, and "v" is prepended for the
///               VEX-encoded forms.
///   OpNode    - pattern operator matched by each instruction's pattern.
///   sched     - per-width scheduling info (.XMM/.YMM and their .Folded).
/// Unlike sse1_fp_unop_p, the predicate list of the VEX forms is fixed to
/// [HasAVX, NoVLX] rather than being supplied by the caller.
29400b57cec5SDimitry Andricmulticlass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
2941*fe6060f1SDimitry Andric                          SDPatternOperator OpNode, X86SchedWriteWidths sched> {
// VEX-encoded forms on VR128 (PDr/PDm) and VR256 (PDYr/PDYm, with VEX_L).
29420b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
29430b57cec5SDimitry Andric  def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
29440b57cec5SDimitry Andric                       !strconcat("v", OpcodeStr,
29450b57cec5SDimitry Andric                                  "pd\t{$src, $dst|$dst, $src}"),
29460b57cec5SDimitry Andric                       [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>,
29470b57cec5SDimitry Andric                       VEX, Sched<[sched.XMM]>, VEX_WIG;
29480b57cec5SDimitry Andric  def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
29490b57cec5SDimitry Andric                       !strconcat("v", OpcodeStr,
29500b57cec5SDimitry Andric                                  "pd\t{$src, $dst|$dst, $src}"),
29510b57cec5SDimitry Andric                       [(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))]>,
29520b57cec5SDimitry Andric                       VEX, Sched<[sched.XMM.Folded]>, VEX_WIG;
29530b57cec5SDimitry Andric  def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
29540b57cec5SDimitry Andric                        !strconcat("v", OpcodeStr,
29550b57cec5SDimitry Andric                                   "pd\t{$src, $dst|$dst, $src}"),
29560b57cec5SDimitry Andric                        [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>,
29570b57cec5SDimitry Andric                        VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
29580b57cec5SDimitry Andric  def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
29590b57cec5SDimitry Andric                        !strconcat("v", OpcodeStr,
29600b57cec5SDimitry Andric                                   "pd\t{$src, $dst|$dst, $src}"),
29610b57cec5SDimitry Andric                        [(set VR256:$dst, (OpNode (loadv4f64 addr:$src)))]>,
29620b57cec5SDimitry Andric                        VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG;
29630b57cec5SDimitry Andric}
29640b57cec5SDimitry Andric
// Non-VEX forms, defined outside the predicate scope above. The memory form
// matches memopv2f64 where the VEX memory forms match loadv2f64.
29650b57cec5SDimitry Andric  def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
29660b57cec5SDimitry Andric                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
29670b57cec5SDimitry Andric                [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>,
29680b57cec5SDimitry Andric                Sched<[sched.XMM]>;
29690b57cec5SDimitry Andric  def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
29700b57cec5SDimitry Andric                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
29710b57cec5SDimitry Andric                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>,
29720b57cec5SDimitry Andric                Sched<[sched.XMM.Folded]>;
29730b57cec5SDimitry Andric}
29740b57cec5SDimitry Andric
// Scalar single-precision unary op, intrinsic flavour. Instantiates
// sse_fp_unop_s_intr (as SS, under UseSSE1) and avx_fp_unop_s_intr (as
// V#NAME#SS, under AVXTarget) for the intrinsic whose record name is built as
// "int_x86_sse_" # OpcodeStr # "_ss".
// Note: opc, OpNode and sched are accepted but not referenced in this body.
29750b57cec5SDimitry Andricmulticlass sse1_fp_unop_s_intr<bits<8> opc, string OpcodeStr, SDNode OpNode,
29760b57cec5SDimitry Andric                          X86SchedWriteWidths sched, Predicate AVXTarget> {
29770b57cec5SDimitry Andric  defm SS        :  sse_fp_unop_s_intr<FR32, v4f32, sse_load_f32,
29785ffd83dbSDimitry Andric                      !cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss),
29790b57cec5SDimitry Andric                      UseSSE1, "SS">, XS;
29800b57cec5SDimitry Andric  defm V#NAME#SS  : avx_fp_unop_s_intr<FR32, v4f32, sse_load_f32,
29815ffd83dbSDimitry Andric                      !cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss),
29820b57cec5SDimitry Andric                      AVXTarget>,
29830b57cec5SDimitry Andric                      XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable;
29840b57cec5SDimitry Andric}
29850b57cec5SDimitry Andric
// Scalar single-precision ("ss") unary op. Instantiates sse_fp_unop_s as SS
// (under UseSSE1) and avx_fp_unop_s as V#NAME#SS (under AVXTarget), both on
// FR32/f32 with the SSEPackedSingle domain and sched.Scl scheduling.
//   opc       - opcode byte passed to both instantiations.
//   OpcodeStr - mnemonic stem; "ss" is appended ("v" prepended for AVX).
//   OpNode    - pattern operator forwarded to both instantiations.
//   AVXTarget - predicate forwarded to the AVX instantiation.
2986*fe6060f1SDimitry Andricmulticlass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
29870b57cec5SDimitry Andric                          X86SchedWriteWidths sched, Predicate AVXTarget> {
29885ffd83dbSDimitry Andric  defm SS        :  sse_fp_unop_s<opc, OpcodeStr#ss, FR32, f32, f32mem,
29890b57cec5SDimitry Andric                      ssmem, OpNode, SSEPackedSingle, sched.Scl, UseSSE1>, XS;
29905ffd83dbSDimitry Andric  defm V#NAME#SS  : avx_fp_unop_s<opc, "v"#OpcodeStr#ss, FR32, f32,
29910b57cec5SDimitry Andric                      f32mem, ssmem, OpNode, SSEPackedSingle, sched.Scl, AVXTarget>,
29920b57cec5SDimitry Andric                       XS, VEX_4V, VEX_LIG, VEX_WIG;
29930b57cec5SDimitry Andric}
29940b57cec5SDimitry Andric
// Scalar double-precision ("sd") unary op. Instantiates sse_fp_unop_s as SD
// (under UseSSE2) and avx_fp_unop_s as V#NAME#SD (under AVXTarget), both on
// FR64/f64 with the SSEPackedDouble domain and sched.Scl scheduling.
//   opc       - opcode byte passed to both instantiations.
//   OpcodeStr - mnemonic stem; "sd" is appended ("v" prepended for AVX).
//   OpNode    - pattern operator forwarded to both instantiations.
//   AVXTarget - predicate forwarded to the AVX instantiation.
2995*fe6060f1SDimitry Andricmulticlass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
29960b57cec5SDimitry Andric                          X86SchedWriteWidths sched, Predicate AVXTarget> {
29975ffd83dbSDimitry Andric  defm SD         : sse_fp_unop_s<opc, OpcodeStr#sd, FR64, f64, f64mem,
29980b57cec5SDimitry Andric                         sdmem, OpNode, SSEPackedDouble, sched.Scl, UseSSE2>, XD;
29995ffd83dbSDimitry Andric  defm V#NAME#SD  : avx_fp_unop_s<opc, "v"#OpcodeStr#sd, FR64, f64,
30000b57cec5SDimitry Andric                         f64mem, sdmem, OpNode, SSEPackedDouble, sched.Scl, AVXTarget>,
30010b57cec5SDimitry Andric                         XD, VEX_4V, VEX_LIG, VEX_WIG;
30020b57cec5SDimitry Andric}
30030b57cec5SDimitry Andric
30040b57cec5SDimitry Andric// Square root.
// Opcode 0x51 with any_fsqrt, in scalar and packed forms for both single and
// double precision. The double-precision forms use SchedWriteFSqrt64; the
// single-precision forms use SchedWriteFSqrt. SIMD_EXC is attached to the
// whole defm.
// NOTE(review): SIMD_EXC presumably marks these as able to raise SIMD FP
// exceptions - confirm against its definition.
3005480093f4SDimitry Andricdefm SQRT  : sse1_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, UseAVX>,
3006480093f4SDimitry Andric             sse1_fp_unop_p<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>,
3007480093f4SDimitry Andric             sse2_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt64, UseAVX>,
3008480093f4SDimitry Andric             sse2_fp_unop_p<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt64>, SIMD_EXC;
30090b57cec5SDimitry Andric
30100b57cec5SDimitry Andric// Reciprocal approximations. Note that these typically require refinement
30110b57cec5SDimitry Andric// in order to obtain suitable precision.
// Each defm combines the scalar, scalar-intrinsic and packed single-precision
// multiclasses. Unlike SQRT, the AVX predicates here are HasAVX / [HasAVX]
// (not UseAVX / [HasAVX, NoVLX]) and SIMD_EXC is not attached.
30120b57cec5SDimitry Andricdefm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>,
30130b57cec5SDimitry Andric             sse1_fp_unop_s_intr<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>,
30140b57cec5SDimitry Andric             sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, [HasAVX]>;
30150b57cec5SDimitry Andricdefm RCP   : sse1_fp_unop_s<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>,
30160b57cec5SDimitry Andric             sse1_fp_unop_s_intr<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>,
30170b57cec5SDimitry Andric             sse1_fp_unop_p<0x53, "rcp", X86frcp, SchedWriteFRcp, [HasAVX]>;
30180b57cec5SDimitry Andric
30190b57cec5SDimitry Andric// There is no f64 version of the reciprocal approximation instructions.
30200b57cec5SDimitry Andric
// Selection patterns for a scalar unary op whose result is merged into a
// vector: (Move $dst, (scalar_to_vector (OpNode (extractelt $src, 0)))) is
// mapped to the instruction named OpcPrefix # "r_Int" with operands
// ($dst, $src).
//   OpNode        - scalar operation applied to element 0 of $src.
//   OpcPrefix     - instruction name prefix; "V" is prepended for AVX.
//   Move          - node that merges the scalar result into $dst.
//   VT            - vector value type of $dst, $src and the result.
//   BasePredicate - predicate guarding the non-AVX pattern.
3021*fe6060f1SDimitry Andricmulticlass scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix, SDNode Move,
30220b57cec5SDimitry Andric                                      ValueType VT, Predicate BasePredicate> {
30230b57cec5SDimitry Andric  let Predicates = [BasePredicate] in {
30240b57cec5SDimitry Andric    def : Pat<(VT (Move VT:$dst, (scalar_to_vector
30250b57cec5SDimitry Andric                                  (OpNode (extractelt VT:$src, 0))))),
30260b57cec5SDimitry Andric              (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src)>;
30270b57cec5SDimitry Andric  }
30280b57cec5SDimitry Andric
30290b57cec5SDimitry Andric  // Repeat for AVX versions of the instructions.
30300b57cec5SDimitry Andric  let Predicates = [UseAVX] in {
30310b57cec5SDimitry Andric    def : Pat<(VT (Move VT:$dst, (scalar_to_vector
30320b57cec5SDimitry Andric                                  (OpNode (extractelt VT:$src, 0))))),
30330b57cec5SDimitry Andric              (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
30340b57cec5SDimitry Andric  }
30350b57cec5SDimitry Andric}
30360b57cec5SDimitry Andric
// Scalar square-root merge patterns: SQRTSS on v4f32 via X86Movss and SQRTSD
// on v2f64 via X86Movsd.
3037480093f4SDimitry Andricdefm : scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32, UseSSE1>;
3038480093f4SDimitry Andricdefm : scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64, UseSSE2>;
30390b57cec5SDimitry Andric
// Selection patterns for a scalar unary intrinsic whose result is merged into
// a vector: (Move $dst, (Intr $src)) is mapped to the instruction named
// OpcPrefix # "r_Int" with operands ($dst, $src).
//   Intr          - intrinsic applied to $src.
//   OpcPrefix     - instruction name prefix; "V" is prepended for AVX.
//   Move          - node that merges the intrinsic result into $dst.
//   VT            - vector value type of $dst, $src and the result.
//   BasePredicate - predicate guarding the non-AVX pattern.
// Note: the AVX pattern here is guarded by HasAVX, whereas
// scalar_unary_math_patterns uses UseAVX.
30400b57cec5SDimitry Andricmulticlass scalar_unary_math_intr_patterns<Intrinsic Intr, string OpcPrefix,
30410b57cec5SDimitry Andric                                           SDNode Move, ValueType VT,
30420b57cec5SDimitry Andric                                           Predicate BasePredicate> {
30430b57cec5SDimitry Andric  let Predicates = [BasePredicate] in {
30440b57cec5SDimitry Andric    def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
30450b57cec5SDimitry Andric              (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src)>;
30460b57cec5SDimitry Andric  }
30470b57cec5SDimitry Andric
30480b57cec5SDimitry Andric  // Repeat for AVX versions of the instructions.
30490b57cec5SDimitry Andric  let Predicates = [HasAVX] in {
30500b57cec5SDimitry Andric    def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
30510b57cec5SDimitry Andric              (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
30520b57cec5SDimitry Andric  }
30530b57cec5SDimitry Andric}
30540b57cec5SDimitry Andric
// Intrinsic merge patterns for the scalar reciprocal and reciprocal
// square-root approximations (v4f32 via X86Movss).
30550b57cec5SDimitry Andricdefm : scalar_unary_math_intr_patterns<int_x86_sse_rcp_ss, "RCPSS", X86Movss,
30560b57cec5SDimitry Andric                                       v4f32, UseSSE1>;
30570b57cec5SDimitry Andricdefm : scalar_unary_math_intr_patterns<int_x86_sse_rsqrt_ss, "RSQRTSS", X86Movss,
30580b57cec5SDimitry Andric                                       v4f32, UseSSE1>;
30590b57cec5SDimitry Andric
30600b57cec5SDimitry Andric
30610b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
30620b57cec5SDimitry Andric// SSE 1 & 2 - Non-temporal stores
30630b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
30640b57cec5SDimitry Andric
// Non-temporal store instructions and their selection patterns. Everything in
// this scope is given AddedComplexity = 400.
30650b57cec5SDimitry Andriclet AddedComplexity = 400 in { // Prefer non-temporal versions
// VEX-encoded forms (VR128 and, with VEX_L, VR256).
30660b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
30670b57cec5SDimitry Andriclet SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
30680b57cec5SDimitry Andricdef VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
30690b57cec5SDimitry Andric                     (ins f128mem:$dst, VR128:$src),
30700b57cec5SDimitry Andric                     "movntps\t{$src, $dst|$dst, $src}",
30710b57cec5SDimitry Andric                     [(alignednontemporalstore (v4f32 VR128:$src),
30720b57cec5SDimitry Andric                                               addr:$dst)]>, VEX, VEX_WIG;
30730b57cec5SDimitry Andricdef VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
30740b57cec5SDimitry Andric                     (ins f128mem:$dst, VR128:$src),
30750b57cec5SDimitry Andric                     "movntpd\t{$src, $dst|$dst, $src}",
30760b57cec5SDimitry Andric                     [(alignednontemporalstore (v2f64 VR128:$src),
30770b57cec5SDimitry Andric                                               addr:$dst)]>, VEX, VEX_WIG;
30780b57cec5SDimitry Andric} // SchedRW
30790b57cec5SDimitry Andric
30800b57cec5SDimitry Andriclet SchedRW = [SchedWriteFMoveLSNT.YMM.MR] in {
30810b57cec5SDimitry Andricdef VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
30820b57cec5SDimitry Andric                     (ins f256mem:$dst, VR256:$src),
30830b57cec5SDimitry Andric                     "movntps\t{$src, $dst|$dst, $src}",
30840b57cec5SDimitry Andric                     [(alignednontemporalstore (v8f32 VR256:$src),
30850b57cec5SDimitry Andric                                               addr:$dst)]>, VEX, VEX_L, VEX_WIG;
30860b57cec5SDimitry Andricdef VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
30870b57cec5SDimitry Andric                     (ins f256mem:$dst, VR256:$src),
30880b57cec5SDimitry Andric                     "movntpd\t{$src, $dst|$dst, $src}",
30890b57cec5SDimitry Andric                     [(alignednontemporalstore (v4f64 VR256:$src),
30900b57cec5SDimitry Andric                                               addr:$dst)]>, VEX, VEX_L, VEX_WIG;
30910b57cec5SDimitry Andric} // SchedRW
30920b57cec5SDimitry Andric
// Integer-domain VEX forms; these carry an explicit Sched<> instead of an
// enclosing SchedRW.
30930b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in {
30940b57cec5SDimitry Andricdef VMOVNTDQmr    : VPDI<0xE7, MRMDestMem, (outs),
30950b57cec5SDimitry Andric                         (ins i128mem:$dst, VR128:$src),
30960b57cec5SDimitry Andric                         "movntdq\t{$src, $dst|$dst, $src}",
30970b57cec5SDimitry Andric                         [(alignednontemporalstore (v2i64 VR128:$src),
30980b57cec5SDimitry Andric                                                   addr:$dst)]>, VEX, VEX_WIG,
30990b57cec5SDimitry Andric                         Sched<[SchedWriteVecMoveLSNT.XMM.MR]>;
31000b57cec5SDimitry Andricdef VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
31010b57cec5SDimitry Andric                    (ins i256mem:$dst, VR256:$src),
31020b57cec5SDimitry Andric                    "movntdq\t{$src, $dst|$dst, $src}",
31030b57cec5SDimitry Andric                    [(alignednontemporalstore (v4i64 VR256:$src),
31040b57cec5SDimitry Andric                                              addr:$dst)]>, VEX, VEX_L, VEX_WIG,
31050b57cec5SDimitry Andric                    Sched<[SchedWriteVecMoveLSNT.YMM.MR]>;
31060b57cec5SDimitry Andric} // ExeDomain
31070b57cec5SDimitry Andric} // Predicates
31080b57cec5SDimitry Andric
// Non-VEX forms on VR128.
31090b57cec5SDimitry Andriclet SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
31100b57cec5SDimitry Andricdef MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
31110b57cec5SDimitry Andric                    "movntps\t{$src, $dst|$dst, $src}",
31120b57cec5SDimitry Andric                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
31130b57cec5SDimitry Andricdef MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
31140b57cec5SDimitry Andric                    "movntpd\t{$src, $dst|$dst, $src}",
31150b57cec5SDimitry Andric                    [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
31160b57cec5SDimitry Andric} // SchedRW
31170b57cec5SDimitry Andric
// NOTE(review): MOVNTDQmr takes an f128mem operand although it is in the
// SSEPackedInt domain and its VEX counterpart VMOVNTDQmr uses i128mem -
// confirm whether this is intentional.
31180b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLSNT.XMM.MR] in
31190b57cec5SDimitry Andricdef MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
31200b57cec5SDimitry Andric                    "movntdq\t{$src, $dst|$dst, $src}",
31210b57cec5SDimitry Andric                    [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>;
31220b57cec5SDimitry Andric
// General-purpose register forms: these match nontemporalstore (not the
// aligned variant) on i32/i64 and require HasSSE2.
31230b57cec5SDimitry Andriclet SchedRW = [WriteStoreNT] in {
31240b57cec5SDimitry Andric// There is no AVX form for instructions below this point
31250b57cec5SDimitry Andricdef MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
31260b57cec5SDimitry Andric                 "movnti{l}\t{$src, $dst|$dst, $src}",
31270b57cec5SDimitry Andric                 [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
31280b57cec5SDimitry Andric               PS, Requires<[HasSSE2]>;
31290b57cec5SDimitry Andricdef MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
31300b57cec5SDimitry Andric                     "movnti{q}\t{$src, $dst|$dst, $src}",
31310b57cec5SDimitry Andric                     [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
31320b57cec5SDimitry Andric                  PS, Requires<[HasSSE2]>;
31330b57cec5SDimitry Andric} // SchedRW = [WriteStoreNT]
31340b57cec5SDimitry Andric
// The VMOVNTDQ instructions carry patterns only for v2i64/v4i64; map the
// remaining integer vector types onto the same instructions.
31350b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
31360b57cec5SDimitry Andric  def : Pat<(alignednontemporalstore (v8i32 VR256:$src), addr:$dst),
31370b57cec5SDimitry Andric            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
31380b57cec5SDimitry Andric  def : Pat<(alignednontemporalstore (v16i16 VR256:$src), addr:$dst),
31390b57cec5SDimitry Andric            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
31400b57cec5SDimitry Andric  def : Pat<(alignednontemporalstore (v32i8 VR256:$src), addr:$dst),
31410b57cec5SDimitry Andric            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
31420b57cec5SDimitry Andric
31430b57cec5SDimitry Andric  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
31440b57cec5SDimitry Andric            (VMOVNTDQmr addr:$dst, VR128:$src)>;
31450b57cec5SDimitry Andric  def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
31460b57cec5SDimitry Andric            (VMOVNTDQmr addr:$dst, VR128:$src)>;
31470b57cec5SDimitry Andric  def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
31480b57cec5SDimitry Andric            (VMOVNTDQmr addr:$dst, VR128:$src)>;
31490b57cec5SDimitry Andric}
31500b57cec5SDimitry Andric
// Same mapping for the non-VEX MOVNTDQmr.
31510b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
31520b57cec5SDimitry Andric  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
31530b57cec5SDimitry Andric            (MOVNTDQmr addr:$dst, VR128:$src)>;
31540b57cec5SDimitry Andric  def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
31550b57cec5SDimitry Andric            (MOVNTDQmr addr:$dst, VR128:$src)>;
31560b57cec5SDimitry Andric  def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
31570b57cec5SDimitry Andric            (MOVNTDQmr addr:$dst, VR128:$src)>;
31580b57cec5SDimitry Andric}
31590b57cec5SDimitry Andric
31600b57cec5SDimitry Andric} // AddedComplexity
31610b57cec5SDimitry Andric
31620b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
31630b57cec5SDimitry Andric// SSE 1 & 2 - Prefetch and memory fence
31640b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
31650b57cec5SDimitry Andric
31660b57cec5SDimitry Andric// Prefetch intrinsic.
// All four forms share opcode 0x18 and differ in the MRMNm format and in the
// third operand of the matched prefetch node: 3 -> prefetcht0, 2 -> prefetcht1,
// 1 -> prefetcht2, 0 -> prefetchnta. The fourth operand is always (i32 1).
// NOTE(review): the third operand is presumably the locality hint and the
// fourth the data/instruction cache selector - confirm against the prefetch
// node definition.
31670b57cec5SDimitry Andriclet Predicates = [HasSSEPrefetch], SchedRW = [WriteLoad] in {
31680b57cec5SDimitry Andricdef PREFETCHT0   : I<0x18, MRM1m, (outs), (ins i8mem:$src),
31690b57cec5SDimitry Andric    "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>, TB;
31700b57cec5SDimitry Andricdef PREFETCHT1   : I<0x18, MRM2m, (outs), (ins i8mem:$src),
31710b57cec5SDimitry Andric    "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>, TB;
31720b57cec5SDimitry Andricdef PREFETCHT2   : I<0x18, MRM3m, (outs), (ins i8mem:$src),
31730b57cec5SDimitry Andric    "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>, TB;
31740b57cec5SDimitry Andricdef PREFETCHNTA  : I<0x18, MRM0m, (outs), (ins i8mem:$src),
31750b57cec5SDimitry Andric    "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>, TB;
31760b57cec5SDimitry Andric}
31770b57cec5SDimitry Andric
31780b57cec5SDimitry Andric// FIXME: How should flush instruction be modeled?
// For now it is scheduled as WriteLoad. Selected for int_x86_sse2_clflush on a
// memory operand; requires HasSSE2.
31790b57cec5SDimitry Andriclet SchedRW = [WriteLoad] in {
31800b57cec5SDimitry Andric// Flush cache
31810b57cec5SDimitry Andricdef CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
31820b57cec5SDimitry Andric               "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
31830b57cec5SDimitry Andric               PS, Requires<[HasSSE2]>;
31840b57cec5SDimitry Andric}
31850b57cec5SDimitry Andric
31860b57cec5SDimitry Andriclet SchedRW = [WriteNop] in {
31870b57cec5SDimitry Andric// Pause. This "instruction" is encoded as "rep; nop", so even though it
31880b57cec5SDimitry Andric// was introduced with SSE2, it's backward compatible.
// Accordingly no Requires<> clause is attached here. Takes no operands and is
// selected for int_x86_sse2_pause.
31890b57cec5SDimitry Andricdef PAUSE : I<0x90, RawFrm, (outs), (ins),
31900b57cec5SDimitry Andric              "pause", [(int_x86_sse2_pause)]>, OBXS;
31910b57cec5SDimitry Andric}
31920b57cec5SDimitry Andric
31930b57cec5SDimitry Andriclet SchedRW = [WriteFence] in {
31940b57cec5SDimitry Andric// Load, store, and memory fence
31955ffd83dbSDimitry Andric// TODO: As with mfence, we may want to ease the availability of sfence/lfence
31960b57cec5SDimitry Andric// to include any 64-bit target.
// All three share opcode 0xAE and take no operands; they differ in the MRMNX
// format (7 = sfence, 5 = lfence, 6 = mfence) and in the required predicate
// (HasSSE1, HasSSE2 and HasMFence respectively).
31975ffd83dbSDimitry Andricdef SFENCE : I<0xAE, MRM7X, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
31980b57cec5SDimitry Andric               PS, Requires<[HasSSE1]>;
31995ffd83dbSDimitry Andricdef LFENCE : I<0xAE, MRM5X, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>,
32000b57cec5SDimitry Andric               PS, Requires<[HasSSE2]>;
32015ffd83dbSDimitry Andricdef MFENCE : I<0xAE, MRM6X, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>,
32020b57cec5SDimitry Andric               PS, Requires<[HasMFence]>;
32030b57cec5SDimitry Andric} // SchedRW
32040b57cec5SDimitry Andric
// The X86MFence node is also selected to MFENCE, in addition to the intrinsic
// pattern on the instruction itself.
32050b57cec5SDimitry Andricdef : Pat<(X86MFence), (MFENCE)>;
32060b57cec5SDimitry Andric
32070b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
32080b57cec5SDimitry Andric// SSE 1 & 2 - Load/Store MXCSR register
32090b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
32100b57cec5SDimitry Andric
// ldmxcsr/stmxcsr in VEX-encoded (VLDMXCSR/VSTMXCSR) and non-VEX
// (LDMXCSR/STMXCSR) forms. All four use opcode 0xAE with a 32-bit memory
// operand; the load forms use MRM2m and the store forms MRM3m. Each is marked
// hasSideEffects=1 and selected for the corresponding int_x86_sse_* intrinsic.
32110b57cec5SDimitry Andriclet mayLoad=1, hasSideEffects=1 in
32120b57cec5SDimitry Andricdef VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
32130b57cec5SDimitry Andric               "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
32140b57cec5SDimitry Andric               VEX, Sched<[WriteLDMXCSR]>, VEX_WIG;
32150b57cec5SDimitry Andriclet mayStore=1, hasSideEffects=1 in
32160b57cec5SDimitry Andricdef VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
32170b57cec5SDimitry Andric               "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
32180b57cec5SDimitry Andric               VEX, Sched<[WriteSTMXCSR]>, VEX_WIG;
32190b57cec5SDimitry Andric
32200b57cec5SDimitry Andriclet mayLoad=1, hasSideEffects=1 in
32210b57cec5SDimitry Andricdef LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src),
32220b57cec5SDimitry Andric              "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
32235ffd83dbSDimitry Andric              PS, Sched<[WriteLDMXCSR]>;
32240b57cec5SDimitry Andriclet mayStore=1, hasSideEffects=1 in
32250b57cec5SDimitry Andricdef STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst),
32260b57cec5SDimitry Andric              "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
32275ffd83dbSDimitry Andric              PS, Sched<[WriteSTMXCSR]>;
32280b57cec5SDimitry Andric
32290b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
32300b57cec5SDimitry Andric// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
32310b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
32320b57cec5SDimitry Andric
32330b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in { // SSE integer instructions
32340b57cec5SDimitry Andric
32350b57cec5SDimitry Andriclet hasSideEffects = 0 in {
32360b57cec5SDimitry Andricdef VMOVDQArr  : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
32370b57cec5SDimitry Andric                      "movdqa\t{$src, $dst|$dst, $src}", []>,
32380b57cec5SDimitry Andric                      Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, VEX_WIG;
32390b57cec5SDimitry Andricdef VMOVDQUrr  : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
32400b57cec5SDimitry Andric                      "movdqu\t{$src, $dst|$dst, $src}", []>,
32410b57cec5SDimitry Andric                      Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, VEX_WIG;
32420b57cec5SDimitry Andricdef VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
32430b57cec5SDimitry Andric                      "movdqa\t{$src, $dst|$dst, $src}", []>,
32440b57cec5SDimitry Andric                      Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, VEX_WIG;
32450b57cec5SDimitry Andricdef VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
32460b57cec5SDimitry Andric                      "movdqu\t{$src, $dst|$dst, $src}", []>,
32470b57cec5SDimitry Andric                      Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, VEX_WIG;
32480b57cec5SDimitry Andric}
32490b57cec5SDimitry Andric
32500b57cec5SDimitry Andric// For Disassembler
32510b57cec5SDimitry Andriclet isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
32520b57cec5SDimitry Andricdef VMOVDQArr_REV  : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
32530b57cec5SDimitry Andric                          "movdqa\t{$src, $dst|$dst, $src}", []>,
32540b57cec5SDimitry Andric                          Sched<[SchedWriteVecMoveLS.XMM.RR]>,
32550b57cec5SDimitry Andric                          VEX, VEX_WIG, FoldGenData<"VMOVDQArr">;
32560b57cec5SDimitry Andricdef VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
32570b57cec5SDimitry Andric                          "movdqa\t{$src, $dst|$dst, $src}", []>,
32580b57cec5SDimitry Andric                          Sched<[SchedWriteVecMoveLS.YMM.RR]>,
32590b57cec5SDimitry Andric                          VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQAYrr">;
32600b57cec5SDimitry Andricdef VMOVDQUrr_REV  : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
32610b57cec5SDimitry Andric                          "movdqu\t{$src, $dst|$dst, $src}", []>,
32620b57cec5SDimitry Andric                          Sched<[SchedWriteVecMoveLS.XMM.RR]>,
32630b57cec5SDimitry Andric                          VEX, VEX_WIG, FoldGenData<"VMOVDQUrr">;
32640b57cec5SDimitry Andricdef VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
32650b57cec5SDimitry Andric                          "movdqu\t{$src, $dst|$dst, $src}", []>,
32660b57cec5SDimitry Andric                          Sched<[SchedWriteVecMoveLS.YMM.RR]>,
32670b57cec5SDimitry Andric                          VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQUYrr">;
32680b57cec5SDimitry Andric}
32690b57cec5SDimitry Andric
32700b57cec5SDimitry Andriclet canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
32710b57cec5SDimitry Andric    hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in {
32720b57cec5SDimitry Andricdef VMOVDQArm  : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
32730b57cec5SDimitry Andric                      "movdqa\t{$src, $dst|$dst, $src}",
32740b57cec5SDimitry Andric                      [(set VR128:$dst, (alignedloadv2i64 addr:$src))]>,
32750b57cec5SDimitry Andric                      Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
32760b57cec5SDimitry Andricdef VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
32770b57cec5SDimitry Andric                      "movdqa\t{$src, $dst|$dst, $src}", []>,
32780b57cec5SDimitry Andric                      Sched<[SchedWriteVecMoveLS.YMM.RM]>,
32790b57cec5SDimitry Andric                      VEX, VEX_L, VEX_WIG;
32800b57cec5SDimitry Andricdef VMOVDQUrm  : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
32810b57cec5SDimitry Andric                   "vmovdqu\t{$src, $dst|$dst, $src}",
32820b57cec5SDimitry Andric                   [(set VR128:$dst, (loadv2i64 addr:$src))]>,
32830b57cec5SDimitry Andric                   Sched<[SchedWriteVecMoveLS.XMM.RM]>,
32840b57cec5SDimitry Andric                   XS, VEX, VEX_WIG;
32850b57cec5SDimitry Andricdef VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
32860b57cec5SDimitry Andric                   "vmovdqu\t{$src, $dst|$dst, $src}", []>,
32870b57cec5SDimitry Andric                   Sched<[SchedWriteVecMoveLS.YMM.RM]>,
32880b57cec5SDimitry Andric                   XS, VEX, VEX_L, VEX_WIG;
32890b57cec5SDimitry Andric}
32900b57cec5SDimitry Andric
32910b57cec5SDimitry Andriclet mayStore = 1, hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in {
32920b57cec5SDimitry Andricdef VMOVDQAmr  : VPDI<0x7F, MRMDestMem, (outs),
32930b57cec5SDimitry Andric                      (ins i128mem:$dst, VR128:$src),
32940b57cec5SDimitry Andric                      "movdqa\t{$src, $dst|$dst, $src}",
32950b57cec5SDimitry Andric                      [(alignedstore (v2i64 VR128:$src), addr:$dst)]>,
32960b57cec5SDimitry Andric                      Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_WIG;
32970b57cec5SDimitry Andricdef VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
32980b57cec5SDimitry Andric                      (ins i256mem:$dst, VR256:$src),
32990b57cec5SDimitry Andric                      "movdqa\t{$src, $dst|$dst, $src}", []>,
33000b57cec5SDimitry Andric                     Sched<[SchedWriteVecMoveLS.YMM.MR]>, VEX, VEX_L, VEX_WIG;
33010b57cec5SDimitry Andricdef VMOVDQUmr  : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
33020b57cec5SDimitry Andric                   "vmovdqu\t{$src, $dst|$dst, $src}",
33030b57cec5SDimitry Andric                   [(store (v2i64 VR128:$src), addr:$dst)]>,
33040b57cec5SDimitry Andric                   Sched<[SchedWriteVecMoveLS.XMM.MR]>, XS, VEX, VEX_WIG;
33050b57cec5SDimitry Andricdef VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
33060b57cec5SDimitry Andric                   "vmovdqu\t{$src, $dst|$dst, $src}",[]>,
33070b57cec5SDimitry Andric                   Sched<[SchedWriteVecMoveLS.YMM.MR]>, XS, VEX, VEX_L, VEX_WIG;
33080b57cec5SDimitry Andric}
33090b57cec5SDimitry Andric
33100b57cec5SDimitry Andriclet SchedRW = [SchedWriteVecMoveLS.XMM.RR] in {
33110b57cec5SDimitry Andriclet hasSideEffects = 0 in {
33120b57cec5SDimitry Andricdef MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
33130b57cec5SDimitry Andric                   "movdqa\t{$src, $dst|$dst, $src}", []>;
33140b57cec5SDimitry Andric
33150b57cec5SDimitry Andricdef MOVDQUrr :   I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
33160b57cec5SDimitry Andric                   "movdqu\t{$src, $dst|$dst, $src}", []>,
33170b57cec5SDimitry Andric                   XS, Requires<[UseSSE2]>;
33180b57cec5SDimitry Andric}
33190b57cec5SDimitry Andric
33200b57cec5SDimitry Andric// For Disassembler
33210b57cec5SDimitry Andriclet isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
33220b57cec5SDimitry Andricdef MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
33230b57cec5SDimitry Andric                       "movdqa\t{$src, $dst|$dst, $src}", []>,
33240b57cec5SDimitry Andric                       FoldGenData<"MOVDQArr">;
33250b57cec5SDimitry Andric
33260b57cec5SDimitry Andricdef MOVDQUrr_REV :   I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
33270b57cec5SDimitry Andric                       "movdqu\t{$src, $dst|$dst, $src}", []>,
33280b57cec5SDimitry Andric                       XS, Requires<[UseSSE2]>, FoldGenData<"MOVDQUrr">;
33290b57cec5SDimitry Andric}
33300b57cec5SDimitry Andric} // SchedRW
33310b57cec5SDimitry Andric
// Load forms of movdqa/movdqu: opcode 0x6F, MRMSrcMem, i128mem source into a
// VR128 destination. Both are flagged canFoldAsLoad, mayLoad and
// isReMaterializable and use the SchedWriteVecMoveLS.XMM.RM scheduling class.
// The selection patterns are commented out, so the pattern lists are empty.
33320b57cec5SDimitry Andriclet canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
33330b57cec5SDimitry Andric    hasSideEffects = 0, SchedRW = [SchedWriteVecMoveLS.XMM.RM] in {
33340b57cec5SDimitry Andricdef MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
33350b57cec5SDimitry Andric                   "movdqa\t{$src, $dst|$dst, $src}",
33360b57cec5SDimitry Andric                   [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
33370b57cec5SDimitry Andricdef MOVDQUrm :   I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
33380b57cec5SDimitry Andric                   "movdqu\t{$src, $dst|$dst, $src}",
33390b57cec5SDimitry Andric                   [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
33400b57cec5SDimitry Andric                 XS, Requires<[UseSSE2]>;
33410b57cec5SDimitry Andric}
33420b57cec5SDimitry Andric
// Store forms of movdqa/movdqu: opcode 0x7F, MRMDestMem, no outputs, with the
// i128mem destination and VR128 source both listed as inputs. Flagged mayStore
// and scheduled as SchedWriteVecMoveLS.XMM.MR. The selection patterns are
// commented out, so the pattern lists are empty.
33430b57cec5SDimitry Andriclet mayStore = 1, hasSideEffects = 0,
33440b57cec5SDimitry Andric    SchedRW = [SchedWriteVecMoveLS.XMM.MR] in {
33450b57cec5SDimitry Andricdef MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
33460b57cec5SDimitry Andric                   "movdqa\t{$src, $dst|$dst, $src}",
33470b57cec5SDimitry Andric                   [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
33480b57cec5SDimitry Andricdef MOVDQUmr :   I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
33490b57cec5SDimitry Andric                   "movdqu\t{$src, $dst|$dst, $src}",
33500b57cec5SDimitry Andric                   [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
33510b57cec5SDimitry Andric                 XS, Requires<[UseSSE2]>;
33520b57cec5SDimitry Andric}
33530b57cec5SDimitry Andric
33540b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
33550b57cec5SDimitry Andric
33560b57cec5SDimitry Andric// Reversed version with ".s" suffix for GAS compatibility.
// Maps the vmovdqa.s / vmovdqu.s mnemonics onto the VEX *_REV register-register
// defs, once for VR128 operands and once for VR256 operands (the Y variants).
// NOTE(review): the trailing 0 is presumably the InstAlias emit flag, making
// these parse-only aliases -- confirm against the InstAlias class definition.
33570b57cec5SDimitry Andricdef : InstAlias<"vmovdqa.s\t{$src, $dst|$dst, $src}",
33580b57cec5SDimitry Andric                (VMOVDQArr_REV VR128:$dst, VR128:$src), 0>;
33590b57cec5SDimitry Andricdef : InstAlias<"vmovdqa.s\t{$src, $dst|$dst, $src}",
33600b57cec5SDimitry Andric                (VMOVDQAYrr_REV VR256:$dst, VR256:$src), 0>;
33610b57cec5SDimitry Andricdef : InstAlias<"vmovdqu.s\t{$src, $dst|$dst, $src}",
33620b57cec5SDimitry Andric                (VMOVDQUrr_REV VR128:$dst, VR128:$src), 0>;
33630b57cec5SDimitry Andricdef : InstAlias<"vmovdqu.s\t{$src, $dst|$dst, $src}",
33640b57cec5SDimitry Andric                (VMOVDQUYrr_REV VR256:$dst, VR256:$src), 0>;
33650b57cec5SDimitry Andric
33660b57cec5SDimitry Andric// Reversed version with ".s" suffix for GAS compatibility.
// Non-VEX counterparts: movdqa.s / movdqu.s map onto MOVDQArr_REV and
// MOVDQUrr_REV with VR128 operands.
// NOTE(review): the trailing 0 is presumably the InstAlias emit flag
// (parse-only alias) -- confirm against the InstAlias class definition.
33670b57cec5SDimitry Andricdef : InstAlias<"movdqa.s\t{$src, $dst|$dst, $src}",
33680b57cec5SDimitry Andric                (MOVDQArr_REV VR128:$dst, VR128:$src), 0>;
33690b57cec5SDimitry Andricdef : InstAlias<"movdqu.s\t{$src, $dst|$dst, $src}",
33700b57cec5SDimitry Andric                (MOVDQUrr_REV VR128:$dst, VR128:$src), 0>;
33710b57cec5SDimitry Andric
// Selection patterns, active under [HasAVX, NoVLX], that route 128-bit integer
// vector loads and stores of v4i32 / v8i16 / v16i8 to the VEX movdqa/movdqu
// memory forms: aligned accesses select VMOVDQArm / VMOVDQAmr, all other
// accesses select VMOVDQUrm / VMOVDQUmr.
// NOTE(review): v2i64 is not listed here; presumably it is matched elsewhere
// in the file -- confirm.
33720b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
33730b57cec5SDimitry Andric  // Additional patterns for other integer sizes.
33740b57cec5SDimitry Andric  def : Pat<(alignedloadv4i32 addr:$src),
33750b57cec5SDimitry Andric            (VMOVDQArm addr:$src)>;
33760b57cec5SDimitry Andric  def : Pat<(alignedloadv8i16 addr:$src),
33770b57cec5SDimitry Andric            (VMOVDQArm addr:$src)>;
33780b57cec5SDimitry Andric  def : Pat<(alignedloadv16i8 addr:$src),
33790b57cec5SDimitry Andric            (VMOVDQArm addr:$src)>;
33800b57cec5SDimitry Andric  def : Pat<(loadv4i32 addr:$src),
33810b57cec5SDimitry Andric            (VMOVDQUrm addr:$src)>;
33820b57cec5SDimitry Andric  def : Pat<(loadv8i16 addr:$src),
33830b57cec5SDimitry Andric            (VMOVDQUrm addr:$src)>;
33840b57cec5SDimitry Andric  def : Pat<(loadv16i8 addr:$src),
33850b57cec5SDimitry Andric            (VMOVDQUrm addr:$src)>;
33860b57cec5SDimitry Andric
  // Store side: same element types, aligned stores first, then plain stores.
33870b57cec5SDimitry Andric  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
33880b57cec5SDimitry Andric            (VMOVDQAmr addr:$dst, VR128:$src)>;
33890b57cec5SDimitry Andric  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
33900b57cec5SDimitry Andric            (VMOVDQAmr addr:$dst, VR128:$src)>;
33910b57cec5SDimitry Andric  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
33920b57cec5SDimitry Andric            (VMOVDQAmr addr:$dst, VR128:$src)>;
33930b57cec5SDimitry Andric  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
33940b57cec5SDimitry Andric            (VMOVDQUmr addr:$dst, VR128:$src)>;
33950b57cec5SDimitry Andric  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
33960b57cec5SDimitry Andric            (VMOVDQUmr addr:$dst, VR128:$src)>;
33970b57cec5SDimitry Andric  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
33980b57cec5SDimitry Andric            (VMOVDQUmr addr:$dst, VR128:$src)>;
33990b57cec5SDimitry Andric}
34000b57cec5SDimitry Andric
34010b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
34020b57cec5SDimitry Andric// SSE2 - Packed Integer Arithmetic Instructions
34030b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
34040b57cec5SDimitry Andric
34050b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in { // SSE integer instructions
34060b57cec5SDimitry Andric
34070b57cec5SDimitry Andric/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types
///
/// Produces two records per instantiation:
///   rr - both sources in RC registers (MRMSrcReg); marked isCommutable.
///   rm - second source read from memory through memop_frag (MRMSrcMem).
///
/// Parameters:
///   opc        - opcode byte shared by both forms.
///   OpcodeStr  - mnemonic that the operand list is appended to.
///   OpNode     - SDNode matched by the selection pattern.
///   DstVT      - value type of the result.
///   SrcVT      - value type applied to $src1 in the pattern.
///   RC         - register class of $dst, $src1 and (for rr) $src2.
///   memop_frag - load fragment used for the rm form's memory operand.
///   x86memop   - memory operand type of the rm form's $src2.
///   sched      - scheduling class; rm uses sched.Folded/sched.ReadAfterFold.
///   Is2Addr    - when set, the asm string lists only $src2 and $dst;
///                otherwise it lists $src2, $src1 and $dst.
34080b57cec5SDimitry Andricmulticlass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
34090b57cec5SDimitry Andric                         ValueType DstVT, ValueType SrcVT, RegisterClass RC,
34100b57cec5SDimitry Andric                         PatFrag memop_frag, X86MemOperand x86memop,
34110b57cec5SDimitry Andric                         X86FoldableSchedWrite sched, bit Is2Addr = 1> {
  // The brace-less "let ... in" applies only to the rr def that follows.
34120b57cec5SDimitry Andric  let isCommutable = 1 in
34130b57cec5SDimitry Andric  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
34140b57cec5SDimitry Andric       (ins RC:$src1, RC:$src2),
34150b57cec5SDimitry Andric       !if(Is2Addr,
34160b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
34170b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
34180b57cec5SDimitry Andric       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
34190b57cec5SDimitry Andric       Sched<[sched]>;
34200b57cec5SDimitry Andric  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
34210b57cec5SDimitry Andric       (ins RC:$src1, x86memop:$src2),
34220b57cec5SDimitry Andric       !if(Is2Addr,
34230b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
34240b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
34250b57cec5SDimitry Andric       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
34260b57cec5SDimitry Andric                                     (memop_frag addr:$src2))))]>,
34270b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
34280b57cec5SDimitry Andric}
34290b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
34300b57cec5SDimitry Andric
// Packed integer arithmetic instantiations of PDI_binop_all (defined outside
// this chunk). Arguments in order: opcode, mnemonic, SDNode, 128-bit vector
// type, 256-bit vector type, scheduling class, a 0/1 flag, and a predicate.
// NOTE(review): the 0/1 flag is presumably the commutability bit (1 for
// add/mul/min/max/avg, 0 for the subtract family) -- confirm against
// PDI_binop_all. Byte and word element forms pass NoVLX_Or_NoBWI; dword and
// qword element forms pass NoVLX.
34310b57cec5SDimitry Andricdefm PADDB   : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8,
34320b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
34330b57cec5SDimitry Andricdefm PADDW   : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16,
34340b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
34350b57cec5SDimitry Andricdefm PADDD   : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
34360b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX>;
34370b57cec5SDimitry Andricdefm PADDQ   : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
34380b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX>;
34390b57cec5SDimitry Andricdefm PADDSB  : PDI_binop_all<0xEC, "paddsb", saddsat, v16i8, v32i8,
34400b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
34410b57cec5SDimitry Andricdefm PADDSW  : PDI_binop_all<0xED, "paddsw", saddsat, v8i16, v16i16,
34420b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
34430b57cec5SDimitry Andricdefm PADDUSB : PDI_binop_all<0xDC, "paddusb", uaddsat, v16i8, v32i8,
34440b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
34450b57cec5SDimitry Andricdefm PADDUSW : PDI_binop_all<0xDD, "paddusw", uaddsat, v8i16, v16i16,
34460b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
// Multiplies use the SchedWriteVecIMul scheduling class instead of VecALU.
34470b57cec5SDimitry Andricdefm PMULLW  : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
34480b57cec5SDimitry Andric                             SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
34490b57cec5SDimitry Andricdefm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16,
34500b57cec5SDimitry Andric                             SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
34510b57cec5SDimitry Andricdefm PMULHW  : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16,
34520b57cec5SDimitry Andric                             SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
34530b57cec5SDimitry Andricdefm PSUBB   : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
34540b57cec5SDimitry Andric                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
34550b57cec5SDimitry Andricdefm PSUBW   : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
34560b57cec5SDimitry Andric                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
34570b57cec5SDimitry Andricdefm PSUBD   : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
34580b57cec5SDimitry Andric                             SchedWriteVecALU, 0, NoVLX>;
34590b57cec5SDimitry Andricdefm PSUBQ   : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
34600b57cec5SDimitry Andric                             SchedWriteVecALU, 0, NoVLX>;
34610b57cec5SDimitry Andricdefm PSUBSB  : PDI_binop_all<0xE8, "psubsb", ssubsat, v16i8, v32i8,
34620b57cec5SDimitry Andric                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
34630b57cec5SDimitry Andricdefm PSUBSW  : PDI_binop_all<0xE9, "psubsw", ssubsat, v8i16, v16i16,
34640b57cec5SDimitry Andric                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
34650b57cec5SDimitry Andricdefm PSUBUSB : PDI_binop_all<0xD8, "psubusb", usubsat, v16i8, v32i8,
34660b57cec5SDimitry Andric                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
34670b57cec5SDimitry Andricdefm PSUBUSW : PDI_binop_all<0xD9, "psubusw", usubsat, v8i16, v16i16,
34680b57cec5SDimitry Andric                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
34690b57cec5SDimitry Andricdefm PMINUB  : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8,
34700b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
34710b57cec5SDimitry Andricdefm PMINSW  : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16,
34720b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
34730b57cec5SDimitry Andricdefm PMAXUB  : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8,
34740b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
34750b57cec5SDimitry Andricdefm PMAXSW  : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16,
34760b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
34770b57cec5SDimitry Andricdefm PAVGB   : PDI_binop_all<0xE0, "pavgb", X86avg, v16i8, v32i8,
34780b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
34790b57cec5SDimitry Andricdefm PAVGW   : PDI_binop_all<0xE3, "pavgw", X86avg, v8i16, v16i16,
34800b57cec5SDimitry Andric                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
34810b57cec5SDimitry Andricdefm PMULUDQ : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64,
34820b57cec5SDimitry Andric                             SchedWriteVecIMul, 1, NoVLX>;
34830b57cec5SDimitry Andric
// pmaddwd (opcode 0xF5) via PDI_binop_rm2: result type v4i32/v8i32, source
// type v8i16/v16i16, all matching X86vpmaddwd.
//   VPMADDWD  - VR128, i128mem, `load` fragment, Is2Addr = 0, VEX_4V.
//   VPMADDWDY - VR256, i256mem, `load` fragment, Is2Addr = 0, VEX_4V + VEX_L.
//   PMADDWD   - VR128, i128mem, `memop` fragment, default Is2Addr, with $src1
//               tied to $dst.
34840b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in
34850b57cec5SDimitry Andricdefm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
34860b57cec5SDimitry Andric                              load, i128mem, SchedWriteVecIMul.XMM, 0>,
34870b57cec5SDimitry Andric                              VEX_4V, VEX_WIG;
34880b57cec5SDimitry Andric
34890b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
34900b57cec5SDimitry Andricdefm VPMADDWDY : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v8i32, v16i16,
34910b57cec5SDimitry Andric                               VR256, load, i256mem, SchedWriteVecIMul.YMM,
34920b57cec5SDimitry Andric                               0>, VEX_4V, VEX_L, VEX_WIG;
34930b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in
34940b57cec5SDimitry Andricdefm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
34950b57cec5SDimitry Andric                             memop, i128mem, SchedWriteVecIMul.XMM>;
34960b57cec5SDimitry Andric
// psadbw (opcode 0xF6) via PDI_binop_rm2: result type v2i64/v4i64, source type
// v16i8/v32i8, matching X86psadbw and scheduled as SchedWritePSADBW.
//   VPSADBW  - VR128, `load` fragment, Is2Addr = 0, VEX_4V.
//   VPSADBWY - VR256, `load` fragment, Is2Addr = 0, VEX_4V + VEX_L.
//   PSADBW   - VR128, `memop` fragment, default Is2Addr, $src1 tied to $dst.
34970b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in
34980b57cec5SDimitry Andricdefm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8, VR128,
34990b57cec5SDimitry Andric                             load, i128mem, SchedWritePSADBW.XMM, 0>,
35000b57cec5SDimitry Andric                             VEX_4V, VEX_WIG;
35010b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
35020b57cec5SDimitry Andricdefm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8, VR256,
35030b57cec5SDimitry Andric                             load, i256mem, SchedWritePSADBW.YMM, 0>,
35040b57cec5SDimitry Andric                             VEX_4V, VEX_L, VEX_WIG;
35050b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in
35060b57cec5SDimitry Andricdefm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128,
35070b57cec5SDimitry Andric                            memop, i128mem, SchedWritePSADBW.XMM>;
35080b57cec5SDimitry Andric
35090b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
35100b57cec5SDimitry Andric// SSE2 - Packed Integer Logical Instructions
35110b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
35120b57cec5SDimitry Andric
/// PDI_binop_rmi - Binary operator whose second operand can be a 128-bit
/// register, a 128-bit memory location, or an 8-bit immediate.
///
/// Produces three records per instantiation:
///   rr - opcode opc, $src2 in VR128, pattern uses OpNode.
///   rm - opcode opc, $src2 in i128mem loaded through ld_frag, uses OpNode.
///   ri - opcode opc2 with format ImmForm, $src2 is u8imm, pattern uses
///        OpNode2 with an (i8 timm) operand.
///
/// Parameters:
///   opc / opc2 - opcode bytes for the register/memory forms and the
///                immediate form respectively.
///   ImmForm    - instruction format of the immediate form.
///   OpcodeStr  - mnemonic that the operand list is appended to.
///   OpNode     - SDNode for the rr/rm patterns.
///   OpNode2    - SDNode for the ri pattern.
///   RC         - register class of $dst and $src1.
///   sched      - scheduling class for rr; rm uses its Folded/ReadAfterFold.
///   schedImm   - scheduling class for ri.
///   DstVT      - value type of the result.
///   SrcVT      - value type applied to the rr/rm second operand.
///   ld_frag    - load fragment used by the rm form.
///   Is2Addr    - selects the two-operand or three-operand asm string.
35130b57cec5SDimitry Andricmulticlass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
35140b57cec5SDimitry Andric                         string OpcodeStr, SDNode OpNode,
35150b57cec5SDimitry Andric                         SDNode OpNode2, RegisterClass RC,
35160b57cec5SDimitry Andric                         X86FoldableSchedWrite sched,
35170b57cec5SDimitry Andric                         X86FoldableSchedWrite schedImm,
35180b57cec5SDimitry Andric                         ValueType DstVT, ValueType SrcVT,
35190b57cec5SDimitry Andric                         PatFrag ld_frag, bit Is2Addr = 1> {
35200b57cec5SDimitry Andric  // src2 is always 128-bit
35210b57cec5SDimitry Andric  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
35220b57cec5SDimitry Andric       (ins RC:$src1, VR128:$src2),
35230b57cec5SDimitry Andric       !if(Is2Addr,
35240b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
35250b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
35260b57cec5SDimitry Andric       [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))]>,
35270b57cec5SDimitry Andric       Sched<[sched]>;
35280b57cec5SDimitry Andric  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
35290b57cec5SDimitry Andric       (ins RC:$src1, i128mem:$src2),
35300b57cec5SDimitry Andric       !if(Is2Addr,
35310b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
35320b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
35330b57cec5SDimitry Andric       [(set RC:$dst, (DstVT (OpNode RC:$src1,
35340b57cec5SDimitry Andric                       (SrcVT (ld_frag addr:$src2)))))]>,
35350b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Immediate form: separate opcode (opc2) and format (ImmForm), built from
  // PDIi8 rather than PDI.
35360b57cec5SDimitry Andric  def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
35370b57cec5SDimitry Andric       (ins RC:$src1, u8imm:$src2),
35380b57cec5SDimitry Andric       !if(Is2Addr,
35390b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
35400b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
35418bcb0991SDimitry Andric       [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 timm:$src2))))]>,
35420b57cec5SDimitry Andric       Sched<[schedImm]>;
35430b57cec5SDimitry Andric}
35440b57cec5SDimitry Andric
/// PDI_binop_rmi_all - Instantiates PDI_binop_rmi three times for one
/// operation:
///   V#NAME   - "v"-prefixed mnemonic, VR128, DstVT128, `load` fragment,
///              Is2Addr = 0, VEX_4V; predicates [HasAVX, prd].
///   V#NAME#Y - "v"-prefixed mnemonic, VR256, DstVT256, `load` fragment,
///              Is2Addr = 0, VEX_4V + VEX_L; predicates [HasAVX2, prd].
///   NAME     - plain mnemonic, VR128, DstVT128, `memop` fragment, default
///              Is2Addr, with $src1 tied to $dst.
/// SrcVT is passed unchanged to all three. sched and schedImm are
/// X86SchedWriteWidths whose .XMM / .YMM members are picked per width.
35450b57cec5SDimitry Andricmulticlass PDI_binop_rmi_all<bits<8> opc, bits<8> opc2, Format ImmForm,
35460b57cec5SDimitry Andric                             string OpcodeStr, SDNode OpNode,
35470b57cec5SDimitry Andric                             SDNode OpNode2, ValueType DstVT128,
35480b57cec5SDimitry Andric                             ValueType DstVT256, ValueType SrcVT,
35490b57cec5SDimitry Andric                             X86SchedWriteWidths sched,
35500b57cec5SDimitry Andric                             X86SchedWriteWidths schedImm, Predicate prd> {
35510b57cec5SDimitry Andriclet Predicates = [HasAVX, prd] in
35520b57cec5SDimitry Andric  defm V#NAME : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr),
35530b57cec5SDimitry Andric                              OpNode, OpNode2, VR128, sched.XMM, schedImm.XMM,
35540b57cec5SDimitry Andric                              DstVT128, SrcVT, load, 0>, VEX_4V, VEX_WIG;
35550b57cec5SDimitry Andriclet Predicates = [HasAVX2, prd] in
35560b57cec5SDimitry Andric  defm V#NAME#Y : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr),
35570b57cec5SDimitry Andric                                OpNode, OpNode2, VR256, sched.YMM, schedImm.YMM,
35580b57cec5SDimitry Andric                                DstVT256, SrcVT, load, 0>, VEX_4V, VEX_L,
35590b57cec5SDimitry Andric                                VEX_WIG;
35600b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in
35610b57cec5SDimitry Andric  defm NAME : PDI_binop_rmi<opc, opc2, ImmForm, OpcodeStr, OpNode, OpNode2,
35620b57cec5SDimitry Andric                            VR128, sched.XMM, schedImm.XMM, DstVT128, SrcVT,
35630b57cec5SDimitry Andric                            memop>;
35640b57cec5SDimitry Andric}
35650b57cec5SDimitry Andric
/// PDI_binop_ri - Immediate-only binary operator. Produces a single `ri`
/// record built from PDIi8: $src1 in RC, $src2 as u8imm, result in RC.
///
/// Parameters:
///   opc       - opcode byte.
///   ImmForm   - instruction format.
///   OpcodeStr - mnemonic that the operand list is appended to.
///   OpNode    - SDNode matched with RC:$src1 and an (i8 timm) operand.
///   RC        - register class of $dst and $src1.
///   VT        - value type of the result.
///   sched     - scheduling class.
///   Is2Addr   - selects the two-operand or three-operand asm string.
35660b57cec5SDimitry Andricmulticlass PDI_binop_ri<bits<8> opc, Format ImmForm, string OpcodeStr,
35670b57cec5SDimitry Andric                        SDNode OpNode, RegisterClass RC, ValueType VT,
35680b57cec5SDimitry Andric                        X86FoldableSchedWrite sched, bit Is2Addr = 1> {
35690b57cec5SDimitry Andric  def ri : PDIi8<opc, ImmForm, (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
35700b57cec5SDimitry Andric       !if(Is2Addr,
35710b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
35720b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
35738bcb0991SDimitry Andric       [(set RC:$dst, (VT (OpNode RC:$src1, (i8 timm:$src2))))]>,
35740b57cec5SDimitry Andric       Sched<[sched]>;
35750b57cec5SDimitry Andric}
35760b57cec5SDimitry Andric
/// PDI_binop_ri_all - Instantiates PDI_binop_ri three times for one
/// operation, with the value type fixed to byte vectors:
///   V#NAME   - "v"-prefixed mnemonic, VR128, v16i8, Is2Addr = 0, VEX_4V;
///              predicates [HasAVX, NoVLX_Or_NoBWI].
///   V#NAME#Y - "v"-prefixed mnemonic, VR256, v32i8, Is2Addr = 0,
///              VEX_4V + VEX_L; predicates [HasAVX2, NoVLX_Or_NoBWI].
///   NAME     - plain mnemonic, VR128, v16i8, default Is2Addr, with $src1
///              tied to $dst.
35770b57cec5SDimitry Andricmulticlass PDI_binop_ri_all<bits<8> opc, Format ImmForm, string OpcodeStr,
35780b57cec5SDimitry Andric                            SDNode OpNode, X86SchedWriteWidths sched> {
35790b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in
35800b57cec5SDimitry Andric  defm V#NAME : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode,
35810b57cec5SDimitry Andric                             VR128, v16i8, sched.XMM, 0>, VEX_4V, VEX_WIG;
35820b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
35830b57cec5SDimitry Andric  defm V#NAME#Y : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode,
35840b57cec5SDimitry Andric                               VR256, v32i8, sched.YMM, 0>,
35850b57cec5SDimitry Andric                               VEX_4V, VEX_L, VEX_WIG;
35860b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in
35870b57cec5SDimitry Andric  defm NAME : PDI_binop_ri<opc, ImmForm, OpcodeStr, OpNode, VR128, v16i8,
35880b57cec5SDimitry Andric                           sched.XMM>;
35890b57cec5SDimitry Andric}
35900b57cec5SDimitry Andric
// Packed shift instantiations, all in the SSEPackedInt execution domain.
// The PDI_binop_rmi_all arguments are: register/memory opcode, immediate
// opcode, immediate format, mnemonic, register-count SDNode, immediate-count
// SDNode, 128-bit result type, 256-bit result type, count-operand type,
// scheduling classes, and predicate. Word, dword and qword variants of one
// direction share the immediate opcodes 0x71 / 0x72 / 0x73 and are told apart
// by the immediate format (MRM6r, MRM2r, MRM4r).
35910b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in {
35920b57cec5SDimitry Andric  defm PSLLW : PDI_binop_rmi_all<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,
35930b57cec5SDimitry Andric                                 v8i16, v16i16, v8i16, SchedWriteVecShift,
35940b57cec5SDimitry Andric                                 SchedWriteVecShiftImm, NoVLX_Or_NoBWI>;
35950b57cec5SDimitry Andric  defm PSLLD : PDI_binop_rmi_all<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli,
35960b57cec5SDimitry Andric                                 v4i32, v8i32, v4i32, SchedWriteVecShift,
35970b57cec5SDimitry Andric                                 SchedWriteVecShiftImm, NoVLX>;
35980b57cec5SDimitry Andric  defm PSLLQ : PDI_binop_rmi_all<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli,
35990b57cec5SDimitry Andric                                 v2i64, v4i64, v2i64, SchedWriteVecShift,
36000b57cec5SDimitry Andric                                 SchedWriteVecShiftImm, NoVLX>;
36010b57cec5SDimitry Andric
36020b57cec5SDimitry Andric  defm PSRLW : PDI_binop_rmi_all<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli,
36030b57cec5SDimitry Andric                                 v8i16, v16i16, v8i16, SchedWriteVecShift,
36040b57cec5SDimitry Andric                                 SchedWriteVecShiftImm, NoVLX_Or_NoBWI>;
36050b57cec5SDimitry Andric  defm PSRLD : PDI_binop_rmi_all<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli,
36060b57cec5SDimitry Andric                                 v4i32, v8i32, v4i32, SchedWriteVecShift,
36070b57cec5SDimitry Andric                                 SchedWriteVecShiftImm, NoVLX>;
36080b57cec5SDimitry Andric  defm PSRLQ : PDI_binop_rmi_all<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli,
36090b57cec5SDimitry Andric                                 v2i64, v4i64, v2i64, SchedWriteVecShift,
36100b57cec5SDimitry Andric                                 SchedWriteVecShiftImm, NoVLX>;
36110b57cec5SDimitry Andric
  // Only word and dword psra* forms are defined in this group.
36120b57cec5SDimitry Andric  defm PSRAW : PDI_binop_rmi_all<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai,
36130b57cec5SDimitry Andric                                 v8i16, v16i16, v8i16, SchedWriteVecShift,
36140b57cec5SDimitry Andric                                 SchedWriteVecShiftImm, NoVLX_Or_NoBWI>;
36150b57cec5SDimitry Andric  defm PSRAD : PDI_binop_rmi_all<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
36160b57cec5SDimitry Andric                                 v4i32, v8i32, v4i32, SchedWriteVecShift,
36170b57cec5SDimitry Andric                                 SchedWriteVecShiftImm, NoVLX>;
36180b57cec5SDimitry Andric
  // pslldq / psrldq are immediate-only (PDI_binop_ri_all) and use the
  // SchedWriteShuffle scheduling class rather than the shift classes.
36190b57cec5SDimitry Andric  defm PSLLDQ : PDI_binop_ri_all<0x73, MRM7r, "pslldq", X86vshldq,
36200b57cec5SDimitry Andric                                 SchedWriteShuffle>;
36210b57cec5SDimitry Andric  defm PSRLDQ : PDI_binop_ri_all<0x73, MRM3r, "psrldq", X86vshrdq,
36220b57cec5SDimitry Andric                                 SchedWriteShuffle>;
36230b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
36240b57cec5SDimitry Andric
36250b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
36260b57cec5SDimitry Andric// SSE2 - Packed Integer Comparison Instructions
36270b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
36280b57cec5SDimitry Andric
// Packed integer compares via PDI_binop_all (defined outside this chunk), for
// byte, word and dword element types. All pass TruePredicate as the predicate
// argument. The pcmpeq* forms pass 1 and the pcmpgt* forms pass 0 for the
// flag argument.
// NOTE(review): that flag is presumably the commutability bit -- confirm
// against PDI_binop_all.
36290b57cec5SDimitry Andricdefm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8,
36300b57cec5SDimitry Andric                             SchedWriteVecALU, 1, TruePredicate>;
36310b57cec5SDimitry Andricdefm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16,
36320b57cec5SDimitry Andric                             SchedWriteVecALU, 1, TruePredicate>;
36330b57cec5SDimitry Andricdefm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32,
36340b57cec5SDimitry Andric                             SchedWriteVecALU, 1, TruePredicate>;
36350b57cec5SDimitry Andricdefm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8,
36360b57cec5SDimitry Andric                             SchedWriteVecALU, 0, TruePredicate>;
36370b57cec5SDimitry Andricdefm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16,
36380b57cec5SDimitry Andric                             SchedWriteVecALU, 0, TruePredicate>;
36390b57cec5SDimitry Andricdefm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
36400b57cec5SDimitry Andric                             SchedWriteVecALU, 0, TruePredicate>;
36410b57cec5SDimitry Andric
36420b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
36430b57cec5SDimitry Andric// SSE2 - Packed Integer Shuffle Instructions
36440b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
36450b57cec5SDimitry Andric
36460b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in {
/// sse2_pshuffle - Immediate-controlled shuffle with opcode 0x70. Produces
/// six records per instantiation, each taking a source ($src1) and a u8imm
/// control ($src2) and always using the three-operand asm string:
///   V#NAME#ri / V#NAME#mi   - VR128 / i128mem source, vt128 result, VEX;
///                             predicates [HasAVX, prd].
///   V#NAME#Yri / V#NAME#Ymi - VR256 / i256mem source, vt256 result,
///                             VEX + VEX_L; predicates [HasAVX2, prd].
///   ri / mi                 - VR128 / i128mem source, vt128 result;
///                             predicates [UseSSE2].
/// The VEX memory forms load through `load`; the non-VEX mi form loads through
/// `memop`. Memory forms are scheduled with the .Folded member only.
/// The defs are built from Ii8, so no prefix class is applied here; the
/// instantiating defm supplies one.
36470b57cec5SDimitry Andricmulticlass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256,
36480b57cec5SDimitry Andric                         SDNode OpNode, X86SchedWriteWidths sched,
36490b57cec5SDimitry Andric                         Predicate prd> {
36500b57cec5SDimitry Andriclet Predicates = [HasAVX, prd] in {
36510b57cec5SDimitry Andric  def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst),
36520b57cec5SDimitry Andric                      (ins VR128:$src1, u8imm:$src2),
36530b57cec5SDimitry Andric                      !strconcat("v", OpcodeStr,
36540b57cec5SDimitry Andric                                 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
36550b57cec5SDimitry Andric                      [(set VR128:$dst,
36568bcb0991SDimitry Andric                        (vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>,
36570b57cec5SDimitry Andric                      VEX, Sched<[sched.XMM]>, VEX_WIG;
36580b57cec5SDimitry Andric  def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
36590b57cec5SDimitry Andric                      (ins i128mem:$src1, u8imm:$src2),
36600b57cec5SDimitry Andric                      !strconcat("v", OpcodeStr,
36610b57cec5SDimitry Andric                                 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
36620b57cec5SDimitry Andric                     [(set VR128:$dst,
36630b57cec5SDimitry Andric                       (vt128 (OpNode (load addr:$src1),
36648bcb0991SDimitry Andric                        (i8 timm:$src2))))]>, VEX,
36650b57cec5SDimitry Andric                  Sched<[sched.XMM.Folded]>, VEX_WIG;
36660b57cec5SDimitry Andric}
36670b57cec5SDimitry Andric
36680b57cec5SDimitry Andriclet Predicates = [HasAVX2, prd] in {
36690b57cec5SDimitry Andric  def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst),
36700b57cec5SDimitry Andric                       (ins VR256:$src1, u8imm:$src2),
36710b57cec5SDimitry Andric                       !strconcat("v", OpcodeStr,
36720b57cec5SDimitry Andric                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
36730b57cec5SDimitry Andric                       [(set VR256:$dst,
36748bcb0991SDimitry Andric                         (vt256 (OpNode VR256:$src1, (i8 timm:$src2))))]>,
36750b57cec5SDimitry Andric                       VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
36760b57cec5SDimitry Andric  def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
36770b57cec5SDimitry Andric                       (ins i256mem:$src1, u8imm:$src2),
36780b57cec5SDimitry Andric                       !strconcat("v", OpcodeStr,
36790b57cec5SDimitry Andric                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
36800b57cec5SDimitry Andric                      [(set VR256:$dst,
36810b57cec5SDimitry Andric                        (vt256 (OpNode (load addr:$src1),
36828bcb0991SDimitry Andric                         (i8 timm:$src2))))]>, VEX, VEX_L,
36830b57cec5SDimitry Andric                   Sched<[sched.YMM.Folded]>, VEX_WIG;
36840b57cec5SDimitry Andric}
36850b57cec5SDimitry Andric
// Non-VEX forms: no "$src1 = $dst" constraint is set because the source and
// destination are separate operands in the three-operand asm string.
36860b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
36870b57cec5SDimitry Andric  def ri : Ii8<0x70, MRMSrcReg,
36880b57cec5SDimitry Andric               (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
36890b57cec5SDimitry Andric               !strconcat(OpcodeStr,
36900b57cec5SDimitry Andric                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
36910b57cec5SDimitry Andric               [(set VR128:$dst,
36928bcb0991SDimitry Andric                 (vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>,
36930b57cec5SDimitry Andric               Sched<[sched.XMM]>;
36940b57cec5SDimitry Andric  def mi : Ii8<0x70, MRMSrcMem,
36950b57cec5SDimitry Andric               (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2),
36960b57cec5SDimitry Andric               !strconcat(OpcodeStr,
36970b57cec5SDimitry Andric                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
36980b57cec5SDimitry Andric               [(set VR128:$dst,
36990b57cec5SDimitry Andric                 (vt128 (OpNode (memop addr:$src1),
37008bcb0991SDimitry Andric                        (i8 timm:$src2))))]>,
37010b57cec5SDimitry Andric               Sched<[sched.XMM.Folded]>;
37020b57cec5SDimitry Andric}
37030b57cec5SDimitry Andric}
37040b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
37050b57cec5SDimitry Andric
// The three sse2_pshuffle instantiations all share opcode 0x70 (fixed inside
// the multiclass). Each is tagged with a different prefix class -- PD, XS and
// XD -- appended to the defm here. pshufd works on v4i32/v8i32 under NoVLX;
// pshufhw and pshuflw work on v8i16/v16i16 under NoVLX_Or_NoBWI.
37060b57cec5SDimitry Andricdefm PSHUFD  : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd,
37070b57cec5SDimitry Andric                             SchedWriteShuffle, NoVLX>, PD;
37080b57cec5SDimitry Andricdefm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw,
37090b57cec5SDimitry Andric                             SchedWriteShuffle, NoVLX_Or_NoBWI>, XS;
37100b57cec5SDimitry Andricdefm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw,
37110b57cec5SDimitry Andric                             SchedWriteShuffle, NoVLX_Or_NoBWI>, XD;
37120b57cec5SDimitry Andric
37130b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
37140b57cec5SDimitry Andric// Packed Integer Pack Instructions (SSE & AVX)
37150b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
37160b57cec5SDimitry Andric
37170b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in {
/// sse2_pack - Two-source pack operation built from the PDI class. Produces:
///   rr - both sources in RC registers (MRMSrcReg).
///   rm - second source read from x86memop through ld_frag (MRMSrcMem),
///        scheduled with sched.Folded and sched.ReadAfterFold.
///
/// Parameters:
///   opc       - opcode byte shared by both forms.
///   OpcodeStr - mnemonic that the operand list is appended to.
///   OutVT     - value type of the result.
///   ArgVT     - value type applied to $src1 in the pattern.
///   OpNode    - SDNode matched by the selection pattern.
///   RC        - register class of $dst, $src1 and (for rr) $src2.
///   x86memop  - memory operand type of the rm form's $src2.
///   sched     - scheduling class.
///   ld_frag   - load fragment used by the rm form.
///   Is2Addr   - selects the two-operand or three-operand asm string.
37180b57cec5SDimitry Andricmulticlass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
37190b57cec5SDimitry Andric                     ValueType ArgVT, SDNode OpNode, RegisterClass RC,
37200b57cec5SDimitry Andric                     X86MemOperand x86memop, X86FoldableSchedWrite sched,
37210b57cec5SDimitry Andric                     PatFrag ld_frag, bit Is2Addr = 1> {
37220b57cec5SDimitry Andric  def rr : PDI<opc, MRMSrcReg,
37230b57cec5SDimitry Andric               (outs RC:$dst), (ins RC:$src1, RC:$src2),
37240b57cec5SDimitry Andric               !if(Is2Addr,
37250b57cec5SDimitry Andric                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
37260b57cec5SDimitry Andric                   !strconcat(OpcodeStr,
37270b57cec5SDimitry Andric                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
37280b57cec5SDimitry Andric               [(set RC:$dst,
37290b57cec5SDimitry Andric                     (OutVT (OpNode (ArgVT RC:$src1), RC:$src2)))]>,
37300b57cec5SDimitry Andric               Sched<[sched]>;
37310b57cec5SDimitry Andric  def rm : PDI<opc, MRMSrcMem,
37320b57cec5SDimitry Andric               (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
37330b57cec5SDimitry Andric               !if(Is2Addr,
37340b57cec5SDimitry Andric                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
37350b57cec5SDimitry Andric                   !strconcat(OpcodeStr,
37360b57cec5SDimitry Andric                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
37370b57cec5SDimitry Andric               [(set RC:$dst,
37380b57cec5SDimitry Andric                     (OutVT (OpNode (ArgVT RC:$src1),
37390b57cec5SDimitry Andric                                    (ld_frag addr:$src2))))]>,
37400b57cec5SDimitry Andric               Sched<[sched.Folded, sched.ReadAfterFold]>;
37410b57cec5SDimitry Andric}
37420b57cec5SDimitry Andric
37430b57cec5SDimitry Andricmulticlass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
37440b57cec5SDimitry Andric                     ValueType ArgVT, SDNode OpNode, RegisterClass RC,
37450b57cec5SDimitry Andric                     X86MemOperand x86memop, X86FoldableSchedWrite sched,
37460b57cec5SDimitry Andric                     PatFrag ld_frag, bit Is2Addr = 1> {
37470b57cec5SDimitry Andric  def rr : SS48I<opc, MRMSrcReg,
37480b57cec5SDimitry Andric                 (outs RC:$dst), (ins RC:$src1, RC:$src2),
37490b57cec5SDimitry Andric                 !if(Is2Addr,
37500b57cec5SDimitry Andric                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
37510b57cec5SDimitry Andric                     !strconcat(OpcodeStr,
37520b57cec5SDimitry Andric                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
37530b57cec5SDimitry Andric                 [(set RC:$dst,
37540b57cec5SDimitry Andric                       (OutVT (OpNode (ArgVT RC:$src1), RC:$src2)))]>,
37550b57cec5SDimitry Andric                 Sched<[sched]>;
37560b57cec5SDimitry Andric  def rm : SS48I<opc, MRMSrcMem,
37570b57cec5SDimitry Andric                 (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
37580b57cec5SDimitry Andric                 !if(Is2Addr,
37590b57cec5SDimitry Andric                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
37600b57cec5SDimitry Andric                     !strconcat(OpcodeStr,
37610b57cec5SDimitry Andric                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
37620b57cec5SDimitry Andric                 [(set RC:$dst,
37630b57cec5SDimitry Andric                       (OutVT (OpNode (ArgVT RC:$src1),
37640b57cec5SDimitry Andric                                      (ld_frag addr:$src2))))]>,
37650b57cec5SDimitry Andric                 Sched<[sched.Folded, sched.ReadAfterFold]>;
37660b57cec5SDimitry Andric}
37670b57cec5SDimitry Andric
37680b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
37690b57cec5SDimitry Andric  defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, VR128,
37700b57cec5SDimitry Andric                             i128mem, SchedWriteShuffle.XMM, load, 0>,
37710b57cec5SDimitry Andric                             VEX_4V, VEX_WIG;
37720b57cec5SDimitry Andric  defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, VR128,
37730b57cec5SDimitry Andric                             i128mem, SchedWriteShuffle.XMM, load, 0>,
37740b57cec5SDimitry Andric                             VEX_4V, VEX_WIG;
37750b57cec5SDimitry Andric
37760b57cec5SDimitry Andric  defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus, VR128,
37770b57cec5SDimitry Andric                             i128mem, SchedWriteShuffle.XMM, load, 0>,
37780b57cec5SDimitry Andric                             VEX_4V, VEX_WIG;
37790b57cec5SDimitry Andric  defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128,
37800b57cec5SDimitry Andric                             i128mem, SchedWriteShuffle.XMM, load, 0>,
3781d409305fSDimitry Andric                             VEX_4V, VEX_WIG;
37820b57cec5SDimitry Andric}
37830b57cec5SDimitry Andric
37840b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
37850b57cec5SDimitry Andric  defm VPACKSSWBY : sse2_pack<0x63, "vpacksswb", v32i8, v16i16, X86Packss, VR256,
37860b57cec5SDimitry Andric                              i256mem, SchedWriteShuffle.YMM, load, 0>,
37870b57cec5SDimitry Andric                              VEX_4V, VEX_L, VEX_WIG;
37880b57cec5SDimitry Andric  defm VPACKSSDWY : sse2_pack<0x6B, "vpackssdw", v16i16, v8i32, X86Packss, VR256,
37890b57cec5SDimitry Andric                              i256mem, SchedWriteShuffle.YMM, load, 0>,
37900b57cec5SDimitry Andric                              VEX_4V, VEX_L, VEX_WIG;
37910b57cec5SDimitry Andric
37920b57cec5SDimitry Andric  defm VPACKUSWBY : sse2_pack<0x67, "vpackuswb", v32i8, v16i16, X86Packus, VR256,
37930b57cec5SDimitry Andric                              i256mem, SchedWriteShuffle.YMM, load, 0>,
37940b57cec5SDimitry Andric                              VEX_4V, VEX_L, VEX_WIG;
37950b57cec5SDimitry Andric  defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, VR256,
37960b57cec5SDimitry Andric                              i256mem, SchedWriteShuffle.YMM, load, 0>,
3797d409305fSDimitry Andric                              VEX_4V, VEX_L, VEX_WIG;
37980b57cec5SDimitry Andric}
37990b57cec5SDimitry Andric
38000b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
38010b57cec5SDimitry Andric  defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss, VR128,
38020b57cec5SDimitry Andric                            i128mem, SchedWriteShuffle.XMM, memop>;
38030b57cec5SDimitry Andric  defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss, VR128,
38040b57cec5SDimitry Andric                            i128mem, SchedWriteShuffle.XMM, memop>;
38050b57cec5SDimitry Andric
38060b57cec5SDimitry Andric  defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus, VR128,
38070b57cec5SDimitry Andric                            i128mem, SchedWriteShuffle.XMM, memop>;
38080b57cec5SDimitry Andric
38090b57cec5SDimitry Andric  defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus, VR128,
38100b57cec5SDimitry Andric                            i128mem, SchedWriteShuffle.XMM, memop>;
38110b57cec5SDimitry Andric}
38120b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt
38130b57cec5SDimitry Andric
38140b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
38150b57cec5SDimitry Andric// SSE2 - Packed Integer Unpack Instructions
38160b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
38170b57cec5SDimitry Andric
// Unpack (interleave) instructions. Every record in this scope is tagged with
// the packed-integer execution domain.
let ExeDomain = SSEPackedInt in {

/// sse2_unpack - Emits the register-register (rr) and register-memory (rm)
/// forms of an unpack instruction as PDI records.
///   opc, OpcodeStr - opcode byte and mnemonic.
///   vt             - vector type of the OpNode result.
///   OpNode         - DAG node matched by both forms.
///   RC, x86memop   - register class and memory operand kind of the operands.
///   sched          - scheduling class; rm uses its Folded/ReadAfterFold parts.
///   ld_frag        - load fragment wrapped around the rm memory operand.
///   Is2Addr        - 1 (default) prints two operands, 0 prints three.
multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
                       SDNode OpNode, RegisterClass RC, X86MemOperand x86memop,
                       X86FoldableSchedWrite sched, PatFrag ld_frag,
                       bit Is2Addr = 1> {
  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
               !strconcat(OpcodeStr,
                          !if(Is2Addr,
                              "\t{$src2, $dst|$dst, $src2}",
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
               [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
           Sched<[sched]>;
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86memop:$src2),
               !strconcat(OpcodeStr,
                          !if(Is2Addr,
                              "\t{$src2, $dst|$dst, $src2}",
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
               [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// 128-bit VEX forms on byte and word elements.
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  defm VPUNPCKLBW  : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl,
                                 VR128, i128mem, SchedWriteShuffle.XMM,
                                 load, 0>,
                     VEX_4V, VEX_WIG;
  defm VPUNPCKLWD  : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl,
                                 VR128, i128mem, SchedWriteShuffle.XMM,
                                 load, 0>,
                     VEX_4V, VEX_WIG;
  defm VPUNPCKHBW  : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh,
                                 VR128, i128mem, SchedWriteShuffle.XMM,
                                 load, 0>,
                     VEX_4V, VEX_WIG;
  defm VPUNPCKHWD  : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh,
                                 VR128, i128mem, SchedWriteShuffle.XMM,
                                 load, 0>,
                     VEX_4V, VEX_WIG;
}

// 128-bit VEX forms on doubleword and quadword elements.
let Predicates = [HasAVX, NoVLX] in {
  defm VPUNPCKLDQ  : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
                                 VR128, i128mem, SchedWriteShuffle.XMM,
                                 load, 0>,
                     VEX_4V, VEX_WIG;
  defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
                                 VR128, i128mem, SchedWriteShuffle.XMM,
                                 load, 0>,
                     VEX_4V, VEX_WIG;
  defm VPUNPCKHDQ  : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh,
                                 VR128, i128mem, SchedWriteShuffle.XMM,
                                 load, 0>,
                     VEX_4V, VEX_WIG;
  defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh,
                                 VR128, i128mem, SchedWriteShuffle.XMM,
                                 load, 0>,
                     VEX_4V, VEX_WIG;
}

// 256-bit VEX forms on byte and word elements.
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  defm VPUNPCKLBWY  : sse2_unpack<0x60, "vpunpcklbw", v32i8, X86Unpckl,
                                  VR256, i256mem, SchedWriteShuffle.YMM,
                                  load, 0>,
                      VEX_4V, VEX_L, VEX_WIG;
  defm VPUNPCKLWDY  : sse2_unpack<0x61, "vpunpcklwd", v16i16, X86Unpckl,
                                  VR256, i256mem, SchedWriteShuffle.YMM,
                                  load, 0>,
                      VEX_4V, VEX_L, VEX_WIG;
  defm VPUNPCKHBWY  : sse2_unpack<0x68, "vpunpckhbw", v32i8, X86Unpckh,
                                  VR256, i256mem, SchedWriteShuffle.YMM,
                                  load, 0>,
                      VEX_4V, VEX_L, VEX_WIG;
  defm VPUNPCKHWDY  : sse2_unpack<0x69, "vpunpckhwd", v16i16, X86Unpckh,
                                  VR256, i256mem, SchedWriteShuffle.YMM,
                                  load, 0>,
                      VEX_4V, VEX_L, VEX_WIG;
}

// 256-bit VEX forms on doubleword and quadword elements.
let Predicates = [HasAVX2, NoVLX] in {
  defm VPUNPCKLDQY  : sse2_unpack<0x62, "vpunpckldq", v8i32, X86Unpckl,
                                  VR256, i256mem, SchedWriteShuffle.YMM,
                                  load, 0>,
                      VEX_4V, VEX_L, VEX_WIG;
  defm VPUNPCKLQDQY : sse2_unpack<0x6C, "vpunpcklqdq", v4i64, X86Unpckl,
                                  VR256, i256mem, SchedWriteShuffle.YMM,
                                  load, 0>,
                      VEX_4V, VEX_L, VEX_WIG;
  defm VPUNPCKHDQY  : sse2_unpack<0x6A, "vpunpckhdq", v8i32, X86Unpckh,
                                  VR256, i256mem, SchedWriteShuffle.YMM,
                                  load, 0>,
                      VEX_4V, VEX_L, VEX_WIG;
  defm VPUNPCKHQDQY : sse2_unpack<0x6D, "vpunpckhqdq", v4i64, X86Unpckh,
                                  VR256, i256mem, SchedWriteShuffle.YMM,
                                  load, 0>,
                      VEX_4V, VEX_L, VEX_WIG;
}

// Non-VEX forms: two-operand syntax (default Is2Addr), so $src1 is tied to
// $dst; the memory operand uses the memop fragment.
let Constraints = "$src1 = $dst" in {
  defm PUNPCKLBW  : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl,
                                VR128, i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKLWD  : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl,
                                VR128, i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKLDQ  : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl,
                                VR128, i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl,
                                VR128, i128mem, SchedWriteShuffle.XMM, memop>;

  defm PUNPCKHBW  : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh,
                                VR128, i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKHWD  : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh,
                                VR128, i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKHDQ  : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh,
                                VR128, i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh,
                                VR128, i128mem, SchedWriteShuffle.XMM, memop>;
}
} // ExeDomain = SSEPackedInt
39190b57cec5SDimitry Andric
39200b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
39210b57cec5SDimitry Andric// SSE2 - Packed Integer Extract and Insert
39220b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
39230b57cec5SDimitry Andric
// Word insert/extract. Every record in this scope is tagged with the
// packed-integer execution domain.
let ExeDomain = SSEPackedInt in {

/// sse2_pinsrw - Emits the rr and rm forms of the word-insert instruction
/// (opcode 0xC4) matched through X86pinsrw. With Is2Addr = 1 (default) the
/// asm string is the two-register "pinsrw" spelling; with Is2Addr = 0 it is
/// the "vpinsrw" spelling that also prints $src1.
multiclass sse2_pinsrw<bit Is2Addr = 1> {
  // Inserted value comes from a general-purpose register.
  def rr : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
               (ins VR128:$src1, GR32orGR64:$src2, u8imm:$src3),
               !if(Is2Addr,
                   "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                   "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
               [(set VR128:$dst,
                     (X86pinsrw VR128:$src1, GR32orGR64:$src2, timm:$src3))]>,
           Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
  // Inserted value comes from a 16-bit memory operand via extloadi16.
  def rm : Ii8<0xC4, MRMSrcMem, (outs VR128:$dst),
               (ins VR128:$src1, i16mem:$src2, u8imm:$src3),
               !if(Is2Addr,
                   "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                   "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
               [(set VR128:$dst,
                     (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
                                timm:$src3))]>,
           Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

// Extract: X86pextrw on a v8i16 source with an immediate index; the result
// lands in a general-purpose register.
let Predicates = [HasAVX, NoBWI] in
def VPEXTRWrr : Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                    (ins VR128:$src1, u8imm:$src2),
                    "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set GR32orGR64:$dst,
                          (X86pextrw (v8i16 VR128:$src1), timm:$src2))]>,
                PD, VEX, VEX_WIG, Sched<[WriteVecExtract]>;
def PEXTRWrr : PDIi8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                     (ins VR128:$src1, u8imm:$src2),
                     "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set GR32orGR64:$dst,
                           (X86pextrw (v8i16 VR128:$src1), timm:$src2))]>,
               Sched<[WriteVecExtract]>;

// Insert: VEX instantiation uses the three-operand spelling.
let Predicates = [HasAVX, NoBWI] in
defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V, VEX_WIG;

// Insert: non-VEX instantiation ties $src1 to $dst.
let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in
defm PINSRW : sse2_pinsrw, PD;

} // ExeDomain = SSEPackedInt
39700b57cec5SDimitry Andric
39710b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
39720b57cec5SDimitry Andric// SSE2 - Packed Mask Creation
39730b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
39740b57cec5SDimitry Andric
// Byte-mask creation: X86movmsk of a byte vector into a general-purpose
// register. Every record here is tagged with the packed-integer domain.
let ExeDomain = SSEPackedInt in {

// VEX form, 128-bit source (v16i8).
// NOTE(review): the asm string is spelled without a leading 'v'; presumably
// the VPDI class supplies it -- confirm against the format definitions.
def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
                       (ins VR128:$src),
                       "pmovmskb\t{$src, $dst|$dst, $src}",
                       [(set GR32orGR64:$dst,
                             (X86movmsk (v16i8 VR128:$src)))]>,
                  Sched<[WriteVecMOVMSK]>, VEX, VEX_WIG;

// VEX form, 256-bit source (v32i8); gated on HasAVX2 and marked VEX_L.
let Predicates = [HasAVX2] in {
def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
                        (ins VR256:$src),
                        "pmovmskb\t{$src, $dst|$dst, $src}",
                        [(set GR32orGR64:$dst,
                              (X86movmsk (v32i8 VR256:$src)))]>,
                   Sched<[WriteVecMOVMSKY]>, VEX, VEX_L, VEX_WIG;
}

// Non-VEX form, 128-bit source (v16i8).
def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
                     (ins VR128:$src),
                     "pmovmskb\t{$src, $dst|$dst, $src}",
                     [(set GR32orGR64:$dst,
                           (X86movmsk (v16i8 VR128:$src)))]>,
                 Sched<[WriteVecMOVMSK]>;

} // ExeDomain = SSEPackedInt
39970b57cec5SDimitry Andric
39980b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
39990b57cec5SDimitry Andric// SSE2 - Conditional Store
40000b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
40010b57cec5SDimitry Andric
// Masked store through the int_x86_sse2_maskmov_dqu intrinsic. The third
// intrinsic operand is a fixed register (EDI or RDI) that each record also
// lists in Uses. All records share the packed-integer domain and the
// SchedWriteVecMoveLS.XMM.MR scheduling class.
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in {

// --- VEX-encoded forms ---------------------------------------------------
// NOTE(review): the first two asm strings omit the leading 'v' while the X32
// record spells "vmaskmovdqu" out via AsmString; presumably VPDI prepends the
// 'v' to its asm-string argument -- confirm against the format definitions.

// Outside 64-bit mode: EDI.
let Uses = [EDI], Predicates = [HasAVX,Not64BitMode] in
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
                       (ins VR128:$src, VR128:$mask),
                       "maskmovdqu\t{$mask, $src|$src, $mask}",
                       [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask,
                                                  EDI)]>,
                  VEX, VEX_WIG;

// 64-bit mode, 64-bit address size: RDI.
let Uses = [RDI], Predicates = [HasAVX,In64BitMode] in
def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
                         (ins VR128:$src, VR128:$mask),
                         "maskmovdqu\t{$mask, $src|$src, $mask}",
                         [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask,
                                                    RDI)]>,
                    VEX, VEX_WIG, AdSize64;

// 64-bit mode, 32-bit address size: EDI. The class receives an empty asm
// string; the printed text (with an "addr32" prefix) is set through AsmString
// and the record is assigned to the "NonParsable" asm variant.
let Uses = [EDI], Predicates = [HasAVX,In64BitMode] in
def VMASKMOVDQUX32 : VPDI<0xF7, MRMSrcReg, (outs),
                          (ins VR128:$src, VR128:$mask), "",
                          [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask,
                                                     EDI)]>,
                     VEX, VEX_WIG, AdSize32 {
  let AsmString = "addr32 vmaskmovdqu\t{$mask, $src|$src, $mask}";
  let AsmVariantName = "NonParsable";
}

// --- Non-VEX forms -------------------------------------------------------

// Outside 64-bit mode: EDI.
let Uses = [EDI], Predicates = [UseSSE2,Not64BitMode] in
def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs),
                     (ins VR128:$src, VR128:$mask),
                     "maskmovdqu\t{$mask, $src|$src, $mask}",
                     [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask,
                                                EDI)]>;

// 64-bit mode, 64-bit address size: RDI.
let Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in
def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs),
                       (ins VR128:$src, VR128:$mask),
                       "maskmovdqu\t{$mask, $src|$src, $mask}",
                       [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask,
                                                  RDI)]>,
                   AdSize64;

// 64-bit mode, 32-bit address size: EDI, "addr32"-prefixed text, assigned to
// the "NonParsable" asm variant.
let Uses = [EDI], Predicates = [UseSSE2,In64BitMode] in
def MASKMOVDQUX32 : PDI<0xF7, MRMSrcReg, (outs),
                        (ins VR128:$src, VR128:$mask),
                        "addr32 maskmovdqu\t{$mask, $src|$src, $mask}",
                        [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask,
                                                   EDI)]>,
                    AdSize32 {
  let AsmVariantName = "NonParsable";
}

} // ExeDomain = SSEPackedInt
40420b57cec5SDimitry Andric
40430b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
40440b57cec5SDimitry Andric// SSE2 - Move Doubleword/Quadword
40450b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
40460b57cec5SDimitry Andric
40470b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
// Move Int Doubleword/Quadword to Packed Doubleword/Quadword Int
40490b57cec5SDimitry Andric//
// Moves from a general-purpose register or memory into an XMM-class register
// (opcode 0x6E). All records share the packed-integer execution domain.
let ExeDomain = SSEPackedInt in {

// --- VEX-encoded forms ---------------------------------------------------

// GR32 -> v4i32 through scalar_to_vector.
def VMOVDI2PDIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst),
                        (ins GR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                              (v4i32 (scalar_to_vector GR32:$src)))]>,
                   VEX, Sched<[WriteVecMoveFromGpr]>;
// 32-bit load -> v4i32 through scalar_to_vector.
def VMOVDI2PDIrm : VS2I<0x6E, MRMSrcMem, (outs VR128:$dst),
                        (ins i32mem:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                              (v4i32 (scalar_to_vector
                                        (loadi32 addr:$src))))]>,
                   VEX, Sched<[WriteVecLoad]>;
// GR64 -> v2i64 through scalar_to_vector.
def VMOV64toPQIrr : VRS2I<0x6E, MRMSrcReg, (outs VR128:$dst),
                          (ins GR64:$src),
                          "movq\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst,
                                (v2i64 (scalar_to_vector GR64:$src)))]>,
                    VEX, Sched<[WriteVecMoveFromGpr]>;
// 64-bit memory form: no selection pattern, so mayLoad and hasSideEffects are
// stated explicitly; codegen-only but force-enabled for the disassembler.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIrm : VRS2I<0x6E, MRMSrcMem, (outs VR128:$dst),
                          (ins i64mem:$src),
                          "movq\t{$src, $dst|$dst, $src}", []>,
                    VEX, Sched<[WriteVecLoad]>;
// GR64 -> FR64 bitconvert; codegen-only.
let isCodeGenOnly = 1 in
def VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst),
                         (ins GR64:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                         [(set FR64:$dst, (bitconvert GR64:$src))]>,
                   VEX, Sched<[WriteVecMoveFromGpr]>;

// --- Non-VEX forms (same patterns as above) --------------------------------

def MOVDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst),
                      (ins GR32:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                            (v4i32 (scalar_to_vector GR32:$src)))]>,
                  Sched<[WriteVecMoveFromGpr]>;
def MOVDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst),
                      (ins i32mem:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                            (v4i32 (scalar_to_vector
                                      (loadi32 addr:$src))))]>,
                  Sched<[WriteVecLoad]>;
def MOV64toPQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst),
                        (ins GR64:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                              (v2i64 (scalar_to_vector GR64:$src)))]>,
                   Sched<[WriteVecMoveFromGpr]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def MOV64toPQIrm : RS2I<0x6E, MRMSrcMem, (outs VR128:$dst),
                        (ins i64mem:$src),
                        "movq\t{$src, $dst|$dst, $src}", []>,
                   Sched<[WriteVecLoad]>;
let isCodeGenOnly = 1 in
def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst),
                       (ins GR64:$src),
                       "movq\t{$src, $dst|$dst, $src}",
                       [(set FR64:$dst, (bitconvert GR64:$src))]>,
                  Sched<[WriteVecMoveFromGpr]>;
} // ExeDomain = SSEPackedInt
41010b57cec5SDimitry Andric
41020b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
41030b57cec5SDimitry Andric// Move Int Doubleword to Single Scalar
41040b57cec5SDimitry Andric//
// GR32 -> FR32 bitconvert (opcode 0x6E). Both records are codegen-only and
// tagged with the packed-integer execution domain.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
  // VEX-encoded form.
  def VMOVDI2SSrr : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst),
                         (ins GR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set FR32:$dst, (bitconvert GR32:$src))]>,
                    VEX, Sched<[WriteVecMoveFromGpr]>;

  // Non-VEX form.
  def MOVDI2SSrr : S2I<0x6E, MRMSrcReg, (outs FR32:$dst),
                       (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert GR32:$src))]>,
                   Sched<[WriteVecMoveFromGpr]>;

} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
41170b57cec5SDimitry Andric
41180b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int first element to Doubleword Int
41200b57cec5SDimitry Andric//
// Element 0 of a v4i32 moved to a GR32 (rr) or stored to 32-bit memory (mr),
// opcode 0x7E. All records share the packed-integer execution domain.
let ExeDomain = SSEPackedInt in {

// --- VEX-encoded forms ---------------------------------------------------
def VMOVPDI2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst),
                        (ins VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set GR32:$dst,
                              (extractelt (v4i32 VR128:$src), (iPTR 0)))]>,
                   VEX, Sched<[WriteVecMoveToGpr]>;
def VMOVPDI2DImr : VS2I<0x7E, MRMDestMem, (outs),
                        (ins i32mem:$dst, VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(store (i32 (extractelt (v4i32 VR128:$src),
                                                 (iPTR 0))),
                                addr:$dst)]>,
                   VEX, Sched<[WriteVecStore]>;

// --- Non-VEX forms -------------------------------------------------------
def MOVPDI2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst),
                      (ins VR128:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst,
                            (extractelt (v4i32 VR128:$src), (iPTR 0)))]>,
                  Sched<[WriteVecMoveToGpr]>;
def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs),
                      (ins i32mem:$dst, VR128:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(store (i32 (extractelt (v4i32 VR128:$src),
                                               (iPTR 0))),
                              addr:$dst)]>,
                  Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt
41440b57cec5SDimitry Andric
41450b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
// Move Packed Quadword Int first element to Quadword Int
41470b57cec5SDimitry Andric//
// Element 0 of a v2i64 moved to a GR64 (rr), plus the pattern-less 64-bit
// store forms (mr), opcode 0x7E. All records share the packed-integer domain.
let ExeDomain = SSEPackedInt in {

// Register forms; both take their scheduling info from the enclosing SchedRW.
let SchedRW = [WriteVecMoveToGpr] in {
def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst),
                          (ins VR128:$src),
                          "movq\t{$src, $dst|$dst, $src}",
                          [(set GR64:$dst,
                                (extractelt (v2i64 VR128:$src), (iPTR 0)))]>,
                    VEX;

def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst),
                        (ins VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set GR64:$dst,
                              (extractelt (v2i64 VR128:$src), (iPTR 0)))]>;
} //SchedRW

// Memory forms: no selection pattern, so mayStore and hasSideEffects are
// stated explicitly; codegen-only but force-enabled for the disassembler.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64mr : VRS2I<0x7E, MRMDestMem, (outs),
                          (ins i64mem:$dst, VR128:$src),
                          "movq\t{$src, $dst|$dst, $src}", []>,
                    VEX, Sched<[WriteVecStore]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def MOVPQIto64mr : RS2I<0x7E, MRMDestMem, (outs),
                        (ins i64mem:$dst, VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}", []>,
                   Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt
41720b57cec5SDimitry Andric
41730b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
41740b57cec5SDimitry Andric// Bitcast FR64 <-> GR64
41750b57cec5SDimitry Andric//
// Codegen-only "movq" register forms (opcode 0x7E, MRMDestReg) whose pattern
// is a bitconvert of an FR64 value into a GR64. VMOVSDto64rr carries the VEX
// mixin; MOVSDto64rr is the non-VEX variant. Both are scheduled as
// WriteVecMoveToGpr.
41760b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
41770b57cec5SDimitry Andric  def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
41780b57cec5SDimitry Andric                           "movq\t{$src, $dst|$dst, $src}",
41790b57cec5SDimitry Andric                           [(set GR64:$dst, (bitconvert FR64:$src))]>,
41800b57cec5SDimitry Andric                           VEX, Sched<[WriteVecMoveToGpr]>;
41810b57cec5SDimitry Andric
41820b57cec5SDimitry Andric  def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
41830b57cec5SDimitry Andric                         "movq\t{$src, $dst|$dst, $src}",
41840b57cec5SDimitry Andric                         [(set GR64:$dst, (bitconvert FR64:$src))]>,
41850b57cec5SDimitry Andric                         Sched<[WriteVecMoveToGpr]>;
41860b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
41870b57cec5SDimitry Andric
41880b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
41890b57cec5SDimitry Andric// Move Scalar Single to Doubleword Int
41900b57cec5SDimitry Andric//
// Codegen-only "movd" register forms (opcode 0x7E, MRMDestReg) whose pattern
// is a bitconvert of an FR32 value into a GR32. VMOVSS2DIrr carries the VEX
// mixin; MOVSS2DIrr is the non-VEX variant. Both are scheduled as
// WriteVecMoveToGpr.
41910b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
41920b57cec5SDimitry Andric  def VMOVSS2DIrr  : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
41930b57cec5SDimitry Andric                        "movd\t{$src, $dst|$dst, $src}",
41940b57cec5SDimitry Andric                        [(set GR32:$dst, (bitconvert FR32:$src))]>,
41950b57cec5SDimitry Andric                        VEX, Sched<[WriteVecMoveToGpr]>;
41960b57cec5SDimitry Andric  def MOVSS2DIrr  : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
41970b57cec5SDimitry Andric                        "movd\t{$src, $dst|$dst, $src}",
41980b57cec5SDimitry Andric                        [(set GR32:$dst, (bitconvert FR32:$src))]>,
41990b57cec5SDimitry Andric                        Sched<[WriteVecMoveToGpr]>;
42000b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
42010b57cec5SDimitry Andric
// UseAVX selection patterns mapping X86vzmovl-of-scalar_to_vector and
// X86vzload32 onto the VEX GPR->XMM moves and the VEX 32-bit load.
42020b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
  // X86vzmovl of a scalar_to_vector from a GPR selects the plain GPR->XMM
  // move; no separate instruction is emitted for the X86vzmovl node.
42030b57cec5SDimitry Andric  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
42040b57cec5SDimitry Andric            (VMOVDI2PDIrr GR32:$src)>;
42050b57cec5SDimitry Andric
42060b57cec5SDimitry Andric  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
42070b57cec5SDimitry Andric            (VMOV64toPQIrr GR64:$src)>;
42080b57cec5SDimitry Andric
42090b57cec5SDimitry Andric  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
42100b57cec5SDimitry Andric  // These instructions also write zeros in the high part of a 256-bit register.
42110b57cec5SDimitry Andric  def : Pat<(v4i32 (X86vzload32 addr:$src)),
42120b57cec5SDimitry Andric            (VMOVDI2PDIrm addr:$src)>;
  // 256-bit result: the same 128-bit load, wrapped in SUBREG_TO_REG at sub_xmm.
42130b57cec5SDimitry Andric  def : Pat<(v8i32 (X86vzload32 addr:$src)),
42140b57cec5SDimitry Andric            (SUBREG_TO_REG (i64 0), (v4i32 (VMOVDI2PDIrm addr:$src)), sub_xmm)>;
42150b57cec5SDimitry Andric}
42160b57cec5SDimitry Andric
// UseSSE2 counterparts of the AVX patterns: X86vzmovl of a scalar_to_vector
// from a GPR selects the non-VEX GPR->XMM move, and a v4i32 X86vzload32
// selects the non-VEX 32-bit load. There is no 256-bit variant here.
42170b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
42180b57cec5SDimitry Andric  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
42190b57cec5SDimitry Andric            (MOVDI2PDIrr GR32:$src)>;
42200b57cec5SDimitry Andric
42210b57cec5SDimitry Andric  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
42220b57cec5SDimitry Andric            (MOV64toPQIrr GR64:$src)>;
42230b57cec5SDimitry Andric  def : Pat<(v4i32 (X86vzload32 addr:$src)),
42240b57cec5SDimitry Andric            (MOVDI2PDIrm addr:$src)>;
42250b57cec5SDimitry Andric}
42260b57cec5SDimitry Andric
42270b57cec5SDimitry Andric// Before the MC layer of LLVM existed, clang emitted "movd" assembly instead of
42280b57cec5SDimitry Andric// "movq" due to a MacOS parsing limitation. In order to parse old assembly, we
42290b57cec5SDimitry Andric// add these aliases.
// Aliases that accept the "movd"/"vmovd" spelling for the 64-bit GPR<->XMM
// moves, in both directions (GR64 -> VR128 and VR128 -> GR64).
// NOTE(review): the trailing 0 is presumably InstAlias's emit flag, i.e. the
// alias is accepted when parsing but never printed - confirm against the
// InstAlias class definition.
42300b57cec5SDimitry Andricdef : InstAlias<"movd\t{$src, $dst|$dst, $src}",
42310b57cec5SDimitry Andric                (MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
42320b57cec5SDimitry Andricdef : InstAlias<"movd\t{$src, $dst|$dst, $src}",
42330b57cec5SDimitry Andric                (MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
42340b57cec5SDimitry Andric// Allow "vmovd" but print "vmovq" since we don't need compatibility for AVX.
42350b57cec5SDimitry Andricdef : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
42360b57cec5SDimitry Andric                (VMOV64toPQIrr VR128:$dst, GR64:$src), 0>;
42370b57cec5SDimitry Andricdef : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
42380b57cec5SDimitry Andric                (VMOVPQIto64rr GR64:$dst, VR128:$src), 0>;
42390b57cec5SDimitry Andric
42400b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
42410b57cec5SDimitry Andric// SSE2 - Move Quadword
42420b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
42430b57cec5SDimitry Andric
42440b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
42450b57cec5SDimitry Andric// Move Quadword Int to Packed Quadword Int
42460b57cec5SDimitry Andric//
42470b57cec5SDimitry Andric
// 64-bit load forms (opcode 0x7E, MRMSrcMem, XS prefix). The pattern selects
// scalar_to_vector of an i64 load into a v2i64 XMM register. Both defs take
// SSEPackedInt and WriteVecLoad from the enclosing let; the VEX variant
// requires UseAVX and the legacy variant requires UseSSE2.
42480b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt, SchedRW = [WriteVecLoad] in {
42490b57cec5SDimitry Andricdef VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
42500b57cec5SDimitry Andric                    "vmovq\t{$src, $dst|$dst, $src}",
42510b57cec5SDimitry Andric                    [(set VR128:$dst,
42520b57cec5SDimitry Andric                      (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
42530b57cec5SDimitry Andric                    VEX, Requires<[UseAVX]>, VEX_WIG;
42540b57cec5SDimitry Andricdef MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
42550b57cec5SDimitry Andric                    "movq\t{$src, $dst|$dst, $src}",
42560b57cec5SDimitry Andric                    [(set VR128:$dst,
42570b57cec5SDimitry Andric                      (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
42580b57cec5SDimitry Andric                    XS, Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
42590b57cec5SDimitry Andric} // ExeDomain, SchedRW
42600b57cec5SDimitry Andric
42610b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
42620b57cec5SDimitry Andric// Move Packed Quadword Int to Quadword Int
42630b57cec5SDimitry Andric//
// 64-bit store forms (opcode 0xD6, MRMDestMem). The pattern stores element 0
// of the v2i64 value in $src to the i64 memory operand $dst. Both defs take
// SSEPackedInt and WriteVecStore from the enclosing let.
42640b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt, SchedRW = [WriteVecStore] in {
42650b57cec5SDimitry Andricdef VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
42660b57cec5SDimitry Andric                        "movq\t{$src, $dst|$dst, $src}",
42670b57cec5SDimitry Andric                        [(store (i64 (extractelt (v2i64 VR128:$src),
42680b57cec5SDimitry Andric                                      (iPTR 0))), addr:$dst)]>,
42690b57cec5SDimitry Andric                        VEX, VEX_WIG;
42700b57cec5SDimitry Andricdef MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
42710b57cec5SDimitry Andric                      "movq\t{$src, $dst|$dst, $src}",
42720b57cec5SDimitry Andric                      [(store (i64 (extractelt (v2i64 VR128:$src),
42730b57cec5SDimitry Andric                                    (iPTR 0))), addr:$dst)]>;
42740b57cec5SDimitry Andric} // ExeDomain, SchedRW
42750b57cec5SDimitry Andric
42760b57cec5SDimitry Andric// For disassembler only
// Register-to-register encodings of opcode 0xD6 (MRMDestReg) with an empty
// pattern list, so instruction selection never produces them. hasSideEffects
// is set explicitly because there is no pattern; the scheduling class is
// SchedWriteVecLogic.XMM.
42770b57cec5SDimitry Andriclet isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
42780b57cec5SDimitry Andric    SchedRW = [SchedWriteVecLogic.XMM] in {
42790b57cec5SDimitry Andricdef VMOVPQI2QIrr : VS2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
42800b57cec5SDimitry Andric                     "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_WIG;
42810b57cec5SDimitry Andricdef MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
42820b57cec5SDimitry Andric                      "movq\t{$src, $dst|$dst, $src}", []>;
42830b57cec5SDimitry Andric}
42840b57cec5SDimitry Andric
// "vmovq.s" / "movq.s" mnemonics map to the 0xD6 register-to-register defs.
// NOTE(review): the trailing 0 is presumably InstAlias's emit flag (alias is
// parse-only) - confirm against the InstAlias class definition.
42850b57cec5SDimitry Andricdef : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
42860b57cec5SDimitry Andric                (VMOVPQI2QIrr VR128:$dst, VR128:$src), 0>;
42870b57cec5SDimitry Andricdef : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
42880b57cec5SDimitry Andric                (MOVPQI2QIrr VR128:$dst, VR128:$src), 0>;
42890b57cec5SDimitry Andric
// UseAVX patterns for 64-bit X86vzload64 and X86vextractstore64.
42900b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
  // A v2i64 X86vzload64 selects the VEX 64-bit load directly; the v4i64 form
  // reuses that load and wraps it in SUBREG_TO_REG at sub_xmm.
42910b57cec5SDimitry Andric  def : Pat<(v2i64 (X86vzload64 addr:$src)),
42920b57cec5SDimitry Andric            (VMOVQI2PQIrm addr:$src)>;
42930b57cec5SDimitry Andric  def : Pat<(v4i64 (X86vzload64 addr:$src)),
42940b57cec5SDimitry Andric            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIrm addr:$src)), sub_xmm)>;
42950b57cec5SDimitry Andric
  // X86vextractstore64 of a v2i64 selects the VEX 0xD6 store.
42960b57cec5SDimitry Andric  def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
42970b57cec5SDimitry Andric            (VMOVPQI2QImr addr:$dst, VR128:$src)>;
42980b57cec5SDimitry Andric}
42990b57cec5SDimitry Andric
// UseSSE2 counterparts: a v2i64 X86vzload64 selects MOVQI2PQIrm and a v2i64
// X86vextractstore64 selects MOVPQI2QImr.
43000b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
43010b57cec5SDimitry Andric  def : Pat<(v2i64 (X86vzload64 addr:$src)), (MOVQI2PQIrm addr:$src)>;
43020b57cec5SDimitry Andric
43030b57cec5SDimitry Andric  def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
43040b57cec5SDimitry Andric            (MOVPQI2QImr addr:$dst, VR128:$src)>;
43050b57cec5SDimitry Andric}
43060b57cec5SDimitry Andric
43070b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
43080b57cec5SDimitry Andric// Move from XMM to XMM and clear the upper 64 bits. Note: the IA32 document
43090b57cec5SDimitry Andric// has a bug here; movq xmm1, xmm2 does clear the high bits.
43100b57cec5SDimitry Andric//
// XMM-to-XMM "movq" (opcode 0x7E, MRMSrcReg, XS prefix). The pattern selects
// X86vzmovl of a v2i64 register value. Both defs take SSEPackedInt and
// SchedWriteVecLogic.XMM from the enclosing let; the VEX variant requires
// UseAVX and the legacy variant requires UseSSE2.
43110b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
43120b57cec5SDimitry Andricdef VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
43130b57cec5SDimitry Andric                        "vmovq\t{$src, $dst|$dst, $src}",
43140b57cec5SDimitry Andric                    [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
43150b57cec5SDimitry Andric                         XS, VEX, Requires<[UseAVX]>, VEX_WIG;
43160b57cec5SDimitry Andricdef MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
43170b57cec5SDimitry Andric                        "movq\t{$src, $dst|$dst, $src}",
43180b57cec5SDimitry Andric                    [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
43190b57cec5SDimitry Andric                        XS, Requires<[UseSSE2]>;
43200b57cec5SDimitry Andric} // ExeDomain, SchedRW
43210b57cec5SDimitry Andric
// X86vzmovl on v2f64 reuses the XMM-to-XMM movq instructions whose own
// patterns are written for v2i64: the VEX form under UseAVX, the legacy form
// under UseSSE2.
43220b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
43230b57cec5SDimitry Andric  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
43240b57cec5SDimitry Andric            (VMOVZPQILo2PQIrr VR128:$src)>;
43250b57cec5SDimitry Andric}
43260b57cec5SDimitry Andriclet Predicates = [UseSSE2] in {
43270b57cec5SDimitry Andric  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
43280b57cec5SDimitry Andric            (MOVZPQILo2PQIrr VR128:$src)>;
43290b57cec5SDimitry Andric}
43300b57cec5SDimitry Andric
// X86vzmovl on 256-bit vectors (v4f64 / v4i64) under UseAVX: take the sub_xmm
// subregister of the source with EXTRACT_SUBREG, run it through
// VMOVZPQILo2PQIrr, and wrap the 128-bit result in SUBREG_TO_REG at sub_xmm.
43310b57cec5SDimitry Andriclet Predicates = [UseAVX] in {
43320b57cec5SDimitry Andric  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
43330b57cec5SDimitry Andric            (SUBREG_TO_REG (i32 0),
43340b57cec5SDimitry Andric             (v2f64 (VMOVZPQILo2PQIrr
43350b57cec5SDimitry Andric                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)))),
43360b57cec5SDimitry Andric             sub_xmm)>;
43370b57cec5SDimitry Andric  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
43380b57cec5SDimitry Andric            (SUBREG_TO_REG (i32 0),
43390b57cec5SDimitry Andric             (v2i64 (VMOVZPQILo2PQIrr
43400b57cec5SDimitry Andric                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)))),
43410b57cec5SDimitry Andric             sub_xmm)>;
43420b57cec5SDimitry Andric}
43430b57cec5SDimitry Andric
43440b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
43450b57cec5SDimitry Andric// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
43460b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
43470b57cec5SDimitry Andric
/// sse3_replicate_sfp - Defines the register (rr) and memory (rm) forms of a
/// one-source S3SI instruction.
///   op        - opcode byte.
///   OpNode    - SDNode applied to the source in both patterns.
///   OpcodeStr - mnemonic; the operand string is appended with !strconcat.
///   vt        - result value type used in the rr pattern.
///   RC        - register class of $dst and of the rr source.
///   mem_frag  - load fragment wrapped around addr:$src in the rm pattern.
///   x86memop  - memory operand type of the rm source.
///   sched     - scheduling class; rm uses sched.Folded.
43480b57cec5SDimitry Andricmulticlass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
43490b57cec5SDimitry Andric                              ValueType vt, RegisterClass RC, PatFrag mem_frag,
43500b57cec5SDimitry Andric                              X86MemOperand x86memop, X86FoldableSchedWrite sched> {
43510b57cec5SDimitry Andricdef rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
43520b57cec5SDimitry Andric                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
43530b57cec5SDimitry Andric                      [(set RC:$dst, (vt (OpNode RC:$src)))]>,
43540b57cec5SDimitry Andric                      Sched<[sched]>;
43550b57cec5SDimitry Andricdef rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
43560b57cec5SDimitry Andric                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
43570b57cec5SDimitry Andric                      [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>,
43580b57cec5SDimitry Andric                      Sched<[sched.Folded]>;
43590b57cec5SDimitry Andric}
43600b57cec5SDimitry Andric
// Instantiations of sse3_replicate_sfp. Opcode 0x16 is paired with
// X86Movshdup and 0x12 with X86Movsldup. The VEX forms (128-bit on VR128 with
// v4f32, 256-bit "Y" on VR256 with v8f32 and VEX_L) are predicated on
// [HasAVX, NoVLX] and use the loadv* fragments.
43610b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
43620b57cec5SDimitry Andric  defm VMOVSHDUP  : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
43630b57cec5SDimitry Andric                                       v4f32, VR128, loadv4f32, f128mem,
43640b57cec5SDimitry Andric                                       SchedWriteFShuffle.XMM>, VEX, VEX_WIG;
43650b57cec5SDimitry Andric  defm VMOVSLDUP  : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
43660b57cec5SDimitry Andric                                       v4f32, VR128, loadv4f32, f128mem,
43670b57cec5SDimitry Andric                                       SchedWriteFShuffle.XMM>, VEX, VEX_WIG;
43680b57cec5SDimitry Andric  defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
43690b57cec5SDimitry Andric                                       v8f32, VR256, loadv8f32, f256mem,
43700b57cec5SDimitry Andric                                       SchedWriteFShuffle.YMM>, VEX, VEX_L, VEX_WIG;
43710b57cec5SDimitry Andric  defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
43720b57cec5SDimitry Andric                                       v8f32, VR256, loadv8f32, f256mem,
43730b57cec5SDimitry Andric                                       SchedWriteFShuffle.YMM>, VEX, VEX_L, VEX_WIG;
43740b57cec5SDimitry Andric}
// The legacy forms sit outside any Predicates let and use the memopv4f32
// fragment instead of loadv4f32.
43750b57cec5SDimitry Andricdefm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
43760b57cec5SDimitry Andric                                   memopv4f32, f128mem, SchedWriteFShuffle.XMM>;
43770b57cec5SDimitry Andricdefm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
43780b57cec5SDimitry Andric                                   memopv4f32, f128mem, SchedWriteFShuffle.XMM>;
43790b57cec5SDimitry Andric
// Integer-typed (v4i32 / v8i32) X86Movshdup and X86Movsldup select the same
// VEX instructions that the multiclass defines for v4f32 / v8f32. Each
// node/width pair has a register pattern and a pattern that folds a load.
43800b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
43810b57cec5SDimitry Andric  def : Pat<(v4i32 (X86Movshdup VR128:$src)),
43820b57cec5SDimitry Andric            (VMOVSHDUPrr VR128:$src)>;
43830b57cec5SDimitry Andric  def : Pat<(v4i32 (X86Movshdup (load addr:$src))),
43840b57cec5SDimitry Andric            (VMOVSHDUPrm addr:$src)>;
43850b57cec5SDimitry Andric  def : Pat<(v4i32 (X86Movsldup VR128:$src)),
43860b57cec5SDimitry Andric            (VMOVSLDUPrr VR128:$src)>;
43870b57cec5SDimitry Andric  def : Pat<(v4i32 (X86Movsldup (load addr:$src))),
43880b57cec5SDimitry Andric            (VMOVSLDUPrm addr:$src)>;
43890b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Movshdup VR256:$src)),
43900b57cec5SDimitry Andric            (VMOVSHDUPYrr VR256:$src)>;
43910b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Movshdup (load addr:$src))),
43920b57cec5SDimitry Andric            (VMOVSHDUPYrm addr:$src)>;
43930b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Movsldup VR256:$src)),
43940b57cec5SDimitry Andric            (VMOVSLDUPYrr VR256:$src)>;
43950b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Movsldup (load addr:$src))),
43960b57cec5SDimitry Andric            (VMOVSLDUPYrm addr:$src)>;
43970b57cec5SDimitry Andric}
43980b57cec5SDimitry Andric
// UseSSE3 counterparts for v4i32: register patterns plus load-folding
// patterns that use memop rather than load.
43990b57cec5SDimitry Andriclet Predicates = [UseSSE3] in {
44000b57cec5SDimitry Andric  def : Pat<(v4i32 (X86Movshdup VR128:$src)),
44010b57cec5SDimitry Andric            (MOVSHDUPrr VR128:$src)>;
44020b57cec5SDimitry Andric  def : Pat<(v4i32 (X86Movshdup (memop addr:$src))),
44030b57cec5SDimitry Andric            (MOVSHDUPrm addr:$src)>;
44040b57cec5SDimitry Andric  def : Pat<(v4i32 (X86Movsldup VR128:$src)),
44050b57cec5SDimitry Andric            (MOVSLDUPrr VR128:$src)>;
44060b57cec5SDimitry Andric  def : Pat<(v4i32 (X86Movsldup (memop addr:$src))),
44070b57cec5SDimitry Andric            (MOVSLDUPrm addr:$src)>;
44080b57cec5SDimitry Andric}
44090b57cec5SDimitry Andric
44100b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
44110b57cec5SDimitry Andric// SSE3 - Replicate Double FP - MOVDDUP
44120b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
44130b57cec5SDimitry Andric
/// sse3_replicate_dfp - 128-bit register (rr) and memory (rm) forms of an
/// S3DI instruction with opcode 0x12 whose patterns use X86Movddup on v2f64.
///   OpcodeStr - mnemonic; the operand string is appended with !strconcat.
///   sched     - scheduling widths; rr uses sched.XMM, rm sched.XMM.Folded.
/// The rm form takes an f64mem operand: its pattern is X86Movddup of a
/// scalar_to_vector of an f64 load, not of a full vector load.
44140b57cec5SDimitry Andricmulticlass sse3_replicate_dfp<string OpcodeStr, X86SchedWriteWidths sched> {
44150b57cec5SDimitry Andricdef rr  : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
44160b57cec5SDimitry Andric                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
44170b57cec5SDimitry Andric                    [(set VR128:$dst, (v2f64 (X86Movddup VR128:$src)))]>,
44180b57cec5SDimitry Andric                    Sched<[sched.XMM]>;
44190b57cec5SDimitry Andricdef rm  : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
44200b57cec5SDimitry Andric                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
44210b57cec5SDimitry Andric                    [(set VR128:$dst,
44220b57cec5SDimitry Andric                      (v2f64 (X86Movddup
44230b57cec5SDimitry Andric                              (scalar_to_vector (loadf64 addr:$src)))))]>,
44240b57cec5SDimitry Andric                    Sched<[sched.XMM.Folded]>;
44250b57cec5SDimitry Andric}
44260b57cec5SDimitry Andric
44270b57cec5SDimitry Andric// FIXME: Merge with above classes when there are patterns for the ymm version
/// sse3_replicate_dfp_y - 256-bit register (rr) and memory (rm) forms of an
/// S3DI instruction with opcode 0x12 whose patterns use X86Movddup on v4f64.
///   OpcodeStr - mnemonic; the operand string is appended with !strconcat.
///   sched     - scheduling widths; rr uses sched.YMM, rm sched.YMM.Folded.
/// The rm form takes an f256mem operand and its pattern wraps a full
/// loadv4f64.
44280b57cec5SDimitry Andricmulticlass sse3_replicate_dfp_y<string OpcodeStr, X86SchedWriteWidths sched> {
44290b57cec5SDimitry Andricdef rr  : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
44300b57cec5SDimitry Andric                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
44310b57cec5SDimitry Andric                    [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>,
44320b57cec5SDimitry Andric                    Sched<[sched.YMM]>;
44330b57cec5SDimitry Andricdef rm  : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
44340b57cec5SDimitry Andric                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
44350b57cec5SDimitry Andric                    [(set VR256:$dst,
44360b57cec5SDimitry Andric                      (v4f64 (X86Movddup (loadv4f64 addr:$src))))]>,
44370b57cec5SDimitry Andric                    Sched<[sched.YMM.Folded]>;
44380b57cec5SDimitry Andric}
44390b57cec5SDimitry Andric
// Instantiations of the MOVDDUP multiclasses, all using SchedWriteFShuffle.
// The VEX 128-bit and 256-bit ("Y", with VEX_L) forms are predicated on
// [HasAVX, NoVLX]; the legacy "movddup" form sits outside that let.
44400b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
44410b57cec5SDimitry Andric  defm VMOVDDUP  : sse3_replicate_dfp<"vmovddup", SchedWriteFShuffle>,
44420b57cec5SDimitry Andric                                      VEX, VEX_WIG;
44430b57cec5SDimitry Andric  defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup", SchedWriteFShuffle>,
44440b57cec5SDimitry Andric                                        VEX, VEX_L, VEX_WIG;
44450b57cec5SDimitry Andric}
44460b57cec5SDimitry Andric
44470b57cec5SDimitry Andricdefm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>;
44480b57cec5SDimitry Andric
44490b57cec5SDimitry Andric
// X86Movddup of a v2f64 X86vzload64 selects the memory form VMOVDDUPrm.
// Predicates come solely from the enclosing let: a Requires<> mixin on the
// pattern would be overridden by it, so none is attached.
44500b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
44510b57cec5SDimitry Andric  def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
44520b57cec5SDimitry Andric            (VMOVDDUPrm addr:$src)>;
44530b57cec5SDimitry Andric}
44540b57cec5SDimitry Andric
// UseSSE3 counterpart: X86Movddup of a v2f64 X86vzload64 selects the legacy
// memory form MOVDDUPrm.
44550b57cec5SDimitry Andriclet Predicates = [UseSSE3] in {
44560b57cec5SDimitry Andric  def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
44570b57cec5SDimitry Andric            (MOVDDUPrm addr:$src)>;
44580b57cec5SDimitry Andric}
44590b57cec5SDimitry Andric
44600b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
44610b57cec5SDimitry Andric// SSE3 - Move Unaligned Integer
44620b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
44630b57cec5SDimitry Andric
// VEX "vlddqu" loads (opcode 0xF0, MRMSrcMem), predicated on HasAVX. The
// 128-bit form selects the int_x86_sse3_ldu_dq intrinsic; the 256-bit form
// (VEX_L) selects int_x86_avx_ldu_dq_256. Scheduling uses the RM entry of
// SchedWriteVecMoveLS for the matching width.
44640b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
44650b57cec5SDimitry Andric  def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
44660b57cec5SDimitry Andric                      "vlddqu\t{$src, $dst|$dst, $src}",
44670b57cec5SDimitry Andric                      [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
44680b57cec5SDimitry Andric                      Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
44690b57cec5SDimitry Andric  def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
44700b57cec5SDimitry Andric                       "vlddqu\t{$src, $dst|$dst, $src}",
44710b57cec5SDimitry Andric                       [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>,
44720b57cec5SDimitry Andric                       Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, VEX_L, VEX_WIG;
44730b57cec5SDimitry Andric} // Predicates
44740b57cec5SDimitry Andric
// Legacy "lddqu" load (opcode 0xF0, MRMSrcMem): selects the
// int_x86_sse3_ldu_dq intrinsic on a 128-bit memory operand. It sits outside
// any Predicates let.
44750b57cec5SDimitry Andricdef LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
44760b57cec5SDimitry Andric                   "lddqu\t{$src, $dst|$dst, $src}",
44770b57cec5SDimitry Andric                   [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
44780b57cec5SDimitry Andric                   Sched<[SchedWriteVecMoveLS.XMM.RM]>;
44790b57cec5SDimitry Andric
44800b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
44810b57cec5SDimitry Andric// SSE3 - Arithmetic
44820b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
44830b57cec5SDimitry Andric
/// sse3_addsub - Register (rr) and memory (rm) forms of an opcode-0xD0
/// instruction whose patterns use X86Addsub.
///   OpcodeStr - mnemonic.
///   vt        - result value type of the pattern.
///   RC        - register class of $dst, $src1 and the rr $src2.
///   x86memop  - memory operand type of the rm $src2.
///   sched     - scheduling class; rm uses sched.Folded + sched.ReadAfterFold.
///   ld_frag   - load fragment wrapped around addr:$src2 in the rm pattern.
///   Is2Addr   - when set (the default) the asm string prints only $src2 and
///               $dst; otherwise it prints $src2, $src1 and $dst.
/// Both forms list MXCSR in Uses and set mayRaiseFPException.
44840b57cec5SDimitry Andricmulticlass sse3_addsub<string OpcodeStr, ValueType vt, RegisterClass RC,
44850b57cec5SDimitry Andric                       X86MemOperand x86memop, X86FoldableSchedWrite sched,
44860b57cec5SDimitry Andric                       PatFrag ld_frag, bit Is2Addr = 1> {
4487480093f4SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1 in {
44880b57cec5SDimitry Andric  def rr : I<0xD0, MRMSrcReg,
44890b57cec5SDimitry Andric       (outs RC:$dst), (ins RC:$src1, RC:$src2),
44900b57cec5SDimitry Andric       !if(Is2Addr,
44910b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
44920b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
44930b57cec5SDimitry Andric       [(set RC:$dst, (vt (X86Addsub RC:$src1, RC:$src2)))]>,
44940b57cec5SDimitry Andric       Sched<[sched]>;
44950b57cec5SDimitry Andric  def rm : I<0xD0, MRMSrcMem,
44960b57cec5SDimitry Andric       (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
44970b57cec5SDimitry Andric       !if(Is2Addr,
44980b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
44990b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
45000b57cec5SDimitry Andric       [(set RC:$dst, (vt (X86Addsub RC:$src1, (ld_frag addr:$src2))))]>,
45010b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
45020b57cec5SDimitry Andric}
4503480093f4SDimitry Andric}
45040b57cec5SDimitry Andric
// Instantiations of sse3_addsub. The VEX forms (HasAVX) pass Is2Addr = 0, so
// they print three operands, and use the loadv* fragments; PS variants take
// the XD prefix and PD variants the PD prefix. The "Y" variants operate on
// VR256 and add VEX_L.
45050b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
45060b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in {
45070b57cec5SDimitry Andric    defm VADDSUBPS : sse3_addsub<"vaddsubps", v4f32, VR128, f128mem,
45080b57cec5SDimitry Andric                                 SchedWriteFAddSizes.PS.XMM, loadv4f32, 0>,
45090b57cec5SDimitry Andric                                 XD, VEX_4V, VEX_WIG;
45100b57cec5SDimitry Andric    defm VADDSUBPSY : sse3_addsub<"vaddsubps", v8f32, VR256, f256mem,
45110b57cec5SDimitry Andric                                  SchedWriteFAddSizes.PS.YMM, loadv8f32, 0>,
45120b57cec5SDimitry Andric                                  XD, VEX_4V, VEX_L, VEX_WIG;
45130b57cec5SDimitry Andric  }
45140b57cec5SDimitry Andric  let ExeDomain = SSEPackedDouble in {
45150b57cec5SDimitry Andric    defm VADDSUBPD : sse3_addsub<"vaddsubpd", v2f64, VR128, f128mem,
45160b57cec5SDimitry Andric                                 SchedWriteFAddSizes.PD.XMM, loadv2f64, 0>,
45170b57cec5SDimitry Andric                                 PD, VEX_4V, VEX_WIG;
45180b57cec5SDimitry Andric    defm VADDSUBPDY : sse3_addsub<"vaddsubpd", v4f64, VR256, f256mem,
45190b57cec5SDimitry Andric                                  SchedWriteFAddSizes.PD.YMM, loadv4f64, 0>,
45200b57cec5SDimitry Andric                                  PD, VEX_4V, VEX_L, VEX_WIG;
45210b57cec5SDimitry Andric  }
45220b57cec5SDimitry Andric}
// The legacy forms (UseSSE3) keep the default Is2Addr = 1, tie $src1 to $dst
// through Constraints, and use the memopv* fragments.
45230b57cec5SDimitry Andriclet Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
45240b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in
45250b57cec5SDimitry Andric  defm ADDSUBPS : sse3_addsub<"addsubps", v4f32, VR128, f128mem,
45260b57cec5SDimitry Andric                              SchedWriteFAddSizes.PS.XMM, memopv4f32>, XD;
45270b57cec5SDimitry Andric  let ExeDomain = SSEPackedDouble in
45280b57cec5SDimitry Andric  defm ADDSUBPD : sse3_addsub<"addsubpd", v2f64, VR128, f128mem,
45290b57cec5SDimitry Andric                              SchedWriteFAddSizes.PD.XMM, memopv2f64>, PD;
45300b57cec5SDimitry Andric}
45310b57cec5SDimitry Andric
45320b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
45330b57cec5SDimitry Andric// SSE3 Instructions
45340b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
45350b57cec5SDimitry Andric
45360b57cec5SDimitry Andric// Horizontal ops
/// S3D_Int - Register (rr) and memory (rm) forms of a two-source instruction
/// built on the S3DI format class.
///   o         - opcode byte.
///   OpcodeStr - mnemonic.
///   vt        - result value type of the pattern.
///   RC        - register class of $dst, $src1 and the rr $src2.
///   x86memop  - memory operand type of the rm $src2.
///   OpNode    - SDNode applied to ($src1, $src2) in both patterns.
///   sched     - scheduling class; rm uses sched.Folded + sched.ReadAfterFold.
///   ld_frag   - load fragment wrapped around addr:$src2 in the rm pattern.
///   Is2Addr   - when set (the default) the asm string prints only $src2 and
///               $dst; otherwise it prints $src2, $src1 and $dst.
/// Both forms list MXCSR in Uses and set mayRaiseFPException.
45370b57cec5SDimitry Andricmulticlass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
45380b57cec5SDimitry Andric                   X86MemOperand x86memop, SDNode OpNode,
45390b57cec5SDimitry Andric                   X86FoldableSchedWrite sched, PatFrag ld_frag,
45400b57cec5SDimitry Andric                   bit Is2Addr = 1> {
4541480093f4SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1 in {
45420b57cec5SDimitry Andric  def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
45430b57cec5SDimitry Andric       !if(Is2Addr,
45440b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
45450b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
45460b57cec5SDimitry Andric      [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
45470b57cec5SDimitry Andric      Sched<[sched]>;
45480b57cec5SDimitry Andric
45490b57cec5SDimitry Andric  def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
45500b57cec5SDimitry Andric       !if(Is2Addr,
45510b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
45520b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
45530b57cec5SDimitry Andric      [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
45540b57cec5SDimitry Andric      Sched<[sched.Folded, sched.ReadAfterFold]>;
45550b57cec5SDimitry Andric}
4556480093f4SDimitry Andric}
/// S3_Int - Register (rr) and memory (rm) forms of a two-source instruction
/// built on the S3I format class (S3D_Int is the same shape on S3DI).
///   o         - opcode byte.
///   OpcodeStr - mnemonic.
///   vt        - result value type of the pattern.
///   RC        - register class of $dst, $src1 and the rr $src2.
///   x86memop  - memory operand type of the rm $src2.
///   OpNode    - SDNode applied to ($src1, $src2) in both patterns.
///   sched     - scheduling class; rm uses sched.Folded + sched.ReadAfterFold.
///   ld_frag   - load fragment wrapped around addr:$src2 in the rm pattern.
///   Is2Addr   - when set (the default) the asm string prints only $src2 and
///               $dst; otherwise it prints $src2, $src1 and $dst.
/// Both forms list MXCSR in Uses and set mayRaiseFPException.
45570b57cec5SDimitry Andricmulticlass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
45580b57cec5SDimitry Andric                  X86MemOperand x86memop, SDNode OpNode,
45590b57cec5SDimitry Andric                  X86FoldableSchedWrite sched, PatFrag ld_frag,
45600b57cec5SDimitry Andric                  bit Is2Addr = 1> {
4561480093f4SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1 in {
45620b57cec5SDimitry Andric  def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
45630b57cec5SDimitry Andric       !if(Is2Addr,
45640b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
45650b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
45660b57cec5SDimitry Andric      [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
45670b57cec5SDimitry Andric        Sched<[sched]>;
45680b57cec5SDimitry Andric
45690b57cec5SDimitry Andric  def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
45700b57cec5SDimitry Andric       !if(Is2Addr,
45710b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
45720b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
45730b57cec5SDimitry Andric      [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
45740b57cec5SDimitry Andric        Sched<[sched.Folded, sched.ReadAfterFold]>;
45750b57cec5SDimitry Andric}
4576480093f4SDimitry Andric}
45770b57cec5SDimitry Andric
// VEX horizontal add/sub instantiations (HasAVX). Opcode 0x7C is paired with
// X86fhadd and 0x7D with X86fhsub. The single-precision forms use S3D_Int and
// the double-precision forms use S3_Int. All pass Is2Addr = 0 (three-operand
// asm string) and use the loadv* fragments; 128-bit forms are scheduled as
// WriteFHAdd and the 256-bit "Y" forms (VEX_L) as WriteFHAddY.
45780b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
45790b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in {
45800b57cec5SDimitry Andric    defm VHADDPS  : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
45810b57cec5SDimitry Andric                            X86fhadd, WriteFHAdd, loadv4f32, 0>, VEX_4V, VEX_WIG;
45820b57cec5SDimitry Andric    defm VHSUBPS  : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
45830b57cec5SDimitry Andric                            X86fhsub, WriteFHAdd, loadv4f32, 0>, VEX_4V, VEX_WIG;
45840b57cec5SDimitry Andric    defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
45850b57cec5SDimitry Andric                            X86fhadd, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
45860b57cec5SDimitry Andric    defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
45870b57cec5SDimitry Andric                            X86fhsub, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
45880b57cec5SDimitry Andric  }
45890b57cec5SDimitry Andric  let ExeDomain = SSEPackedDouble in {
45900b57cec5SDimitry Andric    defm VHADDPD  : S3_Int<0x7C, "vhaddpd", v2f64, VR128, f128mem,
45910b57cec5SDimitry Andric                           X86fhadd, WriteFHAdd, loadv2f64, 0>, VEX_4V, VEX_WIG;
45920b57cec5SDimitry Andric    defm VHSUBPD  : S3_Int<0x7D, "vhsubpd", v2f64, VR128, f128mem,
45930b57cec5SDimitry Andric                           X86fhsub, WriteFHAdd, loadv2f64, 0>, VEX_4V, VEX_WIG;
45940b57cec5SDimitry Andric    defm VHADDPDY : S3_Int<0x7C, "vhaddpd", v4f64, VR256, f256mem,
45950b57cec5SDimitry Andric                           X86fhadd, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
45960b57cec5SDimitry Andric    defm VHSUBPDY : S3_Int<0x7D, "vhsubpd", v4f64, VR256, f256mem,
45970b57cec5SDimitry Andric                           X86fhsub, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
45980b57cec5SDimitry Andric  }
45990b57cec5SDimitry Andric}
46000b57cec5SDimitry Andric
// Non-VEX horizontal FP add/sub, 128-bit only. $src1 is tied to $dst through
// Constraints. Is2Addr is not passed, so the multiclass default applies
// (1 for S3_Int, i.e. the two-operand asm string). Memory forms match through
// the memopv* fragments. No Predicates are set by this let block.
46010b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
46020b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in {
46030b57cec5SDimitry Andric    defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd,
46040b57cec5SDimitry Andric                          WriteFHAdd, memopv4f32>;
46050b57cec5SDimitry Andric    defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub,
46060b57cec5SDimitry Andric                          WriteFHAdd, memopv4f32>;
46070b57cec5SDimitry Andric  }
46080b57cec5SDimitry Andric  let ExeDomain = SSEPackedDouble in {
46090b57cec5SDimitry Andric    defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd,
46100b57cec5SDimitry Andric                         WriteFHAdd, memopv2f64>;
46110b57cec5SDimitry Andric    defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub,
46120b57cec5SDimitry Andric                         WriteFHAdd, memopv2f64>;
46130b57cec5SDimitry Andric  }
46140b57cec5SDimitry Andric}
46150b57cec5SDimitry Andric
46160b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
46170b57cec5SDimitry Andric// SSSE3 - Packed Absolute Instructions
46180b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
46190b57cec5SDimitry Andric
46200b57cec5SDimitry Andric/// SS3I_unop_rm - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
/// Emits a 128-bit register form (rr) and memory form (rm) on VR128/i128mem.
///   vt      - vector type the OpNode result is cast to.
///   OpNode  - unary node matched by both patterns.
///   sched   - per-width scheduling bundle; only its XMM member is used here.
///   ld_frag - load fragment wrapped around addr:$src in the rm pattern.
46210b57cec5SDimitry Andricmulticlass SS3I_unop_rm<bits<8> opc, string OpcodeStr, ValueType vt,
46220b57cec5SDimitry Andric                        SDNode OpNode, X86SchedWriteWidths sched, PatFrag ld_frag> {
46230b57cec5SDimitry Andric  def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
46240b57cec5SDimitry Andric                 (ins VR128:$src),
46250b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
46260b57cec5SDimitry Andric                 [(set VR128:$dst, (vt (OpNode VR128:$src)))]>,
46270b57cec5SDimitry Andric                 Sched<[sched.XMM]>;
46280b57cec5SDimitry Andric
// Memory form: the single source is loaded through ld_frag.
46290b57cec5SDimitry Andric  def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
46300b57cec5SDimitry Andric                 (ins i128mem:$src),
46310b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
46320b57cec5SDimitry Andric                 [(set VR128:$dst,
46330b57cec5SDimitry Andric                   (vt (OpNode (ld_frag addr:$src))))]>,
46340b57cec5SDimitry Andric                 Sched<[sched.XMM.Folded]>;
46350b57cec5SDimitry Andric}
46360b57cec5SDimitry Andric
46370b57cec5SDimitry Andric/// SS3I_unop_rm_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
/// 256-bit counterpart of the unary multiclass: emits Yrr and Yrm defs on
/// VR256/i256mem and uses the YMM member of `sched`. There is no ld_frag
/// parameter; the Yrm pattern always matches the plain `load` fragment.
46380b57cec5SDimitry Andricmulticlass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt,
46390b57cec5SDimitry Andric                          SDNode OpNode, X86SchedWriteWidths sched> {
46400b57cec5SDimitry Andric  def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
46410b57cec5SDimitry Andric                  (ins VR256:$src),
46420b57cec5SDimitry Andric                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
46430b57cec5SDimitry Andric                  [(set VR256:$dst, (vt (OpNode VR256:$src)))]>,
46440b57cec5SDimitry Andric                  Sched<[sched.YMM]>;
46450b57cec5SDimitry Andric
46460b57cec5SDimitry Andric  def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
46470b57cec5SDimitry Andric                  (ins i256mem:$src),
46480b57cec5SDimitry Andric                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
46490b57cec5SDimitry Andric                  [(set VR256:$dst,
46500b57cec5SDimitry Andric                    (vt (OpNode (load addr:$src))))]>,
46510b57cec5SDimitry Andric                  Sched<[sched.YMM.Folded]>;
46520b57cec5SDimitry Andric}
46530b57cec5SDimitry Andric
// VEX packed absolute value (opcodes 0x1C/0x1D/0x1E for byte/word/dword).
// The 128-bit forms require HasAVX and the 256-bit forms HasAVX2. Byte and
// word forms are additionally predicated on NoVLX_Or_NoBWI, dword forms on
// NoVLX.
// NOTE(review): presumably the NoVLX* predicates hand selection over to the
// AVX-512 definitions when those features are present - confirm.
// The 128- and 256-bit defms share a name; the resulting defs stay distinct
// because the two multiclasses use different suffixes (rr/rm vs. Yrr/Yrm).
46540b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
46550b57cec5SDimitry Andric  defm VPABSB  : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, SchedWriteVecALU,
46560b57cec5SDimitry Andric                              load>, VEX, VEX_WIG;
46570b57cec5SDimitry Andric  defm VPABSW  : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, SchedWriteVecALU,
46580b57cec5SDimitry Andric                              load>, VEX, VEX_WIG;
46590b57cec5SDimitry Andric}
46600b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
46610b57cec5SDimitry Andric  defm VPABSD  : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, SchedWriteVecALU,
46620b57cec5SDimitry Andric                              load>, VEX, VEX_WIG;
46630b57cec5SDimitry Andric}
46640b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
46650b57cec5SDimitry Andric  defm VPABSB  : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs, SchedWriteVecALU>,
46660b57cec5SDimitry Andric                                VEX, VEX_L, VEX_WIG;
46670b57cec5SDimitry Andric  defm VPABSW  : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs, SchedWriteVecALU>,
46680b57cec5SDimitry Andric                                VEX, VEX_L, VEX_WIG;
46690b57cec5SDimitry Andric}
46700b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
46710b57cec5SDimitry Andric  defm VPABSD  : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs, SchedWriteVecALU>,
46720b57cec5SDimitry Andric                                VEX, VEX_L, VEX_WIG;
46730b57cec5SDimitry Andric}
46740b57cec5SDimitry Andric
// Non-VEX packed absolute value: same opcodes and value types as the 128-bit
// VPABS* defms, but matched through the `memop` fragment and without any
// enclosing Predicates or Constraints let block.
46750b57cec5SDimitry Andricdefm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, SchedWriteVecALU,
46760b57cec5SDimitry Andric                          memop>;
46770b57cec5SDimitry Andricdefm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, SchedWriteVecALU,
46780b57cec5SDimitry Andric                          memop>;
46790b57cec5SDimitry Andricdefm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, SchedWriteVecALU,
46800b57cec5SDimitry Andric                          memop>;
46810b57cec5SDimitry Andric
46820b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
46830b57cec5SDimitry Andric// SSSE3 - Packed Binary Operator Instructions
46840b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
46850b57cec5SDimitry Andric
46860b57cec5SDimitry Andric/// SS3I_binop_rm - Simple SSSE3 bin op
/// Emits rr and rm defs whose patterns match `OpNode` on two sources.
///   DstVT      - type of the result.
///   OpVT       - type asserted on $src1 in the pattern.
///   RC         - register class for $dst, $src1 and (in rr) $src2.
///   memop_frag - load fragment wrapped around addr:$src2 in the rm pattern.
///   x86memop   - memory operand type of $src2 in the rm form.
///   sched      - scheduling class; rm uses its Folded/ReadAfterFold members.
///   Is2Addr    - 1 selects the two-operand asm string, 0 the three-operand one.
46870b57cec5SDimitry Andricmulticlass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
46880b57cec5SDimitry Andric                         ValueType DstVT, ValueType OpVT, RegisterClass RC,
46890b57cec5SDimitry Andric                         PatFrag memop_frag, X86MemOperand x86memop,
46900b57cec5SDimitry Andric                         X86FoldableSchedWrite sched, bit Is2Addr = 1> {
// The brace-less `let` covers only the rr def that follows it; rm is not
// marked commutable here.
46910b57cec5SDimitry Andric  let isCommutable = 1 in
46920b57cec5SDimitry Andric  def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst),
46930b57cec5SDimitry Andric       (ins RC:$src1, RC:$src2),
46940b57cec5SDimitry Andric       !if(Is2Addr,
46950b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
46960b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
46970b57cec5SDimitry Andric       [(set RC:$dst, (DstVT (OpNode (OpVT RC:$src1), RC:$src2)))]>,
46980b57cec5SDimitry Andric       Sched<[sched]>;
46990b57cec5SDimitry Andric  def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
47000b57cec5SDimitry Andric       (ins RC:$src1, x86memop:$src2),
47010b57cec5SDimitry Andric       !if(Is2Addr,
47020b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
47030b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
47040b57cec5SDimitry Andric       [(set RC:$dst,
47050b57cec5SDimitry Andric         (DstVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))]>,
47060b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
47070b57cec5SDimitry Andric}
47080b57cec5SDimitry Andric
47090b57cec5SDimitry Andric/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
/// Intrinsic-based variant: both patterns match `IntId128` directly, and the
/// operands are fixed to VR128 / i128mem instead of being parameters.
///   sched   - scheduling class; rm uses its Folded/ReadAfterFold members.
///   ld_frag - load fragment wrapped around addr:$src2 in the rm pattern.
///   Is2Addr - 1 selects the two-operand asm string, 0 the three-operand one.
47100b57cec5SDimitry Andricmulticlass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
47110b57cec5SDimitry Andric                             Intrinsic IntId128, X86FoldableSchedWrite sched,
47120b57cec5SDimitry Andric                             PatFrag ld_frag, bit Is2Addr = 1> {
// The brace-less `let` covers only the rr def that follows it.
47130b57cec5SDimitry Andric  let isCommutable = 1 in
47140b57cec5SDimitry Andric  def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
47150b57cec5SDimitry Andric       (ins VR128:$src1, VR128:$src2),
47160b57cec5SDimitry Andric       !if(Is2Addr,
47170b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
47180b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
47190b57cec5SDimitry Andric       [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
47200b57cec5SDimitry Andric       Sched<[sched]>;
47210b57cec5SDimitry Andric  def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
47220b57cec5SDimitry Andric       (ins VR128:$src1, i128mem:$src2),
47230b57cec5SDimitry Andric       !if(Is2Addr,
47240b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
47250b57cec5SDimitry Andric         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
47260b57cec5SDimitry Andric       [(set VR128:$dst,
47270b57cec5SDimitry Andric         (IntId128 VR128:$src1, (ld_frag addr:$src2)))]>,
47280b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
47290b57cec5SDimitry Andric}
47300b57cec5SDimitry Andric
/// SS3I_binop_rm_int_y - 256-bit intrinsic-based SSSE3-style bin op.
/// Emits Yrr and Yrm defs on VR256 / i256mem matching `IntId256`. Unlike the
/// 128-bit intrinsic multiclass there is no Is2Addr or ld_frag parameter: the
/// asm string is always the three-operand form and Yrm always matches `load`.
47310b57cec5SDimitry Andricmulticlass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
47320b57cec5SDimitry Andric                               Intrinsic IntId256,
47330b57cec5SDimitry Andric                               X86FoldableSchedWrite sched> {
// The brace-less `let` covers only the Yrr def that follows it.
47340b57cec5SDimitry Andric  let isCommutable = 1 in
47350b57cec5SDimitry Andric  def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
47360b57cec5SDimitry Andric       (ins VR256:$src1, VR256:$src2),
47370b57cec5SDimitry Andric       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
47380b57cec5SDimitry Andric       [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
47390b57cec5SDimitry Andric       Sched<[sched]>;
47400b57cec5SDimitry Andric  def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
47410b57cec5SDimitry Andric       (ins VR256:$src1, i256mem:$src2),
47420b57cec5SDimitry Andric       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
47430b57cec5SDimitry Andric       [(set VR256:$dst,
47440b57cec5SDimitry Andric         (IntId256 VR256:$src1, (load addr:$src2)))]>,
47450b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
47460b57cec5SDimitry Andric}
47470b57cec5SDimitry Andric
// 128-bit VEX forms predicated on HasAVX and NoVLX_Or_NoBWI; ImmT = NoImm is
// applied to the whole group and every defm passes Is2Addr = 0.
// VPSHUFB and VPMADDUBSW are wrapped in `let isCommutable = 0`; VPMULHRSW is
// deliberately outside that inner block.
// NOTE(review): presumably the inner let overrides the isCommutable = 1 that
// SS3I_binop_rm places on its rr def - confirm TableGen let precedence.
47480b57cec5SDimitry Andriclet ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
47490b57cec5SDimitry Andriclet isCommutable = 0 in {
47500b57cec5SDimitry Andric  defm VPSHUFB    : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8,
47510b57cec5SDimitry Andric                                  VR128, load, i128mem,
47520b57cec5SDimitry Andric                                  SchedWriteVarShuffle.XMM, 0>, VEX_4V, VEX_WIG;
// Result type (v8i16) differs from the source operand type (v16i8).
47530b57cec5SDimitry Andric  defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16,
47540b57cec5SDimitry Andric                                  v16i8, VR128, load, i128mem,
47550b57cec5SDimitry Andric                                  SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG;
47560b57cec5SDimitry Andric}
47570b57cec5SDimitry Andricdefm VPMULHRSW    : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, v8i16,
47580b57cec5SDimitry Andric                                  VR128, load, i128mem,
47590b57cec5SDimitry Andric                                  SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG;
47600b57cec5SDimitry Andric}
47610b57cec5SDimitry Andric
// 128-bit VEX forms predicated on HasAVX only. All defms sit inside
// `let isCommutable = 0` and pass Is2Addr = 0 with the `load` fragment.
// VPHADD{W,D}/VPHSUB{W,D} are pattern-matched on the X86hadd/X86hsub nodes;
// VPSIGN{B,W,D}, VPHADDSW and VPHSUBSW are matched on the int_x86_ssse3_*_128
// intrinsics through SS3I_binop_rm_int.
47620b57cec5SDimitry Andriclet ImmT = NoImm, Predicates = [HasAVX] in {
47630b57cec5SDimitry Andriclet isCommutable = 0 in {
47640b57cec5SDimitry Andric  defm VPHADDW    : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128,
47650b57cec5SDimitry Andric                                  load, i128mem,
47660b57cec5SDimitry Andric                                  SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
47670b57cec5SDimitry Andric  defm VPHADDD    : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128,
47680b57cec5SDimitry Andric                                  load, i128mem,
47690b57cec5SDimitry Andric                                  SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
47700b57cec5SDimitry Andric  defm VPHSUBW    : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128,
47710b57cec5SDimitry Andric                                  load, i128mem,
47720b57cec5SDimitry Andric                                  SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
47730b57cec5SDimitry Andric  defm VPHSUBD    : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
47740b57cec5SDimitry Andric                                  load, i128mem,
4775d409305fSDimitry Andric                                  SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
47760b57cec5SDimitry Andric  defm VPSIGNB    : SS3I_binop_rm_int<0x08, "vpsignb",
47770b57cec5SDimitry Andric                                      int_x86_ssse3_psign_b_128,
47780b57cec5SDimitry Andric                                      SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
47790b57cec5SDimitry Andric  defm VPSIGNW    : SS3I_binop_rm_int<0x09, "vpsignw",
47800b57cec5SDimitry Andric                                      int_x86_ssse3_psign_w_128,
47810b57cec5SDimitry Andric                                      SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
47820b57cec5SDimitry Andric  defm VPSIGND    : SS3I_binop_rm_int<0x0A, "vpsignd",
47830b57cec5SDimitry Andric                                      int_x86_ssse3_psign_d_128,
47840b57cec5SDimitry Andric                                      SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
47850b57cec5SDimitry Andric  defm VPHADDSW   : SS3I_binop_rm_int<0x03, "vphaddsw",
47860b57cec5SDimitry Andric                                      int_x86_ssse3_phadd_sw_128,
47870b57cec5SDimitry Andric                                      SchedWritePHAdd.XMM, load, 0>, VEX_4V, VEX_WIG;
47880b57cec5SDimitry Andric  defm VPHSUBSW   : SS3I_binop_rm_int<0x07, "vphsubsw",
47890b57cec5SDimitry Andric                                      int_x86_ssse3_phsub_sw_128,
47900b57cec5SDimitry Andric                                      SchedWritePHAdd.XMM, load, 0>, VEX_4V, VEX_WIG;
47910b57cec5SDimitry Andric}
47920b57cec5SDimitry Andric}
47930b57cec5SDimitry Andric
// 256-bit counterparts of the VPSHUFB/VPMADDUBSW/VPMULHRSW group: same opcodes
// and nodes, but on VR256/i256mem with the YMM scheduling members, VEX_L added
// and predicated on HasAVX2 plus NoVLX_Or_NoBWI. As in the 128-bit group,
// only VPMULHRSWY is left outside the `let isCommutable = 0` block.
// NOTE(review): presumably the inner let overrides the isCommutable = 1 that
// SS3I_binop_rm places on its rr def - confirm TableGen let precedence.
47940b57cec5SDimitry Andriclet ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
47950b57cec5SDimitry Andriclet isCommutable = 0 in {
47960b57cec5SDimitry Andric  defm VPSHUFBY   : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8,
47970b57cec5SDimitry Andric                                  VR256, load, i256mem,
47980b57cec5SDimitry Andric                                  SchedWriteVarShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
47990b57cec5SDimitry Andric  defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16,
48000b57cec5SDimitry Andric                                   v32i8, VR256, load, i256mem,
48010b57cec5SDimitry Andric                                   SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
48020b57cec5SDimitry Andric}
48030b57cec5SDimitry Andricdefm VPMULHRSWY   : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16,
48040b57cec5SDimitry Andric                                  VR256, load, i256mem,
48050b57cec5SDimitry Andric                                  SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
48060b57cec5SDimitry Andric}
48070b57cec5SDimitry Andric
// 256-bit VEX forms predicated on HasAVX2 only, all inside
// `let isCommutable = 0`. The node-based defms carry an explicit Y in their
// name (VPHADDWY, ...). The intrinsic-based ones (VPSIGN*, VPHADDSW, VPHSUBSW)
// reuse the 128-bit defm names and rely on the Yrr/Yrm suffixes emitted by
// SS3I_binop_rm_int_y to produce distinct records; they match the
// int_x86_avx2_* intrinsics.
48080b57cec5SDimitry Andriclet ImmT = NoImm, Predicates = [HasAVX2] in {
48090b57cec5SDimitry Andriclet isCommutable = 0 in {
48100b57cec5SDimitry Andric  defm VPHADDWY   : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16,
48110b57cec5SDimitry Andric                                  VR256, load, i256mem,
48120b57cec5SDimitry Andric                                  SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
48130b57cec5SDimitry Andric  defm VPHADDDY   : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256,
48140b57cec5SDimitry Andric                                  load, i256mem,
48150b57cec5SDimitry Andric                                  SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
48160b57cec5SDimitry Andric  defm VPHSUBWY   : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16,
48170b57cec5SDimitry Andric                                  VR256, load, i256mem,
48180b57cec5SDimitry Andric                                  SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
48190b57cec5SDimitry Andric  defm VPHSUBDY   : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
48200b57cec5SDimitry Andric                                  load, i256mem,
4821d409305fSDimitry Andric                                  SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
48220b57cec5SDimitry Andric  defm VPSIGNB   : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
48230b57cec5SDimitry Andric                                       SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
48240b57cec5SDimitry Andric  defm VPSIGNW   : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w,
48250b57cec5SDimitry Andric                                       SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
48260b57cec5SDimitry Andric  defm VPSIGND   : SS3I_binop_rm_int_y<0x0A, "vpsignd", int_x86_avx2_psign_d,
48270b57cec5SDimitry Andric                                       SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
48280b57cec5SDimitry Andric  defm VPHADDSW  : SS3I_binop_rm_int_y<0x03, "vphaddsw",
48290b57cec5SDimitry Andric                                       int_x86_avx2_phadd_sw,
48300b57cec5SDimitry Andric                                       SchedWritePHAdd.YMM>, VEX_4V, VEX_L, VEX_WIG;
48310b57cec5SDimitry Andric  defm VPHSUBSW  : SS3I_binop_rm_int_y<0x07, "vphsubsw",
48320b57cec5SDimitry Andric                                       int_x86_avx2_phsub_sw,
48330b57cec5SDimitry Andric                                       SchedWritePHAdd.YMM>, VEX_4V, VEX_L, VEX_WIG;
48340b57cec5SDimitry Andric}
48350b57cec5SDimitry Andric}
48360b57cec5SDimitry Andric
48370b57cec5SDimitry Andric// None of these have i8 immediate fields.
// Non-VEX SSSE3 binary ops, 128-bit only. $src1 is tied to $dst through
// Constraints and Is2Addr is left at its default of 1 (two-operand asm string).
// Memory forms match through `memop`. Everything except PMULHRSW sits inside
// `let isCommutable = 0`. No Predicates are set by this let block.
48380b57cec5SDimitry Andriclet ImmT = NoImm, Constraints = "$src1 = $dst" in {
48390b57cec5SDimitry Andriclet isCommutable = 0 in {
48400b57cec5SDimitry Andric  defm PHADDW    : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, v8i16, VR128,
48410b57cec5SDimitry Andric                                 memop, i128mem, SchedWritePHAdd.XMM>;
48420b57cec5SDimitry Andric  defm PHADDD    : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, v4i32, VR128,
48430b57cec5SDimitry Andric                                 memop, i128mem, SchedWritePHAdd.XMM>;
48440b57cec5SDimitry Andric  defm PHSUBW    : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, v8i16, VR128,
48450b57cec5SDimitry Andric                                 memop, i128mem, SchedWritePHAdd.XMM>;
48460b57cec5SDimitry Andric  defm PHSUBD    : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, v4i32, VR128,
48470b57cec5SDimitry Andric                                 memop, i128mem, SchedWritePHAdd.XMM>;
48480b57cec5SDimitry Andric  defm PSIGNB    : SS3I_binop_rm_int<0x08, "psignb", int_x86_ssse3_psign_b_128,
48490b57cec5SDimitry Andric                                     SchedWriteVecALU.XMM, memop>;
48500b57cec5SDimitry Andric  defm PSIGNW    : SS3I_binop_rm_int<0x09, "psignw", int_x86_ssse3_psign_w_128,
48510b57cec5SDimitry Andric                                     SchedWriteVecALU.XMM, memop>;
48520b57cec5SDimitry Andric  defm PSIGND    : SS3I_binop_rm_int<0x0A, "psignd", int_x86_ssse3_psign_d_128,
48530b57cec5SDimitry Andric                                     SchedWriteVecALU.XMM, memop>;
48540b57cec5SDimitry Andric  defm PSHUFB    : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, v16i8, VR128,
48550b57cec5SDimitry Andric                                 memop, i128mem, SchedWriteVarShuffle.XMM>;
48560b57cec5SDimitry Andric  defm PHADDSW   : SS3I_binop_rm_int<0x03, "phaddsw",
48570b57cec5SDimitry Andric                                     int_x86_ssse3_phadd_sw_128,
48580b57cec5SDimitry Andric                                     SchedWritePHAdd.XMM, memop>;
48590b57cec5SDimitry Andric  defm PHSUBSW   : SS3I_binop_rm_int<0x07, "phsubsw",
48600b57cec5SDimitry Andric                                     int_x86_ssse3_phsub_sw_128,
48610b57cec5SDimitry Andric                                     SchedWritePHAdd.XMM, memop>;
48620b57cec5SDimitry Andric  defm PMADDUBSW : SS3I_binop_rm<0x04, "pmaddubsw", X86vpmaddubsw, v8i16,
48630b57cec5SDimitry Andric                                 v16i8, VR128, memop, i128mem,
48640b57cec5SDimitry Andric                                 SchedWriteVecIMul.XMM>;
48650b57cec5SDimitry Andric}
48660b57cec5SDimitry Andricdefm PMULHRSW    : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, v8i16,
48670b57cec5SDimitry Andric                                 VR128, memop, i128mem, SchedWriteVecIMul.XMM>;
48680b57cec5SDimitry Andric}
48690b57cec5SDimitry Andric
48700b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
48710b57cec5SDimitry Andric// SSSE3 - Packed Align Instruction Patterns
48720b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
48730b57cec5SDimitry Andric
/// ssse3_palignr - Packed align-right with an 8-bit immediate (opcode 0x0F
/// on the SS3AI format). Emits a register form (rri) and a memory form (rmi),
/// both matching X86PAlignr with $src3 as an (i8 timm) operand.
///   asm        - mnemonic placed at the start of the asm string.
///   VT / RC    - vector type and register class of the result and $src1.
///   memop_frag - load fragment wrapped around addr:$src2 in the rmi pattern.
///   x86memop   - memory operand type of $src2 in the rmi form.
///   sched      - scheduling class; rmi uses its Folded/ReadAfterFold members.
///   Is2Addr    - 1 selects the asm string without $src1, 0 the one with it.
48740b57cec5SDimitry Andricmulticlass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
48750b57cec5SDimitry Andric                         PatFrag memop_frag, X86MemOperand x86memop,
48760b57cec5SDimitry Andric                         X86FoldableSchedWrite sched, bit Is2Addr = 1> {
// Both defs are explicitly marked as having no side effects.
48770b57cec5SDimitry Andric  let hasSideEffects = 0 in {
48780b57cec5SDimitry Andric  def rri : SS3AI<0x0F, MRMSrcReg, (outs RC:$dst),
48790b57cec5SDimitry Andric      (ins RC:$src1, RC:$src2, u8imm:$src3),
48800b57cec5SDimitry Andric      !if(Is2Addr,
48810b57cec5SDimitry Andric        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
48820b57cec5SDimitry Andric        !strconcat(asm,
48830b57cec5SDimitry Andric                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
48848bcb0991SDimitry Andric      [(set RC:$dst, (VT (X86PAlignr RC:$src1, RC:$src2, (i8 timm:$src3))))]>,
48850b57cec5SDimitry Andric      Sched<[sched]>;
// mayLoad is set explicitly on the memory form only.
48860b57cec5SDimitry Andric  let mayLoad = 1 in
48870b57cec5SDimitry Andric  def rmi : SS3AI<0x0F, MRMSrcMem, (outs RC:$dst),
48880b57cec5SDimitry Andric      (ins RC:$src1, x86memop:$src2, u8imm:$src3),
48890b57cec5SDimitry Andric      !if(Is2Addr,
48900b57cec5SDimitry Andric        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
48910b57cec5SDimitry Andric        !strconcat(asm,
48920b57cec5SDimitry Andric                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
48930b57cec5SDimitry Andric      [(set RC:$dst, (VT (X86PAlignr RC:$src1,
48940b57cec5SDimitry Andric                                     (memop_frag addr:$src2),
48958bcb0991SDimitry Andric                                     (i8 timm:$src3))))]>,
48960b57cec5SDimitry Andric      Sched<[sched.Folded, sched.ReadAfterFold]>;
48970b57cec5SDimitry Andric  }
48980b57cec5SDimitry Andric}
48990b57cec5SDimitry Andric
// Instantiations of ssse3_palignr:
//   VPALIGNR  - 128-bit VEX form (HasAVX, NoVLX_Or_NoBWI), Is2Addr = 0, `load`.
//   VPALIGNRY - 256-bit VEX form (HasAVX2, NoVLX_Or_NoBWI), adds VEX_L.
//   PALIGNR   - non-VEX form (UseSSSE3) with $src1 tied to $dst, `memop`, and
//               Is2Addr left at its default of 1.
// Each brace-less `let` applies only to the single defm that follows it.
49000b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in
49010b57cec5SDimitry Andric  defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, load, i128mem,
49020b57cec5SDimitry Andric                                SchedWriteShuffle.XMM, 0>, VEX_4V, VEX_WIG;
49030b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
49040b57cec5SDimitry Andric  defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, load, i256mem,
49050b57cec5SDimitry Andric                                 SchedWriteShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
49060b57cec5SDimitry Andriclet Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
49070b57cec5SDimitry Andric  defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memop, i128mem,
49080b57cec5SDimitry Andric                               SchedWriteShuffle.XMM>;
49090b57cec5SDimitry Andric
49100b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
49110b57cec5SDimitry Andric// SSE3 - Thread synchronization
49120b57cec5SDimitry Andric//===---------------------------------------------------------------------===//
49130b57cec5SDimitry Andric
// MONITOR / MWAIT. All three defs take no explicit operands ((outs), (ins));
// their register inputs are modelled only through `Uses`. They share opcode
// 0x01 and are scheduled as WriteSystem, and all require HasSSE3.
49140b57cec5SDimitry Andriclet SchedRW = [WriteSystem] in {
// 32-bit-mode MONITOR: implicit EAX, ECX, EDX; no selection pattern.
49150b57cec5SDimitry Andriclet Uses = [EAX, ECX, EDX] in
49160b57cec5SDimitry Andricdef MONITOR32rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
49170b57cec5SDimitry Andric                     TB, Requires<[HasSSE3, Not64BitMode]>;
// 64-bit-mode MONITOR: same encoding, but the first implicit use is RAX.
49180b57cec5SDimitry Andriclet Uses = [RAX, ECX, EDX] in
49190b57cec5SDimitry Andricdef MONITOR64rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
49200b57cec5SDimitry Andric                     TB, Requires<[HasSSE3, In64BitMode]>;
49210b57cec5SDimitry Andric
// MWAIT: selected from int_x86_sse3_mwait with its arguments in ECX and EAX;
// there is no mode-specific variant.
49220b57cec5SDimitry Andriclet Uses = [ECX, EAX] in
49230b57cec5SDimitry Andricdef MWAITrr   : I<0x01, MRM_C9, (outs), (ins), "mwait",
49240b57cec5SDimitry Andric                  [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
49250b57cec5SDimitry Andric} // SchedRW
49260b57cec5SDimitry Andric
// Assembler aliases that accept mwait/monitor written with explicit register
// operands and map them onto the operand-less instruction defs. The 32-bit
// register spellings are accepted outside 64-bit mode, the 64-bit spellings
// in 64-bit mode.
49270b57cec5SDimitry Andricdef : InstAlias<"mwait\t{%eax, %ecx|ecx, eax}", (MWAITrr)>, Requires<[Not64BitMode]>;
49280b57cec5SDimitry Andricdef : InstAlias<"mwait\t{%rax, %rcx|rcx, rax}", (MWAITrr)>, Requires<[In64BitMode]>;
49290b57cec5SDimitry Andric
49300b57cec5SDimitry Andricdef : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITOR32rrr)>,
49310b57cec5SDimitry Andric      Requires<[Not64BitMode]>;
49320b57cec5SDimitry Andricdef : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITOR64rrr)>,
49330b57cec5SDimitry Andric      Requires<[In64BitMode]>;
49340b57cec5SDimitry Andric
49350b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
49360b57cec5SDimitry Andric// SSE4.1 - Packed Move with Sign/Zero Extend
49370b57cec5SDimitry Andric// NOTE: Any Extend is promoted to Zero Extend in X86ISelDAGToDAG.cpp
49380b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
49390b57cec5SDimitry Andric
/// SS41I_pmovx_rrrm - Register (rr) and memory (rm) forms of a packed
/// move-with-extend instruction. Both defs have an empty pattern list ([]),
/// so any selection patterns must be supplied separately.
///   MemOp - memory operand type of $src in the rm form.
///   OutRC - register class of $dst.
///   InRC  - register class of $src in the rr form.
///   sched - scheduling class; rm uses only its Folded member.
49400b57cec5SDimitry Andricmulticlass SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
49410b57cec5SDimitry Andric                            RegisterClass OutRC, RegisterClass InRC,
49420b57cec5SDimitry Andric                            X86FoldableSchedWrite sched> {
49430b57cec5SDimitry Andric  def rr : SS48I<opc, MRMSrcReg, (outs OutRC:$dst), (ins InRC:$src),
49440b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
49450b57cec5SDimitry Andric                 Sched<[sched]>;
49460b57cec5SDimitry Andric
49470b57cec5SDimitry Andric  def rm : SS48I<opc, MRMSrcMem, (outs OutRC:$dst), (ins MemOp:$src),
49480b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
49490b57cec5SDimitry Andric                 Sched<[sched.Folded]>;
49500b57cec5SDimitry Andric}
49510b57cec5SDimitry Andric
/// SS41I_pmovx_rm_all - Expands one extend instruction into three families:
///   NAME     - non-VEX 128-bit form; no Predicates are set for it here.
///   V#NAME   - VEX 128-bit form, mnemonic prefixed with "v", under
///              [HasAVX, prd].
///   V#NAME#Y - VEX 256-bit form under [HasAVX2, prd]: VR256 destination,
///              VR128 register source, MemYOp memory source, scheduled as
///              WriteVPMOV256, with VEX_L.
/// MemOp is the memory operand of both 128-bit forms.
49520b57cec5SDimitry Andricmulticlass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,
49530b57cec5SDimitry Andric                              X86MemOperand MemOp, X86MemOperand MemYOp,
49540b57cec5SDimitry Andric                              Predicate prd> {
49550b57cec5SDimitry Andric  defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128,
49560b57cec5SDimitry Andric                               SchedWriteShuffle.XMM>;
49570b57cec5SDimitry Andric  let Predicates = [HasAVX, prd] in
49580b57cec5SDimitry Andric    defm V#NAME   : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
49590b57cec5SDimitry Andric                                     VR128, VR128, SchedWriteShuffle.XMM>,
49600b57cec5SDimitry Andric                                     VEX, VEX_WIG;
49610b57cec5SDimitry Andric  let Predicates = [HasAVX2, prd] in
49620b57cec5SDimitry Andric    defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
4963*fe6060f1SDimitry Andric                                     VR256, VR128, WriteVPMOV256>,
49640b57cec5SDimitry Andric                                     VEX, VEX_L, VEX_WIG;
49650b57cec5SDimitry Andric}
49660b57cec5SDimitry Andric
/// SS41I_pmovx_rm - Instantiates the sign- and zero-extend variants for one
/// element-size pair. OpcodeStr is the size suffix appended to "pmovsx" /
/// "pmovzx". `opc` is the sign-extend opcode; the zero-extend opcode is
/// derived from it as opc + 0x10.
49670b57cec5SDimitry Andricmulticlass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
49680b57cec5SDimitry Andric                          X86MemOperand MemYOp, Predicate prd> {
49690b57cec5SDimitry Andric  defm PMOVSX#NAME : SS41I_pmovx_rm_all<opc, !strconcat("pmovsx", OpcodeStr),
49700b57cec5SDimitry Andric                                        MemOp, MemYOp, prd>;
49710b57cec5SDimitry Andric  defm PMOVZX#NAME : SS41I_pmovx_rm_all<!add(opc, 0x10),
49720b57cec5SDimitry Andric                                        !strconcat("pmovzx", OpcodeStr),
49730b57cec5SDimitry Andric                                        MemOp, MemYOp, prd>;
49740b57cec5SDimitry Andric}
49750b57cec5SDimitry Andric
// One defm per source/destination element-size pair. The first memory operand
// is used by the 128-bit forms and the second by the 256-bit (Y) forms. The
// opcode given is for the sign-extend variant. Only BW is predicated on
// NoVLX_Or_NoBWI; the others use NoVLX.
49760b57cec5SDimitry Andricdefm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem, NoVLX_Or_NoBWI>;
49770b57cec5SDimitry Andricdefm WD : SS41I_pmovx_rm<0x23, "wd", i64mem, i128mem, NoVLX>;
49780b57cec5SDimitry Andricdefm DQ : SS41I_pmovx_rm<0x25, "dq", i64mem, i128mem, NoVLX>;
49790b57cec5SDimitry Andric
49800b57cec5SDimitry Andricdefm BD : SS41I_pmovx_rm<0x21, "bd", i32mem, i64mem, NoVLX>;
49810b57cec5SDimitry Andricdefm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem, NoVLX>;
49820b57cec5SDimitry Andric
49830b57cec5SDimitry Andricdefm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem, NoVLX>;
49840b57cec5SDimitry Andric
49850b57cec5SDimitry Andric// AVX2 Patterns
// Selection patterns that map extend nodes and extending loads onto the
// Y-suffixed PMOVSX/PMOVZX instructions (all results here are 256-bit types:
// v16i16, v8i32, v4i64).
//   OpcPrefix - instruction name prefix pasted in front of BWY/BDY/BQY/WDY/
//               WQY/DQY plus rr/rm ("VPMOVSX" / "VPMOVZX" below).
//   ExtTy     - "s" or "z"; pasted in front of "extloadvi8/16/32" to name the
//               extending-load PatFrag.
//   ExtOp     - node used where source and result have the same element count
//               (v16i8->v16i16, v8i16->v8i32, v4i32->v4i64).
//   InVecOp   - node used where the result has fewer elements than the 128-bit
//               source (v16i8->v8i32, v16i8->v4i64, v8i16->v4i64).
// BW patterns are guarded by NoVLX_Or_NoBWI, everything else by NoVLX.
49860b57cec5SDimitry Andricmulticlass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
49870b57cec5SDimitry Andric                                     SDNode ExtOp, SDNode InVecOp> {
49880b57cec5SDimitry Andric  // Register-Register patterns
49890b57cec5SDimitry Andric  let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
49900b57cec5SDimitry Andric  def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
49910b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
49920b57cec5SDimitry Andric  }
49930b57cec5SDimitry Andric  let Predicates = [HasAVX2, NoVLX] in {
49940b57cec5SDimitry Andric  def : Pat<(v8i32 (InVecOp (v16i8 VR128:$src))),
49950b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BDYrr) VR128:$src)>;
49960b57cec5SDimitry Andric  def : Pat<(v4i64 (InVecOp (v16i8 VR128:$src))),
49970b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BQYrr) VR128:$src)>;
49980b57cec5SDimitry Andric
49990b57cec5SDimitry Andric  def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
50000b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
50010b57cec5SDimitry Andric  def : Pat<(v4i64 (InVecOp (v8i16 VR128:$src))),
50020b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WQYrr) VR128:$src)>;
50030b57cec5SDimitry Andric
50040b57cec5SDimitry Andric  def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
50050b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
50060b57cec5SDimitry Andric  }
50070b57cec5SDimitry Andric
50080b57cec5SDimitry Andric  // Simple Register-Memory patterns
  // These match the "<ExtTy>extloadvi*" PatFrags directly and select the
  // memory (rm) form of the instruction.
50090b57cec5SDimitry Andric  let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
50100b57cec5SDimitry Andric  def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
50110b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
50120b57cec5SDimitry Andric
50130b57cec5SDimitry Andric  def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
50140b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
50150b57cec5SDimitry Andric  }
50160b57cec5SDimitry Andric
50170b57cec5SDimitry Andric  let Predicates = [HasAVX2, NoVLX] in {
50180b57cec5SDimitry Andric  def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
50190b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
50200b57cec5SDimitry Andric  def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
50210b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
50220b57cec5SDimitry Andric
50230b57cec5SDimitry Andric  def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
50240b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
50250b57cec5SDimitry Andric  def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
50260b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
50270b57cec5SDimitry Andric
50280b57cec5SDimitry Andric  def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
50290b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
50300b57cec5SDimitry Andric  }
50310b57cec5SDimitry Andric
50320b57cec5SDimitry Andric  // AVX2 Register-Memory patterns
  // These fold a load feeding the extend into the rm form. Full 128-bit
  // vector loads are matched for WD and DQ; for BD, BQ and WQ the matched
  // input is a scalar load wrapped in scalar_to_vector, or an X86vzload
  // node, bitcast to the source vector type.
50330b57cec5SDimitry Andric  let Predicates = [HasAVX2, NoVLX] in {
50340b57cec5SDimitry Andric  def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
50350b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
50360b57cec5SDimitry Andric
50370b57cec5SDimitry Andric  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
50380b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
50395ffd83dbSDimitry Andric  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
50405ffd83dbSDimitry Andric            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
50415ffd83dbSDimitry Andric  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
50420b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
50430b57cec5SDimitry Andric
50440b57cec5SDimitry Andric  def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
50450b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
50460b57cec5SDimitry Andric
50470b57cec5SDimitry Andric  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
50480b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
50495ffd83dbSDimitry Andric  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload32 addr:$src))))),
50500b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
50510b57cec5SDimitry Andric
50520b57cec5SDimitry Andric  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
50530b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
50545ffd83dbSDimitry Andric  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
50555ffd83dbSDimitry Andric            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
50565ffd83dbSDimitry Andric  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
50570b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
50580b57cec5SDimitry Andric  }
50590b57cec5SDimitry Andric}
50600b57cec5SDimitry Andric
// Sign- and zero-extending instantiations: (prefix, ExtTy, ExtOp, InVecOp).
50610b57cec5SDimitry Andricdefm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", sext, sext_invec>;
50620b57cec5SDimitry Andricdefm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", zext, zext_invec>;
50630b57cec5SDimitry Andric
50640b57cec5SDimitry Andric// SSE4.1/AVX patterns.
// Selection patterns for the 128-bit PMOVSX/PMOVZX forms (all results here
// are v8i16, v4i32 or v2i64).
//   OpcPrefix - instruction name prefix pasted in front of BW/BD/BQ/WD/WQ/DQ
//               plus rr/rm.
//   ExtTy     - "s" or "z"; pasted in front of "extloadvi8/16/32" to name the
//               extending-load PatFrag.
//   ExtOp     - extend node to match; every instantiation below passes
//               sext_invec or zext_invec.
// BW patterns are guarded by NoVLX_Or_NoBWI, everything else by NoVLX.
50650b57cec5SDimitry Andricmulticlass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
50660b57cec5SDimitry Andric                                SDNode ExtOp> {
  // Register-register patterns.
50670b57cec5SDimitry Andric  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
50680b57cec5SDimitry Andric  def : Pat<(v8i16 (ExtOp (v16i8 VR128:$src))),
50690b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BWrr) VR128:$src)>;
50700b57cec5SDimitry Andric  }
50710b57cec5SDimitry Andric  let Predicates = [HasAVX, NoVLX] in {
50720b57cec5SDimitry Andric  def : Pat<(v4i32 (ExtOp (v16i8 VR128:$src))),
50730b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BDrr) VR128:$src)>;
50740b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (v16i8 VR128:$src))),
50750b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BQrr) VR128:$src)>;
50760b57cec5SDimitry Andric
50770b57cec5SDimitry Andric  def : Pat<(v4i32 (ExtOp (v8i16 VR128:$src))),
50780b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WDrr) VR128:$src)>;
50790b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (v8i16 VR128:$src))),
50800b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WQrr) VR128:$src)>;
50810b57cec5SDimitry Andric
50820b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (v4i32 VR128:$src))),
50830b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#DQrr) VR128:$src)>;
50840b57cec5SDimitry Andric  }
  // Extending-load PatFrags selected straight to the rm form.
50850b57cec5SDimitry Andric  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
50860b57cec5SDimitry Andric  def : Pat<(v8i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
50870b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
50880b57cec5SDimitry Andric  }
50890b57cec5SDimitry Andric  let Predicates = [HasAVX, NoVLX] in {
50900b57cec5SDimitry Andric  def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
50910b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
50920b57cec5SDimitry Andric  def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
50930b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
50940b57cec5SDimitry Andric
50950b57cec5SDimitry Andric  def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
50960b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
50970b57cec5SDimitry Andric  def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
50980b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
50990b57cec5SDimitry Andric
51000b57cec5SDimitry Andric  def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
51010b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
51020b57cec5SDimitry Andric  }
  // Load-folding patterns: the extend's operand is a scalar load wrapped in
  // scalar_to_vector, an X86vzload node, or a full 128-bit vector load; all
  // of them select the rm form on the same address.
51030b57cec5SDimitry Andric  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
51040b57cec5SDimitry Andric  def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
51050b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
51060b57cec5SDimitry Andric  def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
51070b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
51080b57cec5SDimitry Andric  def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
51090b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
51100b57cec5SDimitry Andric  def : Pat<(v8i16 (ExtOp (loadv16i8 addr:$src))),
51110b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
51120b57cec5SDimitry Andric  }
51130b57cec5SDimitry Andric  let Predicates = [HasAVX, NoVLX] in {
51140b57cec5SDimitry Andric  def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
51150b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
51160b57cec5SDimitry Andric  def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
51170b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
51180b57cec5SDimitry Andric  def : Pat<(v4i32 (ExtOp (loadv16i8 addr:$src))),
51190b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
51200b57cec5SDimitry Andric
51210b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
51220b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
51230b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (loadv16i8 addr:$src))),
51240b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
51250b57cec5SDimitry Andric
51260b57cec5SDimitry Andric  def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
51270b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
51280b57cec5SDimitry Andric  def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
51290b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
51300b57cec5SDimitry Andric  def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
51310b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
51320b57cec5SDimitry Andric  def : Pat<(v4i32 (ExtOp (loadv8i16 addr:$src))),
51330b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
51340b57cec5SDimitry Andric
51350b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
51360b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
51370b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
51380b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
51390b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (loadv8i16 addr:$src))),
51400b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
51410b57cec5SDimitry Andric
51420b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
51430b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
51440b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
51450b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
51460b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
51470b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
51480b57cec5SDimitry Andric  def : Pat<(v2i64 (ExtOp (loadv4i32 addr:$src))),
51490b57cec5SDimitry Andric            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
51500b57cec5SDimitry Andric  }
51510b57cec5SDimitry Andric}
51520b57cec5SDimitry Andric
// VEX-prefixed instruction names: use the predicates set inside the multiclass.
51530b57cec5SDimitry Andricdefm : SS41I_pmovx_patterns<"VPMOVSX", "s", sext_invec>;
51540b57cec5SDimitry Andricdefm : SS41I_pmovx_patterns<"VPMOVZX", "z", zext_invec>;
51550b57cec5SDimitry Andric
// Legacy (non-V) instruction names, instantiated under UseSSE41.
// NOTE(review): presumably this outer `let Predicates` replaces the
// HasAVX-based predicates set inside the multiclass -- confirm against the
// TableGen `let` semantics for defm.
51560b57cec5SDimitry Andriclet Predicates = [UseSSE41] in {
51570b57cec5SDimitry Andric  defm : SS41I_pmovx_patterns<"PMOVSX", "s", sext_invec>;
51580b57cec5SDimitry Andric  defm : SS41I_pmovx_patterns<"PMOVZX", "z", zext_invec>;
51590b57cec5SDimitry Andric}
51600b57cec5SDimitry Andric
51610b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
51620b57cec5SDimitry Andric// SSE4.1 - Extract Instructions
51630b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
51640b57cec5SDimitry Andric
51650b57cec5SDimitry Andric/// SS41I_extract8 - SSE 4.1 extract 8 bits to a GR32/GR64 reg or 8 bit mem
/// Defines `rr` (result of X86pextrb written to GR32orGR64) and `mr` (the
/// X86pextrb result truncated to i8 and stored to an i8mem destination).
/// $src2 is the u8imm element selector, matched as a target immediate (timm).
51660b57cec5SDimitry Andricmulticlass SS41I_extract8<bits<8> opc, string OpcodeStr> {
51670b57cec5SDimitry Andric  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
51680b57cec5SDimitry Andric                 (ins VR128:$src1, u8imm:$src2),
51690b57cec5SDimitry Andric                 !strconcat(OpcodeStr,
51700b57cec5SDimitry Andric                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
51710b57cec5SDimitry Andric                 [(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1),
5172e8d8bef9SDimitry Andric                                         timm:$src2))]>,
51730b57cec5SDimitry Andric                  Sched<[WriteVecExtract]>;
  // Store form: hasSideEffects/mayStore are set explicitly here.
51740b57cec5SDimitry Andric  let hasSideEffects = 0, mayStore = 1 in
51750b57cec5SDimitry Andric  def mr : SS4AIi8<opc, MRMDestMem, (outs),
51760b57cec5SDimitry Andric                 (ins i8mem:$dst, VR128:$src1, u8imm:$src2),
51770b57cec5SDimitry Andric                 !strconcat(OpcodeStr,
51780b57cec5SDimitry Andric                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5179e8d8bef9SDimitry Andric                 [(store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), timm:$src2))),
51800b57cec5SDimitry Andric                          addr:$dst)]>, Sched<[WriteVecExtractSt]>;
51810b57cec5SDimitry Andric}
51820b57cec5SDimitry Andric
// VEX-encoded form (predicated on HasAVX, NoBWI) and the legacy form; both
// use opcode 0x14.
51830b57cec5SDimitry Andriclet Predicates = [HasAVX, NoBWI] in
51840b57cec5SDimitry Andric  defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX, VEX_WIG;
51850b57cec5SDimitry Andric
51860b57cec5SDimitry Andricdefm PEXTRB      : SS41I_extract8<0x14, "pextrb">;
51870b57cec5SDimitry Andric
51880b57cec5SDimitry Andric
51890b57cec5SDimitry Andric/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
/// Also defines `rr_REV`, a register-destination (MRMDestReg) record with no
/// selection pattern; it is isCodeGenOnly + ForceDisassemble and is tied to
/// the record named NAME#rr through FoldGenData.
51900b57cec5SDimitry Andricmulticlass SS41I_extract16<bits<8> opc, string OpcodeStr> {
51910b57cec5SDimitry Andric  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
51920b57cec5SDimitry Andric  def rr_REV : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
51930b57cec5SDimitry Andric                   (ins VR128:$src1, u8imm:$src2),
51940b57cec5SDimitry Andric                   !strconcat(OpcodeStr,
51950b57cec5SDimitry Andric                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
51960b57cec5SDimitry Andric                   Sched<[WriteVecExtract]>, FoldGenData<NAME#rr>;
51970b57cec5SDimitry Andric
  // Store form: the X86pextrw result is truncated to i16 and stored.
51980b57cec5SDimitry Andric  let hasSideEffects = 0, mayStore = 1 in
51990b57cec5SDimitry Andric  def mr : SS4AIi8<opc, MRMDestMem, (outs),
52000b57cec5SDimitry Andric                 (ins i16mem:$dst, VR128:$src1, u8imm:$src2),
52010b57cec5SDimitry Andric                 !strconcat(OpcodeStr,
52020b57cec5SDimitry Andric                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5203e8d8bef9SDimitry Andric                 [(store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), timm:$src2))),
52040b57cec5SDimitry Andric                          addr:$dst)]>, Sched<[WriteVecExtractSt]>;
52050b57cec5SDimitry Andric}
52060b57cec5SDimitry Andric
// VEX-encoded form (predicated on HasAVX, NoBWI) and the legacy form; both
// use opcode 0x15.
52070b57cec5SDimitry Andriclet Predicates = [HasAVX, NoBWI] in
52080b57cec5SDimitry Andric  defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX, VEX_WIG;
52090b57cec5SDimitry Andric
52100b57cec5SDimitry Andricdefm PEXTRW      : SS41I_extract16<0x15, "pextrw">;
52110b57cec5SDimitry Andric
52120b57cec5SDimitry Andric
52130b57cec5SDimitry Andric/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
/// Both forms match a generic extractelt on v4i32 with an `imm` index: `rr`
/// writes GR32, `mr` stores the extracted element to an i32mem destination.
52140b57cec5SDimitry Andricmulticlass SS41I_extract32<bits<8> opc, string OpcodeStr> {
52150b57cec5SDimitry Andric  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
52160b57cec5SDimitry Andric                 (ins VR128:$src1, u8imm:$src2),
52170b57cec5SDimitry Andric                 !strconcat(OpcodeStr,
52180b57cec5SDimitry Andric                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
52190b57cec5SDimitry Andric                 [(set GR32:$dst,
52200b57cec5SDimitry Andric                  (extractelt (v4i32 VR128:$src1), imm:$src2))]>,
52210b57cec5SDimitry Andric                  Sched<[WriteVecExtract]>;
52220b57cec5SDimitry Andric  def mr : SS4AIi8<opc, MRMDestMem, (outs),
52230b57cec5SDimitry Andric                 (ins i32mem:$dst, VR128:$src1, u8imm:$src2),
52240b57cec5SDimitry Andric                 !strconcat(OpcodeStr,
52250b57cec5SDimitry Andric                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
52260b57cec5SDimitry Andric                 [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
52270b57cec5SDimitry Andric                          addr:$dst)]>, Sched<[WriteVecExtractSt]>;
52280b57cec5SDimitry Andric}
52290b57cec5SDimitry Andric
// VEX-encoded form (predicated on HasAVX, NoDQI) and the legacy form; both
// use opcode 0x16.
52300b57cec5SDimitry Andriclet Predicates = [HasAVX, NoDQI] in
52310b57cec5SDimitry Andric  defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;
52320b57cec5SDimitry Andric
52330b57cec5SDimitry Andricdefm PEXTRD      : SS41I_extract32<0x16, "pextrd">;
52340b57cec5SDimitry Andric
52350b57cec5SDimitry Andric/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
/// Both forms match a generic extractelt on v2i64 with an `imm` index: `rr`
/// writes GR64, `mr` stores the extracted element to an i64mem destination.
52360b57cec5SDimitry Andricmulticlass SS41I_extract64<bits<8> opc, string OpcodeStr> {
52370b57cec5SDimitry Andric  def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
52380b57cec5SDimitry Andric                 (ins VR128:$src1, u8imm:$src2),
52390b57cec5SDimitry Andric                 !strconcat(OpcodeStr,
52400b57cec5SDimitry Andric                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
52410b57cec5SDimitry Andric                 [(set GR64:$dst,
52420b57cec5SDimitry Andric                  (extractelt (v2i64 VR128:$src1), imm:$src2))]>,
52430b57cec5SDimitry Andric                  Sched<[WriteVecExtract]>;
52440b57cec5SDimitry Andric  def mr : SS4AIi8<opc, MRMDestMem, (outs),
52450b57cec5SDimitry Andric                 (ins i64mem:$dst, VR128:$src1, u8imm:$src2),
52460b57cec5SDimitry Andric                 !strconcat(OpcodeStr,
52470b57cec5SDimitry Andric                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
52480b57cec5SDimitry Andric                 [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
52490b57cec5SDimitry Andric                          addr:$dst)]>, Sched<[WriteVecExtractSt]>;
52500b57cec5SDimitry Andric}
52510b57cec5SDimitry Andric
// Same opcode (0x16) as the 32-bit extract; the 64-bit forms are marked with
// VEX_W (VEX-encoded) or REX_W (legacy) instead.
52520b57cec5SDimitry Andriclet Predicates = [HasAVX, NoDQI] in
52530b57cec5SDimitry Andric  defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
52540b57cec5SDimitry Andric
52550b57cec5SDimitry Andricdefm PEXTRQ      : SS41I_extract64<0x16, "pextrq">, REX_W;
52560b57cec5SDimitry Andric
52570b57cec5SDimitry Andric/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
52580b57cec5SDimitry Andric/// destination
/// The patterns view the v4f32 source as v4i32 (bc_v4i32) and match an
/// integer extractelt, so the result is produced as an integer value: `rr`
/// writes GR32orGR64, `mr` stores to an f32mem destination.
52590b57cec5SDimitry Andricmulticlass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
52600b57cec5SDimitry Andric  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
52610b57cec5SDimitry Andric                   (ins VR128:$src1, u8imm:$src2),
52620b57cec5SDimitry Andric                   !strconcat(OpcodeStr,
52630b57cec5SDimitry Andric                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
52640b57cec5SDimitry Andric                   [(set GR32orGR64:$dst,
52650b57cec5SDimitry Andric                      (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
52660b57cec5SDimitry Andric                   Sched<[WriteVecExtract]>;
52670b57cec5SDimitry Andric  def mr : SS4AIi8<opc, MRMDestMem, (outs),
52680b57cec5SDimitry Andric                   (ins f32mem:$dst, VR128:$src1, u8imm:$src2),
52690b57cec5SDimitry Andric                   !strconcat(OpcodeStr,
52700b57cec5SDimitry Andric                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
52710b57cec5SDimitry Andric                   [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
52720b57cec5SDimitry Andric                            addr:$dst)]>, Sched<[WriteVecExtractSt]>;
52730b57cec5SDimitry Andric}
52740b57cec5SDimitry Andric
// Both forms are placed in the SSEPackedSingle execution domain; only the
// VEX-encoded form carries an explicit predicate (UseAVX).
52750b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in {
52760b57cec5SDimitry Andric  let Predicates = [UseAVX] in
52770b57cec5SDimitry Andric    defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX, VEX_WIG;
52780b57cec5SDimitry Andric  defm EXTRACTPS   : SS41I_extractf32<0x17, "extractps">;
52790b57cec5SDimitry Andric}
52800b57cec5SDimitry Andric
52810b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
52820b57cec5SDimitry Andric// SSE4.1 - Insert Instructions
52830b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
52840b57cec5SDimitry Andric
/// SS41I_insert8 - Insert a byte, taken from a GR32orGR64 register (`rr`) or
/// from an extending i8 load (`rm`), into VR128:$src1 via X86pinsrb. $src3 is
/// the u8imm element selector, matched as a target immediate (timm).
/// Is2Addr picks the assembly string: when set, $src1 is omitted from the
/// printed operands; when clear, the three-source form is printed.
52850b57cec5SDimitry Andricmulticlass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
52860b57cec5SDimitry Andric  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
52870b57cec5SDimitry Andric      (ins VR128:$src1, GR32orGR64:$src2, u8imm:$src3),
52880b57cec5SDimitry Andric      !if(Is2Addr,
52890b57cec5SDimitry Andric        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
52900b57cec5SDimitry Andric        !strconcat(asm,
52910b57cec5SDimitry Andric                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
52920b57cec5SDimitry Andric      [(set VR128:$dst,
5293e8d8bef9SDimitry Andric        (X86pinsrb VR128:$src1, GR32orGR64:$src2, timm:$src3))]>,
52940b57cec5SDimitry Andric      Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
52950b57cec5SDimitry Andric  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
52960b57cec5SDimitry Andric      (ins VR128:$src1, i8mem:$src2, u8imm:$src3),
52970b57cec5SDimitry Andric      !if(Is2Addr,
52980b57cec5SDimitry Andric        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
52990b57cec5SDimitry Andric        !strconcat(asm,
53000b57cec5SDimitry Andric                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
53010b57cec5SDimitry Andric      [(set VR128:$dst,
5302e8d8bef9SDimitry Andric        (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), timm:$src3))]>,
53030b57cec5SDimitry Andric                   Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
53040b57cec5SDimitry Andric}
53050b57cec5SDimitry Andric
// VEX form: Is2Addr = 0 (three-operand syntax). Legacy form: default
// Is2Addr = 1 with $src1 tied to $dst.
53060b57cec5SDimitry Andriclet Predicates = [HasAVX, NoBWI] in
53070b57cec5SDimitry Andric  defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V, VEX_WIG;
53080b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in
53090b57cec5SDimitry Andric  defm PINSRB  : SS41I_insert8<0x20, "pinsrb">;
53100b57cec5SDimitry Andric
/// SS41I_insert32 - Insert a 32-bit value, taken from GR32 (`rr`) or from a
/// loadi32 (`rm`), into a v4i32 via the generic insertelt node with an `imm`
/// index. Is2Addr picks the assembly string: when set, $src1 is omitted from
/// the printed operands; when clear, the three-source form is printed.
53110b57cec5SDimitry Andricmulticlass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
53120b57cec5SDimitry Andric  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
53130b57cec5SDimitry Andric      (ins VR128:$src1, GR32:$src2, u8imm:$src3),
53140b57cec5SDimitry Andric      !if(Is2Addr,
53150b57cec5SDimitry Andric        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
53160b57cec5SDimitry Andric        !strconcat(asm,
53170b57cec5SDimitry Andric                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
53180b57cec5SDimitry Andric      [(set VR128:$dst,
53190b57cec5SDimitry Andric        (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
53200b57cec5SDimitry Andric      Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
53210b57cec5SDimitry Andric  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
53220b57cec5SDimitry Andric      (ins VR128:$src1, i32mem:$src2, u8imm:$src3),
53230b57cec5SDimitry Andric      !if(Is2Addr,
53240b57cec5SDimitry Andric        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
53250b57cec5SDimitry Andric        !strconcat(asm,
53260b57cec5SDimitry Andric                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
53270b57cec5SDimitry Andric      [(set VR128:$dst,
53280b57cec5SDimitry Andric        (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), imm:$src3)))]>,
53290b57cec5SDimitry Andric                   Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
53300b57cec5SDimitry Andric}
53310b57cec5SDimitry Andric
// VEX form: Is2Addr = 0 (three-operand syntax). Legacy form: default
// Is2Addr = 1 with $src1 tied to $dst.
53320b57cec5SDimitry Andriclet Predicates = [HasAVX, NoDQI] in
53330b57cec5SDimitry Andric  defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
53340b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in
53350b57cec5SDimitry Andric  defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
53360b57cec5SDimitry Andric
/// SS41I_insert64 - Insert a 64-bit value, taken from GR64 (`rr`) or from a
/// loadi64 (`rm`), into a v2i64 via the generic insertelt node with an `imm`
/// index. Is2Addr picks the assembly string: when set, $src1 is omitted from
/// the printed operands; when clear, the three-source form is printed.
53370b57cec5SDimitry Andricmulticlass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
53380b57cec5SDimitry Andric  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
53390b57cec5SDimitry Andric      (ins VR128:$src1, GR64:$src2, u8imm:$src3),
53400b57cec5SDimitry Andric      !if(Is2Addr,
53410b57cec5SDimitry Andric        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
53420b57cec5SDimitry Andric        !strconcat(asm,
53430b57cec5SDimitry Andric                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
53440b57cec5SDimitry Andric      [(set VR128:$dst,
53450b57cec5SDimitry Andric        (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
53460b57cec5SDimitry Andric      Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
53470b57cec5SDimitry Andric  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
53480b57cec5SDimitry Andric      (ins VR128:$src1, i64mem:$src2, u8imm:$src3),
53490b57cec5SDimitry Andric      !if(Is2Addr,
53500b57cec5SDimitry Andric        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
53510b57cec5SDimitry Andric        !strconcat(asm,
53520b57cec5SDimitry Andric                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
53530b57cec5SDimitry Andric      [(set VR128:$dst,
53540b57cec5SDimitry Andric        (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), imm:$src3)))]>,
53550b57cec5SDimitry Andric                   Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
53560b57cec5SDimitry Andric}
53570b57cec5SDimitry Andric
// Same opcode (0x22) as the 32-bit insert; the 64-bit forms are marked with
// VEX_W (VEX-encoded) or REX_W (legacy). The legacy form ties $src1 to $dst.
53580b57cec5SDimitry Andriclet Predicates = [HasAVX, NoDQI] in
53590b57cec5SDimitry Andric  defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
53600b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in
53610b57cec5SDimitry Andric  defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;
53620b57cec5SDimitry Andric
53630b57cec5SDimitry Andric// insertps has a few different modes, there's the first two here below which
53640b57cec5SDimitry Andric// are optimized inserts that won't zero arbitrary elements in the destination
53650b57cec5SDimitry Andric// vector. The next one matches the intrinsic and could zero arbitrary elements
53660b57cec5SDimitry Andric// in the target vector.
// NOTE(review): this multiclass defines only two records (rr and rm), both
// matching X86insertps; the "next one" mentioned above is not part of it.
//
// `rr` takes both sources from VR128 and is marked isCommutable; `rm` takes
// the second source as a loadf32 wrapped in scalar_to_vector. $src3 is the
// u8imm control byte, matched as a target immediate (timm). Is2Addr picks the
// assembly string (two-operand when set, three-operand when clear).
53670b57cec5SDimitry Andricmulticlass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
53680b57cec5SDimitry Andric  let isCommutable = 1 in
53690b57cec5SDimitry Andric  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
53700b57cec5SDimitry Andric      (ins VR128:$src1, VR128:$src2, u8imm:$src3),
53710b57cec5SDimitry Andric      !if(Is2Addr,
53720b57cec5SDimitry Andric        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
53730b57cec5SDimitry Andric        !strconcat(asm,
53740b57cec5SDimitry Andric                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
53750b57cec5SDimitry Andric      [(set VR128:$dst,
53768bcb0991SDimitry Andric        (X86insertps VR128:$src1, VR128:$src2, timm:$src3))]>,
53770b57cec5SDimitry Andric      Sched<[SchedWriteFShuffle.XMM]>;
53780b57cec5SDimitry Andric  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
53790b57cec5SDimitry Andric      (ins VR128:$src1, f32mem:$src2, u8imm:$src3),
53800b57cec5SDimitry Andric      !if(Is2Addr,
53810b57cec5SDimitry Andric        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
53820b57cec5SDimitry Andric        !strconcat(asm,
53830b57cec5SDimitry Andric                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
53840b57cec5SDimitry Andric      [(set VR128:$dst,
53850b57cec5SDimitry Andric        (X86insertps VR128:$src1,
53860b57cec5SDimitry Andric                   (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
53878bcb0991SDimitry Andric                    timm:$src3))]>,
53880b57cec5SDimitry Andric      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
53890b57cec5SDimitry Andric}
53900b57cec5SDimitry Andric
// Both forms are placed in the SSEPackedSingle execution domain. The VEX form
// (UseAVX) uses three-operand syntax; the legacy form ties $src1 to $dst.
53910b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in {
53920b57cec5SDimitry Andric  let Predicates = [UseAVX] in
53930b57cec5SDimitry Andric    defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>,
53940b57cec5SDimitry Andric                     VEX_4V, VEX_WIG;
53950b57cec5SDimitry Andric  let Constraints = "$src1 = $dst" in
53960b57cec5SDimitry Andric    defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1>;
53970b57cec5SDimitry Andric}
53980b57cec5SDimitry Andric
53990b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
54000b57cec5SDimitry Andric// SSE4.1 - Round Instructions
54010b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
54020b57cec5SDimitry Andric
/// sse41_fp_unop_p - Unary vector FP operation with an immediate operand.
/// Defines `r` (register source) and `m` (source loaded through mem_frag from
/// an x86memop operand). Both apply OpNode to the source and the i32u8imm
/// operand $src2 (matched as timm), producing a value of type VT in RC.
/// Both records are declared to use MXCSR and to possibly raise FP exceptions.
54030b57cec5SDimitry Andricmulticlass sse41_fp_unop_p<bits<8> opc, string OpcodeStr,
54040b57cec5SDimitry Andric                           X86MemOperand x86memop, RegisterClass RC,
5405*fe6060f1SDimitry Andric                           ValueType VT, PatFrag mem_frag, SDPatternOperator OpNode,
54060b57cec5SDimitry Andric                           X86FoldableSchedWrite sched> {
54070b57cec5SDimitry Andric  // The enclosing let applies to both the reg and mem forms below.
54080b57cec5SDimitry Andric  // Vector intrinsic operation, reg
5409480093f4SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1 in {
54100b57cec5SDimitry Andric  def r : SS4AIi8<opc, MRMSrcReg,
54110b57cec5SDimitry Andric                  (outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
54120b57cec5SDimitry Andric                  !strconcat(OpcodeStr,
54130b57cec5SDimitry Andric                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
54148bcb0991SDimitry Andric                  [(set RC:$dst, (VT (OpNode RC:$src1, timm:$src2)))]>,
54150b57cec5SDimitry Andric                  Sched<[sched]>;
54160b57cec5SDimitry Andric
54170b57cec5SDimitry Andric  // Vector intrinsic operation, mem
54180b57cec5SDimitry Andric  def m : SS4AIi8<opc, MRMSrcMem,
54190b57cec5SDimitry Andric                  (outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
54200b57cec5SDimitry Andric                  !strconcat(OpcodeStr,
54210b57cec5SDimitry Andric                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
54220b57cec5SDimitry Andric                  [(set RC:$dst,
54238bcb0991SDimitry Andric                        (VT (OpNode (mem_frag addr:$src1), timm:$src2)))]>,
54240b57cec5SDimitry Andric                  Sched<[sched.Folded]>;
54250b57cec5SDimitry Andric} // Uses = [MXCSR], mayRaiseFPException = 1
5426480093f4SDimitry Andric} // multiclass sse41_fp_unop_p
54270b57cec5SDimitry Andric
/// avx_fp_unop_rm - Pattern-less scalar forms on FR32/FR64 with three
/// sources ($src1, $src2, and the i32u8imm $src3). Defines SSr/SSm (opcode
/// opcss, mnemonic OpcodeStr+"ss", SSEPackedSingle domain) and SDr/SDm
/// (opcode opcsd, mnemonic OpcodeStr+"sd", SSEPackedDouble domain). All four
/// are isCodeGenOnly with hasSideEffects = 0; because the pattern lists are
/// empty, the memory forms set mayLoad = 1 explicitly.
54280b57cec5SDimitry Andricmulticlass avx_fp_unop_rm<bits<8> opcss, bits<8> opcsd,
54290b57cec5SDimitry Andric                          string OpcodeStr, X86FoldableSchedWrite sched> {
54308bcb0991SDimitry Andriclet ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in {
54310b57cec5SDimitry Andric  def SSr : SS4AIi8<opcss, MRMSrcReg,
54320b57cec5SDimitry Andric        (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32u8imm:$src3),
54330b57cec5SDimitry Andric        !strconcat(OpcodeStr,
54340b57cec5SDimitry Andric            "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
54350b57cec5SDimitry Andric      []>, Sched<[sched]>;
54360b57cec5SDimitry Andric
54370b57cec5SDimitry Andric  let mayLoad = 1 in
54380b57cec5SDimitry Andric  def SSm : SS4AIi8<opcss, MRMSrcMem,
54390b57cec5SDimitry Andric        (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, i32u8imm:$src3),
54400b57cec5SDimitry Andric        !strconcat(OpcodeStr,
54410b57cec5SDimitry Andric             "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
54420b57cec5SDimitry Andric        []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
54430b57cec5SDimitry Andric} // ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1
54440b57cec5SDimitry Andric
54458bcb0991SDimitry Andriclet ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in {
54460b57cec5SDimitry Andric  def SDr : SS4AIi8<opcsd, MRMSrcReg,
54470b57cec5SDimitry Andric        (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32u8imm:$src3),
54480b57cec5SDimitry Andric        !strconcat(OpcodeStr,
54490b57cec5SDimitry Andric              "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
54500b57cec5SDimitry Andric        []>, Sched<[sched]>;
54510b57cec5SDimitry Andric
54520b57cec5SDimitry Andric  let mayLoad = 1 in
54530b57cec5SDimitry Andric  def SDm : SS4AIi8<opcsd, MRMSrcMem,
54540b57cec5SDimitry Andric        (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, i32u8imm:$src3),
54550b57cec5SDimitry Andric        !strconcat(OpcodeStr,
54560b57cec5SDimitry Andric             "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
54570b57cec5SDimitry Andric        []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
54580b57cec5SDimitry Andric} // ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1
54590b57cec5SDimitry Andric}
54600b57cec5SDimitry Andric
/// sse41_fp_unop_s - Scalar "ss"/"sd" register and memory forms that take one
/// source operand plus an 8-bit immediate.
///
///   opcss, opcsd - opcodes of the single- and double-precision forms.
///   OpcodeStr    - mnemonic stem; "ss" or "sd" is appended to it.
///   sched        - scheduling class; the memory forms use sched.Folded and
///                  sched.ReadAfterFold.
///
/// Every record uses MXCSR and may raise an FP exception. All four are
/// codegen-only, set hasSideEffects = 0 and carry an empty pattern list.
multiclass sse41_fp_unop_s<bits<8> opcss, bits<8> opcsd,
                           string OpcodeStr, X86FoldableSchedWrite sched> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in {
  def SSr : SS4AIi8<opcss, MRMSrcReg,
                    (outs FR32:$dst), (ins FR32:$src1, i32u8imm:$src2),
                    !strconcat(OpcodeStr,
                               "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, Sched<[sched]>;

  // The pattern list is empty, so the load has to be declared explicitly.
  let mayLoad = 1 in
  def SSm : SS4AIi8<opcss, MRMSrcMem,
                    (outs FR32:$dst), (ins f32mem:$src1, i32u8imm:$src2),
                    !strconcat(OpcodeStr,
                               "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1

let ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in {
  def SDr : SS4AIi8<opcsd, MRMSrcReg,
                    (outs FR64:$dst), (ins FR64:$src1, i32u8imm:$src2),
                    !strconcat(OpcodeStr,
                               "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, Sched<[sched]>;

  // The pattern list is empty, so the load has to be declared explicitly.
  let mayLoad = 1 in
  def SDm : SS4AIi8<opcsd, MRMSrcMem,
                    (outs FR64:$dst), (ins f64mem:$src1, i32u8imm:$src2),
                    !strconcat(OpcodeStr,
                               "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1
} // Uses = [MXCSR], mayRaiseFPException = 1
}
54950b57cec5SDimitry Andric
/// sse41_fp_binop_s - Scalar "ss"/"sd" forms operating on VR128 with two
/// sources and an 8-bit immediate, selected through OpNode.
///
///   opcss, opcsd - opcodes of the single- and double-precision forms.
///   OpcodeStr    - mnemonic stem; "ss" or "sd" is appended to it.
///   sched        - scheduling class; the memory forms use sched.Folded and
///                  sched.ReadAfterFold.
///   VT32, VT64   - result types wrapped around OpNode in the register-form
///                  patterns.
///   OpNode       - DAG node matched with ($src1, $src2, timm:$src3).
///   Is2Addr      - 1 selects the asm string that omits $src1; 0 selects the
///                  string that prints all three sources.
///
/// Every record uses MXCSR and may raise an FP exception.
multiclass sse41_fp_binop_s<bits<8> opcss, bits<8> opcsd,
                            string OpcodeStr, X86FoldableSchedWrite sched,
                            ValueType VT32, ValueType VT64,
                            SDNode OpNode, bit Is2Addr = 1> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
let ExeDomain = SSEPackedSingle in {
  def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set VR128:$dst, (VT32 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>,
        Sched<[sched]>;

  // Memory form: $src2 is matched through sse_load_f32.
  def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
        (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set VR128:$dst,
             (OpNode VR128:$src1, (sse_load_f32 addr:$src2), timm:$src3))]>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle

let ExeDomain = SSEPackedDouble in {
  def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set VR128:$dst, (VT64 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>,
        Sched<[sched]>;

  // Memory form: $src2 is matched through sse_load_f64.
  def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
        (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set VR128:$dst,
              (OpNode VR128:$src1, (sse_load_f64 addr:$src2), timm:$src3))]>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble
} // Uses = [MXCSR], mayRaiseFPException = 1
}
55480b57cec5SDimitry Andric
// FP round - roundss, roundps, roundsd, roundpd
//
// VEX-encoded packed forms on VR128 and (with VEX_L) VR256, matched through
// X86any_VRndScale. All of them use MXCSR and may raise an FP exception.
let Predicates = [HasAVX, NoVLX] in {
  let ExeDomain = SSEPackedSingle, Uses = [MXCSR], mayRaiseFPException = 1 in {
    // Intrinsic form
    defm VROUNDPS  : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
                                     loadv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>,
                                   VEX, VEX_WIG;
    defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32,
                                     loadv8f32, X86any_VRndScale, SchedWriteFRnd.YMM>,
                                   VEX, VEX_L, VEX_WIG;
  }

  let ExeDomain = SSEPackedDouble, Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm VROUNDPD  : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
                                     loadv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>,
                                   VEX, VEX_WIG;
    defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64,
                                     loadv4f64, X86any_VRndScale, SchedWriteFRnd.YMM>,
                                   VEX, VEX_L, VEX_WIG;
  }
}
// VEX-encoded scalar forms. The sse41_fp_binop_s instantiation passes
// Is2Addr = 0 and yields the VR128 *_Int records; avx_fp_unop_rm yields the
// FR32/FR64 VROUNDSSr/m and VROUNDSDr/m records used by the patterns below.
let Predicates = [UseAVX] in {
  defm VROUND  : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl,
                                  v4f32, v2f64, X86RndScales, 0>,
                                  VEX_4V, VEX_LIG, VEX_WIG, SIMD_EXC;
  defm VROUND  : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>,
                                VEX_4V, VEX_LIG, VEX_WIG, SIMD_EXC;
}

// Register patterns: the first source operand of the instruction is filled
// with IMPLICIT_DEF.
let Predicates = [UseAVX] in {
  def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2),
            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>;
  def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2),
            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, timm:$src2)>;
}

// Load-folding patterns; only enabled together with OptForSize.
let Predicates = [UseAVX, OptForSize] in {
  def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2),
            (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
  def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2),
            (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
}
55910b57cec5SDimitry Andric
// Non-VEX packed forms; the memory operand is matched with memopv4f32 /
// memopv2f64.
let ExeDomain = SSEPackedSingle in
defm ROUNDPS  : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32,
                                memopv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>;
let ExeDomain = SSEPackedDouble in
defm ROUNDPD  : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64,
                                memopv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>;

// Non-VEX scalar FR32/FR64 forms (ROUNDSSr/m, ROUNDSDr/m).
defm ROUND  : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl>;

// Non-VEX scalar VR128 *_Int forms; Is2Addr keeps its default of 1 and $src1
// is tied to $dst.
let Constraints = "$src1 = $dst" in
defm ROUND  : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl,
                               v4f32, v2f64, X86RndScales>;

// Register patterns.
let Predicates = [UseSSE41] in {
  def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2),
            (ROUNDSSr FR32:$src1, timm:$src2)>;
  def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2),
            (ROUNDSDr FR64:$src1, timm:$src2)>;
}

// Load-folding patterns; only enabled together with OptForSize.
let Predicates = [UseSSE41, OptForSize] in {
  def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2),
            (ROUNDSSm addr:$src1, timm:$src2)>;
  def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2),
            (ROUNDSDm addr:$src1, timm:$src2)>;
}
56180b57cec5SDimitry Andric
//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Bit Test
//===----------------------------------------------------------------------===//
56220b57cec5SDimitry Andric
// ptest instruction: X86ISelLowering lowers to this primarily from the
// corresponding Intel intrinsic.
//
// VEX-encoded forms. They have no register outputs; the result is written to
// EFLAGS through X86ptest.
let Defs = [EFLAGS], Predicates = [HasAVX] in {
def VPTESTrr  : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
                Sched<[SchedWriteVecTest.XMM]>, VEX, VEX_WIG;
def VPTESTrm  : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
                Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>,
                VEX, VEX_WIG;

// 256-bit forms (VEX_L).
def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
                Sched<[SchedWriteVecTest.YMM]>, VEX, VEX_L, VEX_WIG;
def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
                Sched<[SchedWriteVecTest.YMM.Folded, SchedWriteVecTest.YMM.ReadAfterFold]>,
                VEX, VEX_L, VEX_WIG;
}
56460b57cec5SDimitry Andric
// Non-VEX ptest. It has no register outputs; the result is written to EFLAGS
// through X86ptest. The memory form matches its operand with memopv2i64.
let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
              "ptest\t{$src2, $src1|$src1, $src2}",
              [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
              Sched<[SchedWriteVecTest.XMM]>;
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
              "ptest\t{$src2, $src1|$src1, $src2}",
              [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
              Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>;
}
56570b57cec5SDimitry Andric
// The bit test instructions below are AVX only
//
/// avx_bittest - VEX-encoded register/register and register/memory bit-test
/// forms that set EFLAGS through X86testp and have no register outputs.
///
///   opc       - opcode byte.
///   OpcodeStr - full mnemonic.
///   RC        - register class of both sources.
///   x86memop  - memory operand of the rm form.
///   mem_frag  - load fragment used in the rm pattern.
///   vt        - value type applied to $src2 in the rr pattern.
///   sched     - scheduling class; rm uses sched.Folded and
///               sched.ReadAfterFold.
multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop, PatFrag mem_frag, ValueType vt,
                       X86FoldableSchedWrite sched> {
  def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
            [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>,
            Sched<[sched]>, VEX;
  def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
            [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
            Sched<[sched.Folded, sched.ReadAfterFold]>, VEX;
}
56710b57cec5SDimitry Andric
// vtestps / vtestpd on VR128 and (with VEX_L) VR256. All forms define EFLAGS
// and require HasAVX.
let Defs = [EFLAGS], Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
defm VTESTPS  : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32, v4f32,
                            SchedWriteFTest.XMM>;
defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32, v8f32,
                            SchedWriteFTest.YMM>, VEX_L;
} // ExeDomain = SSEPackedSingle
let ExeDomain = SSEPackedDouble in {
defm VTESTPD  : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64, v2f64,
                            SchedWriteFTest.XMM>;
defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64,
                            SchedWriteFTest.YMM>, VEX_L;
} // ExeDomain = SSEPackedDouble
} // Defs = [EFLAGS], Predicates = [HasAVX]
56860b57cec5SDimitry Andric
//===----------------------------------------------------------------------===//
// SSE4.1 - Misc Instructions
//===----------------------------------------------------------------------===//
56900b57cec5SDimitry Andric
// POPCNT - 16/32/64-bit register and memory forms, selected from ctpop.
// Every form defines EFLAGS (also listed as an implicit result in the
// pattern) and requires HasPOPCNT. Memory forms use WritePOPCNT.Folded.
let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
  def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                     "popcnt{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>,
                     Sched<[WritePOPCNT]>, OpSize16, XS;
  def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                     "popcnt{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (ctpop (loadi16 addr:$src))),
                      (implicit EFLAGS)]>,
                      Sched<[WritePOPCNT.Folded]>, OpSize16, XS;

  def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                     "popcnt{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>,
                     Sched<[WritePOPCNT]>, OpSize32, XS;

  def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                     "popcnt{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (ctpop (loadi32 addr:$src))),
                      (implicit EFLAGS)]>,
                      Sched<[WritePOPCNT.Folded]>, OpSize32, XS;

  // The 64-bit forms derive from RI instead of I.
  def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                      "popcnt{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>,
                      Sched<[WritePOPCNT]>, XS;
  def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                      "popcnt{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (ctpop (loadi64 addr:$src))),
                       (implicit EFLAGS)]>,
                       Sched<[WritePOPCNT.Folded]>, XS;
}
57230b57cec5SDimitry Andric
// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
//
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic.
//   OpNode    - DAG node applied to the single source.
//   ld_frag   - load fragment used in the rm pattern.
//   Sched     - scheduling class; rm uses Sched.Folded. Note that this
//               template argument shares its name with the Sched<> class it
//               is passed to.
multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
                                 SDNode OpNode, PatFrag ld_frag,
                                 X86FoldableSchedWrite Sched> {
  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst, (v8i16 (OpNode (v8i16 VR128:$src))))]>,
                 Sched<[Sched]>;
  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
                  (ins i128mem:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR128:$dst,
                    (v8i16 (OpNode (ld_frag addr:$src))))]>,
                 Sched<[Sched.Folded]>;
}
57400b57cec5SDimitry Andric
// PHMIN has the same profile as PSAD, thus we use the same scheduling
// model, although the naming is misleading.
// NOTE(review): both defms below pass WritePHMINPOS as the scheduling class;
// confirm whether the remark above is still current.
//
// The HasAVX predicate applies to the VEX form only.
let Predicates = [HasAVX] in
defm VPHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "vphminposuw",
                                         X86phminpos, load,
                                         WritePHMINPOS>, VEX, VEX_WIG;
defm PHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "phminposuw",
                                         X86phminpos, memop,
                                         WritePHMINPOS>;
57500b57cec5SDimitry Andric
/// SS48I_binop_rm - Simple SSE41 binary operator.
///
///   opc        - opcode byte.
///   OpcodeStr  - full mnemonic.
///   OpNode     - DAG node applied to ($src1, $src2).
///   OpVT       - result type wrapped around OpNode.
///   RC         - register class of the destination and register sources.
///   memop_frag - load fragment used in the rm pattern.
///   x86memop   - memory operand of the rm form.
///   sched      - scheduling class; rm uses sched.Folded and
///                sched.ReadAfterFold.
///   Is2Addr    - 1 selects the asm string that omits $src1; 0 selects the
///                string that prints both sources.
///
/// Only the rr form is marked commutable.
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                          X86MemOperand x86memop, X86FoldableSchedWrite sched,
                          bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
       Sched<[sched]>;
  def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst,
         (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}
57730b57cec5SDimitry Andric
// VEX-encoded 128-bit integer min/max and VPMULDQ. Every instantiation
// passes Is2Addr = 0.
//
// Dword/qword element forms.
let Predicates = [HasAVX, NoVLX] in {
  defm VPMINSD   : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128,
                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
                                  VEX_4V, VEX_WIG;
  defm VPMINUD   : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128,
                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
                                  VEX_4V, VEX_WIG;
  defm VPMAXSD   : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128,
                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
                                  VEX_4V, VEX_WIG;
  defm VPMAXUD   : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128,
                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
                                  VEX_4V, VEX_WIG;
  defm VPMULDQ   : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v2i64, VR128,
                                  load, i128mem, SchedWriteVecIMul.XMM, 0>,
                                  VEX_4V, VEX_WIG;
}
// Byte/word element forms.
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  defm VPMINSB   : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128,
                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
                                  VEX_4V, VEX_WIG;
  defm VPMINUW   : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128,
                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
                                  VEX_4V, VEX_WIG;
  defm VPMAXSB   : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128,
                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
                                  VEX_4V, VEX_WIG;
  defm VPMAXUW   : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128,
                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
                                  VEX_4V, VEX_WIG;
}
58050b57cec5SDimitry Andric
// VEX-encoded 256-bit (VEX_L) integer min/max and VPMULDQ. Every
// instantiation passes Is2Addr = 0.
//
// Dword/qword element forms.
let Predicates = [HasAVX2, NoVLX] in {
  defm VPMINSDY  : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;
  defm VPMINUDY  : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;
  defm VPMAXSDY  : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;
  defm VPMAXUDY  : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;
  defm VPMULDQY  : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v4i64, VR256,
                                  load, i256mem, SchedWriteVecIMul.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;
}
// Byte/word element forms.
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  defm VPMINSBY  : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;
  defm VPMINUWY  : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;
  defm VPMAXSBY  : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;
  defm VPMAXUWY  : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;
}
58370b57cec5SDimitry Andric
// Non-VEX integer min/max and PMULDQ. Every instantiation passes
// Is2Addr = 1, ties $src1 to $dst, and matches the memory operand with memop.
let Constraints = "$src1 = $dst" in {
  defm PMINSB   : SS48I_binop_rm<0x38, "pminsb", smin, v16i8, VR128,
                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMINSD   : SS48I_binop_rm<0x39, "pminsd", smin, v4i32, VR128,
                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMINUD   : SS48I_binop_rm<0x3B, "pminud", umin, v4i32, VR128,
                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMINUW   : SS48I_binop_rm<0x3A, "pminuw", umin, v8i16, VR128,
                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMAXSB   : SS48I_binop_rm<0x3C, "pmaxsb", smax, v16i8, VR128,
                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMAXSD   : SS48I_binop_rm<0x3D, "pmaxsd", smax, v4i32, VR128,
                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMAXUD   : SS48I_binop_rm<0x3F, "pmaxud", umax, v4i32, VR128,
                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMAXUW   : SS48I_binop_rm<0x3E, "pmaxuw", umax, v8i16, VR128,
                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMULDQ   : SS48I_binop_rm<0x28, "pmuldq", X86pmuldq, v2i64, VR128,
                                 memop, i128mem, SchedWriteVecIMul.XMM, 1>;
}
58580b57cec5SDimitry Andric
// VEX-encoded 128-bit VPMULLD and VPCMPEQQ. VPMULLD additionally requires
// NoVLX, whereas VPCMPEQQ only requires HasAVX.
// NOTE(review): presumably an AVX512VL definition takes over VPMULLD when VLX
// is available - confirm against the AVX-512 instruction definitions.
58590b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in
58600b57cec5SDimitry Andric  defm VPMULLD  : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
58610b57cec5SDimitry Andric                                 load, i128mem, SchedWritePMULLD.XMM, 0>,
58620b57cec5SDimitry Andric                                 VEX_4V, VEX_WIG;
58630b57cec5SDimitry Andriclet Predicates = [HasAVX] in
58640b57cec5SDimitry Andric  defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
58650b57cec5SDimitry Andric                                 load, i128mem, SchedWriteVecALU.XMM, 0>,
58660b57cec5SDimitry Andric                                 VEX_4V, VEX_WIG;
58670b57cec5SDimitry Andric
// 256-bit (VEX_L) counterparts of VPMULLD / VPCMPEQQ operating on VR256 with
// i256mem. VPMULLDY requires HasAVX2 and NoVLX; VPCMPEQQY only HasAVX2.
58680b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in
58690b57cec5SDimitry Andric  defm VPMULLDY  : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
58700b57cec5SDimitry Andric                                  load, i256mem, SchedWritePMULLD.YMM, 0>,
58710b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
58720b57cec5SDimitry Andriclet Predicates = [HasAVX2] in
58730b57cec5SDimitry Andric  defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
58740b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
58750b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
58760b57cec5SDimitry Andric
// Legacy (non-VEX) PMULLD and PCMPEQQ: same opcodes (0x40 / 0x29) as the
// VEX-encoded forms, but with $src1 tied to $dst and the memop fragment.
58770b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
58780b57cec5SDimitry Andric  defm PMULLD  : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
58790b57cec5SDimitry Andric                                memop, i128mem, SchedWritePMULLD.XMM, 1>;
58800b57cec5SDimitry Andric  defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
58810b57cec5SDimitry Andric                                memop, i128mem, SchedWriteVecALU.XMM, 1>;
58820b57cec5SDimitry Andric}
58830b57cec5SDimitry Andric
58840b57cec5SDimitry Andric/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
/// selected from an intrinsic. Emits two records: rri (register $src2) and
/// rmi (memory $src2).
///   opc        - opcode passed to SS4AIi8.
///   OpcodeStr  - mnemonic prepended to the asm operand string.
///   IntId      - intrinsic matched by both selection patterns.
///   RC         - register class of $dst, $src1 and the register $src2.
///   memop_frag - fragment applied to addr:$src2 in the rmi pattern.
///   x86memop   - operand type of the memory $src2.
///   Is2Addr    - selects the asm string: when set, $src1 is omitted from the
///                printed operands. It does not add a tie constraint here;
///                two-address users wrap the defm in a Constraints let.
///   sched      - scheduling info; rmi uses sched.Folded / sched.ReadAfterFold.
/// rri is declared with isCommutable = 1; some users wrap the defm in
/// "let isCommutable = 0".
58850b57cec5SDimitry Andricmulticlass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
58860b57cec5SDimitry Andric                 Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
58870b57cec5SDimitry Andric                 X86MemOperand x86memop, bit Is2Addr,
58880b57cec5SDimitry Andric                 X86FoldableSchedWrite sched> {
58890b57cec5SDimitry Andric  let isCommutable = 1 in
58900b57cec5SDimitry Andric  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
58910b57cec5SDimitry Andric        (ins RC:$src1, RC:$src2, u8imm:$src3),
58920b57cec5SDimitry Andric        !if(Is2Addr,
58930b57cec5SDimitry Andric            !strconcat(OpcodeStr,
58940b57cec5SDimitry Andric                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
58950b57cec5SDimitry Andric            !strconcat(OpcodeStr,
58960b57cec5SDimitry Andric                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
58978bcb0991SDimitry Andric        [(set RC:$dst, (IntId RC:$src1, RC:$src2, timm:$src3))]>,
58980b57cec5SDimitry Andric        Sched<[sched]>;
58990b57cec5SDimitry Andric  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
59000b57cec5SDimitry Andric        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
59010b57cec5SDimitry Andric        !if(Is2Addr,
59020b57cec5SDimitry Andric            !strconcat(OpcodeStr,
59030b57cec5SDimitry Andric                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
59040b57cec5SDimitry Andric            !strconcat(OpcodeStr,
59050b57cec5SDimitry Andric                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
59060b57cec5SDimitry Andric        [(set RC:$dst,
59078bcb0991SDimitry Andric          (IntId RC:$src1, (memop_frag addr:$src2), timm:$src3))]>,
59080b57cec5SDimitry Andric        Sched<[sched.Folded, sched.ReadAfterFold]>;
59090b57cec5SDimitry Andric}
59100b57cec5SDimitry Andric
59110b57cec5SDimitry Andric/// SS41I_binop_rmi - SSE 4.1 binary operator with 8-bit immediate
/// selected from an SDNode. Same shape as SS41I_binop_rmi_int, except that the
/// patterns match OpNode and wrap the result in the value type OpVT.
///   opc        - opcode passed to SS4AIi8.
///   OpcodeStr  - mnemonic prepended to the asm operand string.
///   OpNode     - DAG node matched by the rri and rmi patterns.
///   OpVT       - result value type used in the patterns.
///   RC         - register class of $dst, $src1 and the register $src2.
///   memop_frag - fragment applied to addr:$src2 in the rmi pattern.
///   x86memop   - operand type of the memory $src2.
///   Is2Addr    - selects the two-operand asm string when set (no constraint
///                is added by this multiclass).
///   sched      - scheduling info; rmi uses sched.Folded / sched.ReadAfterFold.
59120b57cec5SDimitry Andricmulticlass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
59130b57cec5SDimitry Andric                           ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
59140b57cec5SDimitry Andric                           X86MemOperand x86memop, bit Is2Addr,
59150b57cec5SDimitry Andric                           X86FoldableSchedWrite sched> {
59160b57cec5SDimitry Andric  let isCommutable = 1 in
59170b57cec5SDimitry Andric  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
59180b57cec5SDimitry Andric        (ins RC:$src1, RC:$src2, u8imm:$src3),
59190b57cec5SDimitry Andric        !if(Is2Addr,
59200b57cec5SDimitry Andric            !strconcat(OpcodeStr,
59210b57cec5SDimitry Andric                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
59220b57cec5SDimitry Andric            !strconcat(OpcodeStr,
59230b57cec5SDimitry Andric                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
59248bcb0991SDimitry Andric        [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
59250b57cec5SDimitry Andric        Sched<[sched]>;
59260b57cec5SDimitry Andric  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
59270b57cec5SDimitry Andric        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
59280b57cec5SDimitry Andric        !if(Is2Addr,
59290b57cec5SDimitry Andric            !strconcat(OpcodeStr,
59300b57cec5SDimitry Andric                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
59310b57cec5SDimitry Andric            !strconcat(OpcodeStr,
59320b57cec5SDimitry Andric                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
59330b57cec5SDimitry Andric        [(set RC:$dst,
59348bcb0991SDimitry Andric          (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>,
59350b57cec5SDimitry Andric        Sched<[sched.Folded, sched.ReadAfterFold]>;
59360b57cec5SDimitry Andric}
59370b57cec5SDimitry Andric
// Immediate transform for commuting a 2-element blend: keeps only the low 2
// bits of the immediate and inverts them. Passed as commuteXForm for
// (V)BLENDPD on v2f64.
59388bcb0991SDimitry Andricdef BlendCommuteImm2 : SDNodeXForm<timm, [{
59390b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue() & 0x03;
59400b57cec5SDimitry Andric  return getI8Imm(Imm ^ 0x03, SDLoc(N));
59410b57cec5SDimitry Andric}]>;
59420b57cec5SDimitry Andric
// Immediate transform for commuting a 4-element blend: keeps only the low 4
// bits of the immediate and inverts them. Passed as commuteXForm for
// (V)BLENDPS on v4f32 and VBLENDPDY on v4f64.
59438bcb0991SDimitry Andricdef BlendCommuteImm4 : SDNodeXForm<timm, [{
59440b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue() & 0x0f;
59450b57cec5SDimitry Andric  return getI8Imm(Imm ^ 0x0f, SDLoc(N));
59460b57cec5SDimitry Andric}]>;
59470b57cec5SDimitry Andric
// Immediate transform for commuting a blend that uses all 8 immediate bits:
// inverts the low 8 bits. Passed as commuteXForm for VBLENDPSY and the
// (V)PBLENDW variants.
59488bcb0991SDimitry Andricdef BlendCommuteImm8 : SDNodeXForm<timm, [{
59490b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue() & 0xff;
59500b57cec5SDimitry Andric  return getI8Imm(Imm ^ 0xff, SDLoc(N));
59510b57cec5SDimitry Andric}]>;
59520b57cec5SDimitry Andric
59530b57cec5SDimitry Andric// Turn a 4-bit blendi immediate to 8-bit for use with pblendw.
// Each set input bit i (i = 0..3) becomes the two adjacent output bits 2*i and
// 2*i+1; input bits above bit 3 are ignored.
59548bcb0991SDimitry Andricdef BlendScaleImm4 : SDNodeXForm<timm, [{
59550b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue();
59560b57cec5SDimitry Andric  uint8_t NewImm = 0;
59570b57cec5SDimitry Andric  for (unsigned i = 0; i != 4; ++i) {
59580b57cec5SDimitry Andric    if (Imm & (1 << i))
59590b57cec5SDimitry Andric      NewImm |= 0x3 << (i * 2);
59600b57cec5SDimitry Andric  }
59610b57cec5SDimitry Andric  return getI8Imm(NewImm, SDLoc(N));
59620b57cec5SDimitry Andric}]>;
59630b57cec5SDimitry Andric
59640b57cec5SDimitry Andric// Turn a 2-bit blendi immediate to 8-bit for use with pblendw.
// Each set input bit i (i = 0..1) becomes the four adjacent output bits
// 4*i .. 4*i+3; input bits above bit 1 are ignored.
59658bcb0991SDimitry Andricdef BlendScaleImm2 : SDNodeXForm<timm, [{
59660b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue();
59670b57cec5SDimitry Andric  uint8_t NewImm = 0;
59680b57cec5SDimitry Andric  for (unsigned i = 0; i != 2; ++i) {
59690b57cec5SDimitry Andric    if (Imm & (1 << i))
59700b57cec5SDimitry Andric      NewImm |= 0xf << (i * 4);
59710b57cec5SDimitry Andric  }
59720b57cec5SDimitry Andric  return getI8Imm(NewImm, SDLoc(N));
59730b57cec5SDimitry Andric}]>;
59740b57cec5SDimitry Andric
59750b57cec5SDimitry Andric// Turn a 2-bit blendi immediate to 4-bit for use with pblendd.
// Each set input bit i (i = 0..1) becomes the two adjacent output bits 2*i and
// 2*i+1; input bits above bit 1 are ignored.
59768bcb0991SDimitry Andricdef BlendScaleImm2to4 : SDNodeXForm<timm, [{
59770b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue();
59780b57cec5SDimitry Andric  uint8_t NewImm = 0;
59790b57cec5SDimitry Andric  for (unsigned i = 0; i != 2; ++i) {
59800b57cec5SDimitry Andric    if (Imm & (1 << i))
59810b57cec5SDimitry Andric      NewImm |= 0x3 << (i * 2);
59820b57cec5SDimitry Andric  }
59830b57cec5SDimitry Andric  return getI8Imm(NewImm, SDLoc(N));
59840b57cec5SDimitry Andric}]>;
59850b57cec5SDimitry Andric
59860b57cec5SDimitry Andric// Turn a 4-bit blendi immediate to 8-bit for use with pblendw and invert it.
// The widening is the same as in BlendScaleImm4 (bit i -> bits 2*i, 2*i+1);
// the widened value is then XORed with 0xff for the commuted operand order.
59878bcb0991SDimitry Andricdef BlendScaleCommuteImm4 : SDNodeXForm<timm, [{
59880b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue();
59890b57cec5SDimitry Andric  uint8_t NewImm = 0;
59900b57cec5SDimitry Andric  for (unsigned i = 0; i != 4; ++i) {
59910b57cec5SDimitry Andric    if (Imm & (1 << i))
59920b57cec5SDimitry Andric      NewImm |= 0x3 << (i * 2);
59930b57cec5SDimitry Andric  }
59940b57cec5SDimitry Andric  return getI8Imm(NewImm ^ 0xff, SDLoc(N));
59950b57cec5SDimitry Andric}]>;
59960b57cec5SDimitry Andric
59970b57cec5SDimitry Andric// Turn a 2-bit blendi immediate to 8-bit for use with pblendw and invert it.
// The widening is the same as in BlendScaleImm2 (bit i -> bits 4*i .. 4*i+3);
// the widened value is then XORed with 0xff for the commuted operand order.
59988bcb0991SDimitry Andricdef BlendScaleCommuteImm2 : SDNodeXForm<timm, [{
59990b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue();
60000b57cec5SDimitry Andric  uint8_t NewImm = 0;
60010b57cec5SDimitry Andric  for (unsigned i = 0; i != 2; ++i) {
60020b57cec5SDimitry Andric    if (Imm & (1 << i))
60030b57cec5SDimitry Andric      NewImm |= 0xf << (i * 4);
60040b57cec5SDimitry Andric  }
60050b57cec5SDimitry Andric  return getI8Imm(NewImm ^ 0xff, SDLoc(N));
60060b57cec5SDimitry Andric}]>;
60070b57cec5SDimitry Andric
60080b57cec5SDimitry Andric// Turn a 2-bit blendi immediate to 4-bit for use with pblendd and invert it.
// The widening is the same as in BlendScaleImm2to4 (bit i -> bits 2*i, 2*i+1);
// only the low 4 bits are inverted (XOR with 0xf).
60098bcb0991SDimitry Andricdef BlendScaleCommuteImm2to4 : SDNodeXForm<timm, [{
60100b57cec5SDimitry Andric  uint8_t Imm = N->getZExtValue();
60110b57cec5SDimitry Andric  uint8_t NewImm = 0;
60120b57cec5SDimitry Andric  for (unsigned i = 0; i != 2; ++i) {
60130b57cec5SDimitry Andric    if (Imm & (1 << i))
60140b57cec5SDimitry Andric      NewImm |= 0x3 << (i * 2);
60150b57cec5SDimitry Andric  }
60160b57cec5SDimitry Andric  return getI8Imm(NewImm ^ 0xf, SDLoc(N));
60170b57cec5SDimitry Andric}]>;
60180b57cec5SDimitry Andric
// VEX-encoded MPSADBW and dot-product instructions, selected from intrinsics
// via SS41I_binop_rmi_int (Is2Addr = 0, three-operand asm syntax).
//  - VMPSADBW is wrapped in "let isCommutable = 0".
//  - The dot products are wrapped in Uses = [MXCSR], mayRaiseFPException = 1
//    and carry an explicit ExeDomain (SSEPackedSingle / SSEPackedDouble).
//  - The floating-point forms use the f*mem operand types: f128mem for the
//    128-bit VDPPS/VDPPD and f256mem for the 256-bit VDPPSY.
60190b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
60200b57cec5SDimitry Andric  let isCommutable = 0 in {
60210b57cec5SDimitry Andric    defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
60220b57cec5SDimitry Andric                                        VR128, load, i128mem, 0,
60230b57cec5SDimitry Andric                                        SchedWriteMPSAD.XMM>, VEX_4V, VEX_WIG;
60240b57cec5SDimitry Andric  }
60250b57cec5SDimitry Andric
6026480093f4SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1 in {
60270b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in
60280b57cec5SDimitry Andric  defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
60290b57cec5SDimitry Andric                                   VR128, load, f128mem, 0,
60300b57cec5SDimitry Andric                                   SchedWriteDPPS.XMM>, VEX_4V, VEX_WIG;
60310b57cec5SDimitry Andric  let ExeDomain = SSEPackedDouble in
60320b57cec5SDimitry Andric  defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
60330b57cec5SDimitry Andric                                   VR128, load, f128mem, 0,
60340b57cec5SDimitry Andric                                   SchedWriteDPPD.XMM>, VEX_4V, VEX_WIG;
60350b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in
60360b57cec5SDimitry Andric  defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
60370b57cec5SDimitry Andric                                    VR256, load, f256mem, 0,
60380b57cec5SDimitry Andric                                    SchedWriteDPPS.YMM>, VEX_4V, VEX_L, VEX_WIG;
60390b57cec5SDimitry Andric}
6040480093f4SDimitry Andric}
60410b57cec5SDimitry Andric
// 256-bit (VEX_L) VMPSADBW, gated on HasAVX2 and selected from
// int_x86_avx2_mpsadbw. Wrapped in "let isCommutable = 0" like the 128-bit
// form.
60420b57cec5SDimitry Andriclet Predicates = [HasAVX2] in {
60430b57cec5SDimitry Andric  let isCommutable = 0 in {
60440b57cec5SDimitry Andric  defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
60450b57cec5SDimitry Andric                                  VR256, load, i256mem, 0,
60460b57cec5SDimitry Andric                                  SchedWriteMPSAD.YMM>, VEX_4V, VEX_L, VEX_WIG;
60470b57cec5SDimitry Andric  }
60480b57cec5SDimitry Andric}
60490b57cec5SDimitry Andric
// Legacy (non-VEX) MPSADBW, DPPS and DPPD: $src1 is tied to $dst, Is2Addr = 1
// selects the two-operand asm string, and the memop fragment is used.
// NOTE(review): SIMD_EXC is defined outside this chunk; presumably it supplies
// the same MXCSR use / FP-exception marking that the VEX forms set with an
// explicit let - confirm.
60500b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
60510b57cec5SDimitry Andric  let isCommutable = 0 in {
60520b57cec5SDimitry Andric  defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
60530b57cec5SDimitry Andric                                     VR128, memop, i128mem, 1,
60540b57cec5SDimitry Andric                                     SchedWriteMPSAD.XMM>;
60550b57cec5SDimitry Andric  }
60560b57cec5SDimitry Andric
60570b57cec5SDimitry Andric  let ExeDomain = SSEPackedSingle in
60580b57cec5SDimitry Andric  defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
60590b57cec5SDimitry Andric                                  VR128, memop, f128mem, 1,
6060480093f4SDimitry Andric                                  SchedWriteDPPS.XMM>, SIMD_EXC;
60610b57cec5SDimitry Andric  let ExeDomain = SSEPackedDouble in
60620b57cec5SDimitry Andric  defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
60630b57cec5SDimitry Andric                                  VR128, memop, f128mem, 1,
6064480093f4SDimitry Andric                                  SchedWriteDPPD.XMM>, SIMD_EXC;
60650b57cec5SDimitry Andric}
60660b57cec5SDimitry Andric
60670b57cec5SDimitry Andric/// SS41I_blend_rmi - SSE 4.1 blend with 8-bit immediate
/// Emits rri and rmi records plus an anonymous pattern for the commuted
/// memory form. Unlike SS41I_binop_rmi, this multiclass sets ExeDomain and the
/// tie constraint itself.
///   opc          - opcode passed to SS4AIi8.
///   OpcodeStr    - mnemonic prepended to the asm operand string.
///   OpNode       - DAG node matched by the patterns.
///   OpVT         - value type of the blend result.
///   RC           - register class of $dst, $src1 and the register $src2.
///   memop_frag   - fragment applied to addr:$src2.
///   x86memop     - operand type of the memory $src2.
///   Is2Addr      - when set, adds "$src1 = $dst" and selects the two-operand
///                  asm string.
///   d            - execution domain assigned to both records.
///   sched        - scheduling info; rmi uses sched.Folded/ReadAfterFold.
///   commuteXForm - transform applied to the immediate when the load appears
///                  as the first blend source and the operands are swapped.
60680b57cec5SDimitry Andricmulticlass SS41I_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
60690b57cec5SDimitry Andric                           ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
60700b57cec5SDimitry Andric                           X86MemOperand x86memop, bit Is2Addr, Domain d,
60710b57cec5SDimitry Andric                           X86FoldableSchedWrite sched, SDNodeXForm commuteXForm> {
60720b57cec5SDimitry Andriclet ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in {
60730b57cec5SDimitry Andric  let isCommutable = 1 in
60740b57cec5SDimitry Andric  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
60750b57cec5SDimitry Andric        (ins RC:$src1, RC:$src2, u8imm:$src3),
60760b57cec5SDimitry Andric        !if(Is2Addr,
60770b57cec5SDimitry Andric            !strconcat(OpcodeStr,
60780b57cec5SDimitry Andric                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
60790b57cec5SDimitry Andric            !strconcat(OpcodeStr,
60800b57cec5SDimitry Andric                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
60818bcb0991SDimitry Andric        [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
60820b57cec5SDimitry Andric        Sched<[sched]>;
60830b57cec5SDimitry Andric  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
60840b57cec5SDimitry Andric        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
60850b57cec5SDimitry Andric        !if(Is2Addr,
60860b57cec5SDimitry Andric            !strconcat(OpcodeStr,
60870b57cec5SDimitry Andric                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
60880b57cec5SDimitry Andric            !strconcat(OpcodeStr,
60890b57cec5SDimitry Andric                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
60900b57cec5SDimitry Andric        [(set RC:$dst,
60918bcb0991SDimitry Andric          (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>,
60920b57cec5SDimitry Andric        Sched<[sched.Folded, sched.ReadAfterFold]>;
60930b57cec5SDimitry Andric}
60940b57cec5SDimitry Andric
60950b57cec5SDimitry Andric  // Pattern to commute if load is in first source.
  // The register becomes $src1 of rmi and the immediate goes through
  // commuteXForm.
60968bcb0991SDimitry Andric  def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, timm:$src3)),
60970b57cec5SDimitry Andric            (!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
60988bcb0991SDimitry Andric                                            (commuteXForm timm:$src3))>;
60990b57cec5SDimitry Andric}
61000b57cec5SDimitry Andric
// VEX-encoded immediate blends (Is2Addr = 0). The last template argument is
// the immediate transform used when commuting a load out of the first source:
// BlendCommuteImm2 for v2f64, BlendCommuteImm4 for v4f32 / v4f64, and
// BlendCommuteImm8 for v8f32 / v8i16.
61010b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
61020b57cec5SDimitry Andric  defm VBLENDPS : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v4f32,
61030b57cec5SDimitry Andric                                  VR128, load, f128mem, 0, SSEPackedSingle,
61040b57cec5SDimitry Andric                                  SchedWriteFBlend.XMM, BlendCommuteImm4>,
61050b57cec5SDimitry Andric                                  VEX_4V, VEX_WIG;
61060b57cec5SDimitry Andric  defm VBLENDPSY : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v8f32,
61070b57cec5SDimitry Andric                                   VR256, load, f256mem, 0, SSEPackedSingle,
61080b57cec5SDimitry Andric                                   SchedWriteFBlend.YMM, BlendCommuteImm8>,
61090b57cec5SDimitry Andric                                   VEX_4V, VEX_L, VEX_WIG;
61100b57cec5SDimitry Andric  defm VBLENDPD : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v2f64,
61110b57cec5SDimitry Andric                                  VR128, load, f128mem, 0, SSEPackedDouble,
61120b57cec5SDimitry Andric                                  SchedWriteFBlend.XMM, BlendCommuteImm2>,
61130b57cec5SDimitry Andric                                  VEX_4V, VEX_WIG;
61140b57cec5SDimitry Andric  defm VBLENDPDY : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v4f64,
61150b57cec5SDimitry Andric                                   VR256, load, f256mem, 0, SSEPackedDouble,
61160b57cec5SDimitry Andric                                   SchedWriteFBlend.YMM, BlendCommuteImm4>,
61170b57cec5SDimitry Andric                                   VEX_4V, VEX_L, VEX_WIG;
61180b57cec5SDimitry Andric  defm VPBLENDW : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v8i16,
61190b57cec5SDimitry Andric                                  VR128, load, i128mem, 0, SSEPackedInt,
61200b57cec5SDimitry Andric                                  SchedWriteBlend.XMM, BlendCommuteImm8>,
61210b57cec5SDimitry Andric                                  VEX_4V, VEX_WIG;
61220b57cec5SDimitry Andric}
61230b57cec5SDimitry Andric
// 256-bit (VEX_L) VPBLENDW on v16i16, gated on HasAVX2. It still uses the
// 8-bit BlendCommuteImm8 transform, matching the u8imm immediate operand.
61240b57cec5SDimitry Andriclet Predicates = [HasAVX2] in {
61250b57cec5SDimitry Andric  defm VPBLENDWY : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v16i16,
61260b57cec5SDimitry Andric                                   VR256, load, i256mem, 0, SSEPackedInt,
61270b57cec5SDimitry Andric                                   SchedWriteBlend.YMM, BlendCommuteImm8>,
61280b57cec5SDimitry Andric                                   VEX_4V, VEX_L, VEX_WIG;
61290b57cec5SDimitry Andric}
61300b57cec5SDimitry Andric
61310b57cec5SDimitry Andric// Emulate vXi32/vXi64 blends with vXf32/vXf64 or pblendw.
61320b57cec5SDimitry Andric// ExecutionDomainFixPass will clean up domains later on.
// Each element type gets three patterns: register/register, load in the
// second source, and load in the first source. The last one swaps the
// operands and therefore uses the "Commute" variant of the immediate
// transform.
61330b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in {
61348bcb0991SDimitry Andricdef : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3),
61358bcb0991SDimitry Andric          (VBLENDPDYrri VR256:$src1, VR256:$src2, timm:$src3)>;
61368bcb0991SDimitry Andricdef : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3),
61378bcb0991SDimitry Andric          (VBLENDPDYrmi VR256:$src1, addr:$src2, timm:$src3)>;
61388bcb0991SDimitry Andricdef : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3),
61398bcb0991SDimitry Andric          (VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 timm:$src3))>;
61400b57cec5SDimitry Andric
61410b57cec5SDimitry Andric// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
61420b57cec5SDimitry Andric// it from becoming movsd via commuting under optsize.
// The 2-bit immediate is widened to the 8-bit pblendw immediate.
61438bcb0991SDimitry Andricdef : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
61448bcb0991SDimitry Andric          (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 timm:$src3))>;
61458bcb0991SDimitry Andricdef : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3),
61468bcb0991SDimitry Andric          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 timm:$src3))>;
61478bcb0991SDimitry Andricdef : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3),
61488bcb0991SDimitry Andric          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 timm:$src3))>;
61490b57cec5SDimitry Andric
61508bcb0991SDimitry Andricdef : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), timm:$src3),
61518bcb0991SDimitry Andric          (VBLENDPSYrri VR256:$src1, VR256:$src2, timm:$src3)>;
61528bcb0991SDimitry Andricdef : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), timm:$src3),
61538bcb0991SDimitry Andric          (VBLENDPSYrmi VR256:$src1, addr:$src2, timm:$src3)>;
61548bcb0991SDimitry Andricdef : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, timm:$src3),
61558bcb0991SDimitry Andric          (VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 timm:$src3))>;
61560b57cec5SDimitry Andric
61570b57cec5SDimitry Andric// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
61580b57cec5SDimitry Andric// it from becoming movss via commuting under optsize.
// The 4-bit immediate is widened to the 8-bit pblendw immediate.
61598bcb0991SDimitry Andricdef : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3),
61608bcb0991SDimitry Andric          (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 timm:$src3))>;
61618bcb0991SDimitry Andricdef : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), timm:$src3),
61628bcb0991SDimitry Andric          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>;
61638bcb0991SDimitry Andricdef : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, timm:$src3),
61648bcb0991SDimitry Andric          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>;
61650b57cec5SDimitry Andric}
61660b57cec5SDimitry Andric
// Legacy (non-VEX) immediate blends. Is2Addr = 1 makes SS41I_blend_rmi tie
// $src1 to $dst and print the two-operand asm string; the memop fragment is
// used for the memory form.
61670b57cec5SDimitry Andricdefm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32,
61680b57cec5SDimitry Andric                               VR128, memop, f128mem, 1, SSEPackedSingle,
61690b57cec5SDimitry Andric                               SchedWriteFBlend.XMM, BlendCommuteImm4>;
61700b57cec5SDimitry Andricdefm BLENDPD : SS41I_blend_rmi<0x0D, "blendpd", X86Blendi, v2f64,
61710b57cec5SDimitry Andric                               VR128, memop, f128mem, 1, SSEPackedDouble,
61720b57cec5SDimitry Andric                               SchedWriteFBlend.XMM, BlendCommuteImm2>;
61730b57cec5SDimitry Andricdefm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16,
61740b57cec5SDimitry Andric                               VR128, memop, i128mem, 1, SSEPackedInt,
61750b57cec5SDimitry Andric                               SchedWriteBlend.XMM, BlendCommuteImm8>;
61760b57cec5SDimitry Andric
61770b57cec5SDimitry Andriclet Predicates = [UseSSE41] in {
61780b57cec5SDimitry Andric// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
61790b57cec5SDimitry Andric// it from becoming movsd via commuting under optsize.
// The 2-bit immediate is widened to the 8-bit pblendw immediate; the
// load-in-first-source pattern also inverts it.
61808bcb0991SDimitry Andricdef : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
61818bcb0991SDimitry Andric          (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 timm:$src3))>;
61828bcb0991SDimitry Andricdef : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), timm:$src3),
61838bcb0991SDimitry Andric          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 timm:$src3))>;
61848bcb0991SDimitry Andricdef : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, timm:$src3),
61858bcb0991SDimitry Andric          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 timm:$src3))>;
61860b57cec5SDimitry Andric
// Likewise for v4i32, which would otherwise become movss; the 4-bit immediate
// is widened to the 8-bit pblendw immediate.
61878bcb0991SDimitry Andricdef : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3),
61888bcb0991SDimitry Andric          (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 timm:$src3))>;
61898bcb0991SDimitry Andricdef : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), timm:$src3),
61908bcb0991SDimitry Andric          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>;
61918bcb0991SDimitry Andricdef : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, timm:$src3),
61928bcb0991SDimitry Andric          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>;
61930b57cec5SDimitry Andric}
61940b57cec5SDimitry Andric
61950b57cec5SDimitry Andric// For insertion into the zero index (low half) of a 256-bit vector, it is
61960b57cec5SDimitry Andric// more efficient to generate a blend with immediate instead of an insert*128.
// The 128-bit value is placed in the low half of an otherwise undefined
// 256-bit register via INSERT_SUBREG. In the register forms it is the second
// blend source and the immediate (0x3 / 0xf) covers the low half of the
// elements. In the load forms the 256-bit vector comes from memory as the
// second source, so the immediate (0xc / 0xf0) covers the upper half instead.
61970b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
61980b57cec5SDimitry Andricdef : Pat<(insert_subvector (v4f64 VR256:$src1), (v2f64 VR128:$src2), (iPTR 0)),
61990b57cec5SDimitry Andric          (VBLENDPDYrri VR256:$src1,
62000b57cec5SDimitry Andric                        (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
62010b57cec5SDimitry Andric                                       VR128:$src2, sub_xmm), 0x3)>;
62020b57cec5SDimitry Andricdef : Pat<(insert_subvector (v8f32 VR256:$src1), (v4f32 VR128:$src2), (iPTR 0)),
62030b57cec5SDimitry Andric          (VBLENDPSYrri VR256:$src1,
62040b57cec5SDimitry Andric                        (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
62050b57cec5SDimitry Andric                                       VR128:$src2, sub_xmm), 0xf)>;
62060b57cec5SDimitry Andric
62070b57cec5SDimitry Andricdef : Pat<(insert_subvector (loadv4f64 addr:$src2), (v2f64 VR128:$src1), (iPTR 0)),
62080b57cec5SDimitry Andric          (VBLENDPDYrmi (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
62090b57cec5SDimitry Andric                                       VR128:$src1, sub_xmm), addr:$src2, 0xc)>;
62100b57cec5SDimitry Andricdef : Pat<(insert_subvector (loadv8f32 addr:$src2), (v4f32 VR128:$src1), (iPTR 0)),
62110b57cec5SDimitry Andric          (VBLENDPSYrmi (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
62120b57cec5SDimitry Andric                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
62130b57cec5SDimitry Andric}
62140b57cec5SDimitry Andric
62150b57cec5SDimitry Andric/// SS41I_quaternary_avx - AVX SSE 4.1 with 4 operands
/// (one destination and three sources, the third source being a register).
/// Emits rr and rm records built on Ii8Reg with TAPD and VEX_4V.
///   opc       - opcode passed to Ii8Reg.
///   OpcodeStr - mnemonic prepended to the asm operand string.
///   RC        - register class of $dst, $src1, $src3 and the register $src2.
///   x86memop  - operand type of the memory $src2.
///   VT        - result value type used in the rr pattern.
///   mem_frag  - load fragment applied to addr:$src2 in the rm pattern.
///   OpNode    - DAG node matched; note the reversed operand order in the
///               patterns: (OpNode $src3, $src2, $src1).
///   sched     - scheduling info.
/// Both records pass SSEPackedInt as the domain argument; several users wrap
/// the defm in "let ExeDomain = ...".
62160b57cec5SDimitry Andricmulticlass SS41I_quaternary_avx<bits<8> opc, string OpcodeStr, RegisterClass RC,
62170b57cec5SDimitry Andric                                X86MemOperand x86memop, ValueType VT,
62180b57cec5SDimitry Andric                                PatFrag mem_frag, SDNode OpNode,
62190b57cec5SDimitry Andric                                X86FoldableSchedWrite sched> {
62200b57cec5SDimitry Andric  def rr : Ii8Reg<opc, MRMSrcReg, (outs RC:$dst),
62210b57cec5SDimitry Andric                  (ins RC:$src1, RC:$src2, RC:$src3),
62220b57cec5SDimitry Andric                  !strconcat(OpcodeStr,
62230b57cec5SDimitry Andric                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
62240b57cec5SDimitry Andric                  [(set RC:$dst, (VT (OpNode RC:$src3, RC:$src2, RC:$src1)))],
62250b57cec5SDimitry Andric                  SSEPackedInt>, TAPD, VEX_4V,
62260b57cec5SDimitry Andric                Sched<[sched]>;
62270b57cec5SDimitry Andric
62280b57cec5SDimitry Andric  def rm : Ii8Reg<opc, MRMSrcMem, (outs RC:$dst),
62290b57cec5SDimitry Andric                  (ins RC:$src1, x86memop:$src2, RC:$src3),
62300b57cec5SDimitry Andric                  !strconcat(OpcodeStr,
62310b57cec5SDimitry Andric                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
62320b57cec5SDimitry Andric                  [(set RC:$dst,
62330b57cec5SDimitry Andric                        (OpNode RC:$src3, (mem_frag addr:$src2),
62340b57cec5SDimitry Andric                                RC:$src1))], SSEPackedInt>, TAPD, VEX_4V,
62350b57cec5SDimitry Andric                Sched<[sched.Folded, sched.ReadAfterFold,
62360b57cec5SDimitry Andric                       // x86memop:$src2
62370b57cec5SDimitry Andric                       ReadDefault, ReadDefault, ReadDefault, ReadDefault,
62380b57cec5SDimitry Andric                       ReadDefault,
62390b57cec5SDimitry Andric                       // RC:$src3
62400b57cec5SDimitry Andric                       sched.ReadAfterFold]>;
62410b57cec5SDimitry Andric}
62420b57cec5SDimitry Andric
// VEX-encoded variable blends (selector in a third register source), all
// matching X86Blendv. The FP forms are wrapped in an ExeDomain let for their
// domain; VPBLENDVB has no such wrapper. The 256-bit FP forms add VEX_L.
62430b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
62440b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in {
62450b57cec5SDimitry Andricdefm VBLENDVPD  : SS41I_quaternary_avx<0x4B, "vblendvpd", VR128, f128mem,
62460b57cec5SDimitry Andric                                       v2f64, loadv2f64, X86Blendv,
62470b57cec5SDimitry Andric                                       SchedWriteFVarBlend.XMM>;
62480b57cec5SDimitry Andricdefm VBLENDVPDY : SS41I_quaternary_avx<0x4B, "vblendvpd", VR256, f256mem,
62490b57cec5SDimitry Andric                                       v4f64, loadv4f64, X86Blendv,
62500b57cec5SDimitry Andric                                       SchedWriteFVarBlend.YMM>, VEX_L;
62510b57cec5SDimitry Andric} // ExeDomain = SSEPackedDouble
62520b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in {
62530b57cec5SDimitry Andricdefm VBLENDVPS  : SS41I_quaternary_avx<0x4A, "vblendvps", VR128, f128mem,
62540b57cec5SDimitry Andric                                       v4f32, loadv4f32, X86Blendv,
62550b57cec5SDimitry Andric                                       SchedWriteFVarBlend.XMM>;
62560b57cec5SDimitry Andricdefm VBLENDVPSY : SS41I_quaternary_avx<0x4A, "vblendvps", VR256, f256mem,
62570b57cec5SDimitry Andric                                       v8f32, loadv8f32, X86Blendv,
62580b57cec5SDimitry Andric                                       SchedWriteFVarBlend.YMM>, VEX_L;
62590b57cec5SDimitry Andric} // ExeDomain = SSEPackedSingle
62600b57cec5SDimitry Andricdefm VPBLENDVB  : SS41I_quaternary_avx<0x4C, "vpblendvb", VR128, i128mem,
62610b57cec5SDimitry Andric                                       v16i8, loadv16i8, X86Blendv,
62620b57cec5SDimitry Andric                                       SchedWriteVarBlend.XMM>;
62630b57cec5SDimitry Andric}
62640b57cec5SDimitry Andric
// 256-bit (VEX_L) byte variable blend on v32i8, gated on HasAVX2.
62650b57cec5SDimitry Andriclet Predicates = [HasAVX2] in {
62660b57cec5SDimitry Andricdefm VPBLENDVBY : SS41I_quaternary_avx<0x4C, "vpblendvb", VR256, i256mem,
62670b57cec5SDimitry Andric                                       v32i8, loadv32i8, X86Blendv,
62680b57cec5SDimitry Andric                                       SchedWriteVarBlend.YMM>, VEX_L;
62690b57cec5SDimitry Andric}
62700b57cec5SDimitry Andric
// Select 32-bit and 64-bit integer X86Blendv nodes with the FP variable-blend
// instructions (VBLENDVPS for vXi32, VBLENDVPD for vXi64). The DAG operand
// order (mask, src1, src2) is reversed to the instruction operand order
// (src2, src1, mask).
62710b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
62720b57cec5SDimitry Andric  def : Pat<(v4i32 (X86Blendv (v4i32 VR128:$mask), (v4i32 VR128:$src1),
62730b57cec5SDimitry Andric                              (v4i32 VR128:$src2))),
62740b57cec5SDimitry Andric            (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
62750b57cec5SDimitry Andric  def : Pat<(v2i64 (X86Blendv (v2i64 VR128:$mask), (v2i64 VR128:$src1),
62760b57cec5SDimitry Andric                              (v2i64 VR128:$src2))),
62770b57cec5SDimitry Andric            (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
62780b57cec5SDimitry Andric  def : Pat<(v8i32 (X86Blendv (v8i32 VR256:$mask), (v8i32 VR256:$src1),
62790b57cec5SDimitry Andric                              (v8i32 VR256:$src2))),
62800b57cec5SDimitry Andric            (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
62810b57cec5SDimitry Andric  def : Pat<(v4i64 (X86Blendv (v4i64 VR256:$mask), (v4i64 VR256:$src1),
62820b57cec5SDimitry Andric                              (v4i64 VR256:$src2))),
62830b57cec5SDimitry Andric            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
62840b57cec5SDimitry Andric}
62850b57cec5SDimitry Andric
62860b57cec5SDimitry Andric// Prefer a movss or movsd over a blendps when optimizing for size. These were
62870b57cec5SDimitry Andric// changed to use blends because blends have better throughput on Sandy Bridge
62880b57cec5SDimitry Andric// and Haswell, but movs[s/d] are 1-2 bytes shorter.
// Patterns active only when both HasAVX and OptForSpeed hold. They select
// immediate blends (VBLENDPS / VBLENDPD / VPBLENDW) for X86vzmovl, X86Movss and
// X86Movsd:
//  * X86vzmovl blends the source into a zero vector produced by V_SET0, using
//    immediate 1 for v4f32 (VBLENDPS) and 3 for v4i32 (VPBLENDW).
//  * X86Movss/X86Movsd with the load as the *second* node operand use
//    immediate 1; with the load as the *first* node operand the register and
//    memory operands keep the same instruction positions and the immediate is
//    complemented instead (0xe for v4f32, 2 for v2f64).
//  * The 256-bit X86vzmovl forms work on the sub_xmm sub-register obtained via
//    EXTRACT_SUBREG and wrap the 128-bit result with SUBREG_TO_REG.
62890b57cec5SDimitry Andriclet Predicates = [HasAVX, OptForSpeed] in {
62900b57cec5SDimitry Andric  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
62910b57cec5SDimitry Andric            (VBLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
62920b57cec5SDimitry Andric  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
62930b57cec5SDimitry Andric            (VPBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;
62940b57cec5SDimitry Andric
62950b57cec5SDimitry Andric  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
62960b57cec5SDimitry Andric            (VBLENDPSrri VR128:$src1, VR128:$src2, (i8 1))>;
62970b57cec5SDimitry Andric  def : Pat<(v4f32 (X86Movss VR128:$src1, (loadv4f32 addr:$src2))),
62980b57cec5SDimitry Andric            (VBLENDPSrmi VR128:$src1, addr:$src2, (i8 1))>;
62990b57cec5SDimitry Andric  def : Pat<(v4f32 (X86Movss (loadv4f32 addr:$src2), VR128:$src1)),
63000b57cec5SDimitry Andric            (VBLENDPSrmi VR128:$src1, addr:$src2, (i8 0xe))>;
63010b57cec5SDimitry Andric
63020b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
63030b57cec5SDimitry Andric            (VBLENDPDrri VR128:$src1, VR128:$src2, (i8 1))>;
63040b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Movsd VR128:$src1, (loadv2f64 addr:$src2))),
63050b57cec5SDimitry Andric            (VBLENDPDrmi VR128:$src1, addr:$src2, (i8 1))>;
63060b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Movsd (loadv2f64 addr:$src2), VR128:$src1)),
63070b57cec5SDimitry Andric            (VBLENDPDrmi VR128:$src1, addr:$src2, (i8 2))>;
63080b57cec5SDimitry Andric
63090b57cec5SDimitry Andric  // Move low f32 and clear high bits.
63100b57cec5SDimitry Andric  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
63110b57cec5SDimitry Andric            (SUBREG_TO_REG (i32 0),
63120b57cec5SDimitry Andric             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
63130b57cec5SDimitry Andric                          (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)),
63140b57cec5SDimitry Andric                          (i8 1))), sub_xmm)>;
63150b57cec5SDimitry Andric  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
63160b57cec5SDimitry Andric            (SUBREG_TO_REG (i32 0),
63170b57cec5SDimitry Andric             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
63180b57cec5SDimitry Andric                          (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)),
63190b57cec5SDimitry Andric                          (i8 3))), sub_xmm)>;
63200b57cec5SDimitry Andric}
63210b57cec5SDimitry Andric
63220b57cec5SDimitry Andric// Prefer a movss or movsd over a blendps when optimizing for size. These were
63230b57cec5SDimitry Andric// changed to use blends because blends have better throughput on Sandy Bridge
63240b57cec5SDimitry Andric// and Haswell, but movs[s/d] are 1-2 bytes shorter.
// Non-VEX counterparts of the AVX blend patterns, active when UseSSE41 and
// OptForSpeed hold. They select BLENDPS / BLENDPD / PBLENDW with the same
// immediates (1 / 3 for X86vzmovl, 1 for the normal X86Movss/X86Movsd order,
// 0xe / 2 when the load is the first node operand). Memory operands are matched
// through the memopv4f32 / memopv2f64 fragments rather than loadv*.
63250b57cec5SDimitry Andriclet Predicates = [UseSSE41, OptForSpeed] in {
63260b57cec5SDimitry Andric  // With SSE41 we can use blends for these patterns.
63270b57cec5SDimitry Andric  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
63280b57cec5SDimitry Andric            (BLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
63290b57cec5SDimitry Andric  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
63300b57cec5SDimitry Andric            (PBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;
63310b57cec5SDimitry Andric
63320b57cec5SDimitry Andric  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
63330b57cec5SDimitry Andric            (BLENDPSrri VR128:$src1, VR128:$src2, (i8 1))>;
63340b57cec5SDimitry Andric  def : Pat<(v4f32 (X86Movss VR128:$src1, (memopv4f32 addr:$src2))),
63350b57cec5SDimitry Andric            (BLENDPSrmi VR128:$src1, addr:$src2, (i8 1))>;
63360b57cec5SDimitry Andric  def : Pat<(v4f32 (X86Movss (memopv4f32 addr:$src2), VR128:$src1)),
63370b57cec5SDimitry Andric            (BLENDPSrmi VR128:$src1, addr:$src2, (i8 0xe))>;
63380b57cec5SDimitry Andric
63390b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
63400b57cec5SDimitry Andric            (BLENDPDrri VR128:$src1, VR128:$src2, (i8 1))>;
63410b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Movsd VR128:$src1, (memopv2f64 addr:$src2))),
63420b57cec5SDimitry Andric            (BLENDPDrmi VR128:$src1, addr:$src2, (i8 1))>;
63430b57cec5SDimitry Andric  def : Pat<(v2f64 (X86Movsd (memopv2f64 addr:$src2), VR128:$src1)),
63440b57cec5SDimitry Andric            (BLENDPDrmi VR128:$src1, addr:$src2, (i8 2))>;
63450b57cec5SDimitry Andric}
63460b57cec5SDimitry Andric
63470b57cec5SDimitry Andric
63480b57cec5SDimitry Andric/// SS41I_ternary - SSE 4.1 ternary operator
// Produces two records, rr0 (register source) and rm0 (memory source), both
// based on SS48I. The enclosing `let` lists XMM0 in Uses and ties $src1 to
// $dst; XMM0 appears as a literal in the asm strings and as the first OpNode
// operand in the patterns, so it is not part of the explicit (ins ...) list.
// Parameters:
//   opc       - opcode byte passed to SS48I
//   OpcodeStr - mnemonic prepended to the operand string
//   VT        - value type wrapped around the rr0 pattern result
//   mem_frag  - load fragment applied to addr:$src2 in the rm0 pattern
//   x86memop  - memory operand type of $src2 in rm0
//   OpNode    - DAG node matched as (OpNode XMM0, src2, src1)
//   sched     - scheduling class; rm0 uses sched.Folded / sched.ReadAfterFold
// Note that only rr0 wraps the node in (VT ...); rm0 leaves the result type
// to be determined from the rest of the pattern.
63490b57cec5SDimitry Andriclet Uses = [XMM0], Constraints = "$src1 = $dst" in {
63500b57cec5SDimitry Andric  multiclass SS41I_ternary<bits<8> opc, string OpcodeStr, ValueType VT,
63510b57cec5SDimitry Andric                           PatFrag mem_frag, X86MemOperand x86memop,
63520b57cec5SDimitry Andric                           SDNode OpNode, X86FoldableSchedWrite sched> {
63530b57cec5SDimitry Andric    def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
63540b57cec5SDimitry Andric                    (ins VR128:$src1, VR128:$src2),
63550b57cec5SDimitry Andric                    !strconcat(OpcodeStr,
63560b57cec5SDimitry Andric                     "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
63570b57cec5SDimitry Andric                    [(set VR128:$dst,
63580b57cec5SDimitry Andric                      (VT (OpNode XMM0, VR128:$src2, VR128:$src1)))]>,
63590b57cec5SDimitry Andric                    Sched<[sched]>;
63600b57cec5SDimitry Andric
63610b57cec5SDimitry Andric    def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
63620b57cec5SDimitry Andric                    (ins VR128:$src1, x86memop:$src2),
63630b57cec5SDimitry Andric                    !strconcat(OpcodeStr,
63640b57cec5SDimitry Andric                     "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
63650b57cec5SDimitry Andric                    [(set VR128:$dst,
63660b57cec5SDimitry Andric                      (OpNode XMM0, (mem_frag addr:$src2), VR128:$src1))]>,
63670b57cec5SDimitry Andric                    Sched<[sched.Folded, sched.ReadAfterFold]>;
63680b57cec5SDimitry Andric  }
63690b57cec5SDimitry Andric}
63700b57cec5SDimitry Andric
// Non-VEX variable blends built from SS41I_ternary, all matching X86Blendv:
//   BLENDVPD - opcode 0x15, v2f64, SSEPackedDouble domain
//   BLENDVPS - opcode 0x14, v4f32, SSEPackedSingle domain
//   PBLENDVB - opcode 0x10, v16i8, no ExeDomain override, SchedWriteVarBlend
// Each `let ExeDomain = ... in` without braces applies to the single defm
// that follows it.
63710b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in
63720b57cec5SDimitry Andricdefm BLENDVPD : SS41I_ternary<0x15, "blendvpd", v2f64, memopv2f64, f128mem,
63730b57cec5SDimitry Andric                              X86Blendv, SchedWriteFVarBlend.XMM>;
63740b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in
63750b57cec5SDimitry Andricdefm BLENDVPS : SS41I_ternary<0x14, "blendvps", v4f32, memopv4f32, f128mem,
63760b57cec5SDimitry Andric                              X86Blendv, SchedWriteFVarBlend.XMM>;
63770b57cec5SDimitry Andricdefm PBLENDVB : SS41I_ternary<0x10, "pblendvb", v16i8, memopv16i8, i128mem,
63780b57cec5SDimitry Andric                              X86Blendv, SchedWriteVarBlend.XMM>;
63790b57cec5SDimitry Andric
63800b57cec5SDimitry Andric// Aliases with the implicit xmm0 argument
// Each alias accepts the two-operand spelling (xmm0 not written) and maps it
// to the corresponding rr0 / rm0 record, whose own asm string spells xmm0 out.
// NOTE(review): the trailing 0 is presumably the InstAlias "emit" flag, making
// these aliases parse-only - confirm against the InstAlias class definition.
63810b57cec5SDimitry Andricdef : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}",
63820b57cec5SDimitry Andric                (BLENDVPDrr0 VR128:$dst, VR128:$src2), 0>;
63830b57cec5SDimitry Andricdef : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}",
63840b57cec5SDimitry Andric                (BLENDVPDrm0 VR128:$dst, f128mem:$src2), 0>;
63850b57cec5SDimitry Andricdef : InstAlias<"blendvps\t{$src2, $dst|$dst, $src2}",
63860b57cec5SDimitry Andric                (BLENDVPSrr0 VR128:$dst, VR128:$src2), 0>;
63870b57cec5SDimitry Andricdef : InstAlias<"blendvps\t{$src2, $dst|$dst, $src2}",
63880b57cec5SDimitry Andric                (BLENDVPSrm0 VR128:$dst, f128mem:$src2), 0>;
63890b57cec5SDimitry Andricdef : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}",
63900b57cec5SDimitry Andric                (PBLENDVBrr0 VR128:$dst, VR128:$src2), 0>;
63910b57cec5SDimitry Andricdef : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}",
63920b57cec5SDimitry Andric                (PBLENDVBrm0 VR128:$dst, i128mem:$src2), 0>;
63930b57cec5SDimitry Andric
// Selection patterns (UseSSE41) lowering X86Blendv on v4i32 / v2i64 to the
// non-VEX BLENDVPSrr0 / BLENDVPDrr0 records. The mask operand of the node is
// the physical register XMM0; the instruction receives ($src2, $src1), i.e.
// the node's value operands in swapped order.
63940b57cec5SDimitry Andriclet Predicates = [UseSSE41] in {
63950b57cec5SDimitry Andric  def : Pat<(v4i32 (X86Blendv (v4i32 XMM0), (v4i32 VR128:$src1),
63960b57cec5SDimitry Andric                              (v4i32 VR128:$src2))),
63970b57cec5SDimitry Andric            (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
63980b57cec5SDimitry Andric  def : Pat<(v2i64 (X86Blendv (v2i64 XMM0), (v2i64 VR128:$src1),
63990b57cec5SDimitry Andric                              (v2i64 VR128:$src2))),
64000b57cec5SDimitry Andric            (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
64010b57cec5SDimitry Andric}
64020b57cec5SDimitry Andric
// Non-temporal aligned vector loads. Everything in this block is defined with
// AddedComplexity = 400.
// The three instruction records (VMOVNTDQArm, VMOVNTDQAYrm, MOVNTDQArm) share
// opcode 0x2A and have empty pattern lists; instruction selection is driven by
// the separate Pat records further down, which map alignednontemporalload of
// each listed vector type onto the matching instruction.
64030b57cec5SDimitry Andriclet AddedComplexity = 400 in { // Prefer non-temporal versions
64040b57cec5SDimitry Andric
// VEX-encoded 128-bit form (HasAVX, NoVLX) and 256-bit form (HasAVX2, NoVLX,
// VEX_L), followed by the non-VEX 128-bit form, which has no Predicates
// override at this point.
64050b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in
64060b57cec5SDimitry Andricdef VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
64070b57cec5SDimitry Andric                        "vmovntdqa\t{$src, $dst|$dst, $src}", []>,
64080b57cec5SDimitry Andric                        Sched<[SchedWriteVecMoveLSNT.XMM.RM]>, VEX, VEX_WIG;
64090b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in
64100b57cec5SDimitry Andricdef VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
64110b57cec5SDimitry Andric                         "vmovntdqa\t{$src, $dst|$dst, $src}", []>,
64120b57cec5SDimitry Andric                         Sched<[SchedWriteVecMoveLSNT.YMM.RM]>, VEX, VEX_L, VEX_WIG;
64130b57cec5SDimitry Andricdef MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
64140b57cec5SDimitry Andric                       "movntdqa\t{$src, $dst|$dst, $src}", []>,
64150b57cec5SDimitry Andric                       Sched<[SchedWriteVecMoveLSNT.XMM.RM]>;
64160b57cec5SDimitry Andric
// 256-bit types (v8f32, v4f64, v4i64, v8i32, v16i16, v32i8) -> VMOVNTDQAYrm.
64170b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
64180b57cec5SDimitry Andric  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
64190b57cec5SDimitry Andric            (VMOVNTDQAYrm addr:$src)>;
64200b57cec5SDimitry Andric  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
64210b57cec5SDimitry Andric            (VMOVNTDQAYrm addr:$src)>;
64220b57cec5SDimitry Andric  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
64230b57cec5SDimitry Andric            (VMOVNTDQAYrm addr:$src)>;
64240b57cec5SDimitry Andric  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
64250b57cec5SDimitry Andric            (VMOVNTDQAYrm addr:$src)>;
64260b57cec5SDimitry Andric  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
64270b57cec5SDimitry Andric            (VMOVNTDQAYrm addr:$src)>;
64280b57cec5SDimitry Andric  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
64290b57cec5SDimitry Andric            (VMOVNTDQAYrm addr:$src)>;
64300b57cec5SDimitry Andric}
64310b57cec5SDimitry Andric
// 128-bit types (v4f32, v2f64, v2i64, v4i32, v8i16, v16i8) -> VMOVNTDQArm.
64320b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
64330b57cec5SDimitry Andric  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
64340b57cec5SDimitry Andric            (VMOVNTDQArm addr:$src)>;
64350b57cec5SDimitry Andric  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
64360b57cec5SDimitry Andric            (VMOVNTDQArm addr:$src)>;
64370b57cec5SDimitry Andric  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
64380b57cec5SDimitry Andric            (VMOVNTDQArm addr:$src)>;
64390b57cec5SDimitry Andric  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
64400b57cec5SDimitry Andric            (VMOVNTDQArm addr:$src)>;
64410b57cec5SDimitry Andric  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
64420b57cec5SDimitry Andric            (VMOVNTDQArm addr:$src)>;
64430b57cec5SDimitry Andric  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
64440b57cec5SDimitry Andric            (VMOVNTDQArm addr:$src)>;
64450b57cec5SDimitry Andric}
64460b57cec5SDimitry Andric
// Same six 128-bit types -> non-VEX MOVNTDQArm when UseSSE41 holds.
64470b57cec5SDimitry Andriclet Predicates = [UseSSE41] in {
64480b57cec5SDimitry Andric  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
64490b57cec5SDimitry Andric            (MOVNTDQArm addr:$src)>;
64500b57cec5SDimitry Andric  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
64510b57cec5SDimitry Andric            (MOVNTDQArm addr:$src)>;
64520b57cec5SDimitry Andric  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
64530b57cec5SDimitry Andric            (MOVNTDQArm addr:$src)>;
64540b57cec5SDimitry Andric  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
64550b57cec5SDimitry Andric            (MOVNTDQArm addr:$src)>;
64560b57cec5SDimitry Andric  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
64570b57cec5SDimitry Andric            (MOVNTDQArm addr:$src)>;
64580b57cec5SDimitry Andric  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
64590b57cec5SDimitry Andric            (MOVNTDQArm addr:$src)>;
64600b57cec5SDimitry Andric}
64610b57cec5SDimitry Andric
64620b57cec5SDimitry Andric} // AddedComplexity
64630b57cec5SDimitry Andric
64640b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
64650b57cec5SDimitry Andric// SSE4.2 - Compare Instructions
64660b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
64670b57cec5SDimitry Andric
64680b57cec5SDimitry Andric/// SS42I_binop_rm - Simple SSE 4.2 binary operator
// Produces an rr (register/register) and an rm (register/memory) record based
// on SS428I. Parameters:
//   opc        - opcode byte
//   OpcodeStr  - mnemonic
//   OpNode     - DAG node matched as (OpNode src1, src2)
//   OpVT       - value type of the result
//   RC         - register class of $dst, $src1 and (for rr) $src2
//   memop_frag - load fragment applied to addr:$src2 in rm
//   x86memop   - memory operand type of $src2 in rm
//   sched      - scheduling class; rm uses sched.Folded / sched.ReadAfterFold
//   Is2Addr    - when set (the default) the asm string is the two-operand
//                "$src2, $dst" form; otherwise the three-operand
//                "$src2, $src1, $dst" form is used. It only selects the asm
//                string here; any "$src1 = $dst" constraint must come from
//                the instantiation site.
64690b57cec5SDimitry Andricmulticlass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
64700b57cec5SDimitry Andric                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
64710b57cec5SDimitry Andric                          X86MemOperand x86memop, X86FoldableSchedWrite sched,
64720b57cec5SDimitry Andric                          bit Is2Addr = 1> {
64730b57cec5SDimitry Andric  def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst),
64740b57cec5SDimitry Andric       (ins RC:$src1, RC:$src2),
64750b57cec5SDimitry Andric       !if(Is2Addr,
64760b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
64770b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
64780b57cec5SDimitry Andric       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
64790b57cec5SDimitry Andric       Sched<[sched]>;
64800b57cec5SDimitry Andric  def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst),
64810b57cec5SDimitry Andric       (ins RC:$src1, x86memop:$src2),
64820b57cec5SDimitry Andric       !if(Is2Addr,
64830b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
64840b57cec5SDimitry Andric           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
64850b57cec5SDimitry Andric       [(set RC:$dst,
64860b57cec5SDimitry Andric         (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
64870b57cec5SDimitry Andric       Sched<[sched.Folded, sched.ReadAfterFold]>;
64880b57cec5SDimitry Andric}
64890b57cec5SDimitry Andric
// 64-bit element "compare greater than" (X86pcmpgt, opcode 0x37) in three
// encodings, all built from SS42I_binop_rm:
//   VPCMPGTQ  - HasAVX,  VR128/v2i64, three-operand asm (Is2Addr = 0), VEX_4V
//   VPCMPGTQY - HasAVX2, VR256/v4i64, three-operand asm (Is2Addr = 0), VEX_L
//   PCMPGTQ   - non-VEX, VR128/v2i64, default two-operand asm with $src1 tied
//               to $dst; uses the `memop` fragment instead of `load`.
64900b57cec5SDimitry Andriclet Predicates = [HasAVX] in
64910b57cec5SDimitry Andric  defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
64920b57cec5SDimitry Andric                                 load, i128mem, SchedWriteVecALU.XMM, 0>,
64930b57cec5SDimitry Andric                                 VEX_4V, VEX_WIG;
64940b57cec5SDimitry Andric
64950b57cec5SDimitry Andriclet Predicates = [HasAVX2] in
64960b57cec5SDimitry Andric  defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
64970b57cec5SDimitry Andric                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
64980b57cec5SDimitry Andric                                  VEX_4V, VEX_L, VEX_WIG;
64990b57cec5SDimitry Andric
65000b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in
65010b57cec5SDimitry Andric  defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
65020b57cec5SDimitry Andric                                memop, i128mem, SchedWriteVecALU.XMM>;
65030b57cec5SDimitry Andric
65040b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
65050b57cec5SDimitry Andric// SSE4.2 - String/text Processing Instructions
65060b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
65070b57cec5SDimitry Andric
// pcmpistrm_SS42AI - rr / rm records for opcode 0x62 based on SS42AI.
// `asm` is the mnemonic. Both records have an empty (outs) list and an empty
// pattern list; operands are two sources plus a u8imm. The rm form is marked
// mayLoad explicitly because, with no pattern, the load cannot be inferred.
// Scheduling uses WritePCmpIStrM (Folded/ReadAfterFold for rm).
65080b57cec5SDimitry Andricmulticlass pcmpistrm_SS42AI<string asm> {
65090b57cec5SDimitry Andric  def rr : SS42AI<0x62, MRMSrcReg, (outs),
65100b57cec5SDimitry Andric    (ins VR128:$src1, VR128:$src2, u8imm:$src3),
65110b57cec5SDimitry Andric    !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
65120b57cec5SDimitry Andric    []>, Sched<[WritePCmpIStrM]>;
65130b57cec5SDimitry Andric  let mayLoad = 1 in
65140b57cec5SDimitry Andric  def rm :SS42AI<0x62, MRMSrcMem, (outs),
65150b57cec5SDimitry Andric    (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
65160b57cec5SDimitry Andric    !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
65170b57cec5SDimitry Andric    []>, Sched<[WritePCmpIStrM.Folded, WritePCmpIStrM.ReadAfterFold]>;
65180b57cec5SDimitry Andric}
65190b57cec5SDimitry Andric
// VPCMPISTRM (HasAVX, VEX-encoded) and PCMPISTRM. Both are declared as
// implicitly defining XMM0 and EFLAGS and as having no other side effects.
// The brace-less `let Predicates` applies only to the VPCMPISTRM defm.
65200b57cec5SDimitry Andriclet Defs = [XMM0, EFLAGS], hasSideEffects = 0 in {
65210b57cec5SDimitry Andric  let Predicates = [HasAVX] in
6522d409305fSDimitry Andric  defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX, VEX_WIG;
65230b57cec5SDimitry Andric  defm PCMPISTRM  : pcmpistrm_SS42AI<"pcmpistrm"> ;
65240b57cec5SDimitry Andric}
65250b57cec5SDimitry Andric
// SS42AI_pcmpestrm - rr / rm records for opcode 0x60 based on SS42AI.
// `asm` is the mnemonic. Both records have empty (outs) and pattern lists; the
// rm form is marked mayLoad explicitly. Scheduling uses WritePCmpEStrM.
// NOTE(review): the explicit operands are named $src1, $src3 and $src5; the
// skipped $src2/$src4 presumably correspond to implicit register operands
// supplied at the instantiation site - confirm.
65260b57cec5SDimitry Andricmulticlass SS42AI_pcmpestrm<string asm> {
65270b57cec5SDimitry Andric  def rr : SS42AI<0x60, MRMSrcReg, (outs),
65280b57cec5SDimitry Andric    (ins VR128:$src1, VR128:$src3, u8imm:$src5),
65290b57cec5SDimitry Andric    !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
65300b57cec5SDimitry Andric    []>, Sched<[WritePCmpEStrM]>;
65310b57cec5SDimitry Andric  let mayLoad = 1 in
65320b57cec5SDimitry Andric  def rm : SS42AI<0x60, MRMSrcMem, (outs),
65330b57cec5SDimitry Andric    (ins VR128:$src1, i128mem:$src3, u8imm:$src5),
65340b57cec5SDimitry Andric    !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
65350b57cec5SDimitry Andric    []>, Sched<[WritePCmpEStrM.Folded, WritePCmpEStrM.ReadAfterFold]>;
65360b57cec5SDimitry Andric}
65370b57cec5SDimitry Andric
// VPCMPESTRM (HasAVX, VEX-encoded) and PCMPESTRM. Both implicitly define XMM0
// and EFLAGS and implicitly use EAX and EDX; hasSideEffects is cleared.
// The brace-less `let Predicates` applies only to the VPCMPESTRM defm.
65380b57cec5SDimitry Andriclet Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
65390b57cec5SDimitry Andric  let Predicates = [HasAVX] in
6540d409305fSDimitry Andric  defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX, VEX_WIG;
65410b57cec5SDimitry Andric  defm PCMPESTRM :  SS42AI_pcmpestrm<"pcmpestrm">;
65420b57cec5SDimitry Andric}
65430b57cec5SDimitry Andric
// SS42AI_pcmpistri - rr / rm records for opcode 0x63 based on SS42AI.
// `asm` is the mnemonic. Both records have empty (outs) and pattern lists and
// take two sources plus a u8imm; the rm form is marked mayLoad explicitly.
// Scheduling uses WritePCmpIStrI (Folded/ReadAfterFold for rm).
65440b57cec5SDimitry Andricmulticlass SS42AI_pcmpistri<string asm> {
65450b57cec5SDimitry Andric  def rr : SS42AI<0x63, MRMSrcReg, (outs),
65460b57cec5SDimitry Andric    (ins VR128:$src1, VR128:$src2, u8imm:$src3),
65470b57cec5SDimitry Andric    !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
65480b57cec5SDimitry Andric    []>, Sched<[WritePCmpIStrI]>;
65490b57cec5SDimitry Andric  let mayLoad = 1 in
65500b57cec5SDimitry Andric  def rm : SS42AI<0x63, MRMSrcMem, (outs),
65510b57cec5SDimitry Andric    (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
65520b57cec5SDimitry Andric    !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
65530b57cec5SDimitry Andric    []>, Sched<[WritePCmpIStrI.Folded, WritePCmpIStrI.ReadAfterFold]>;
65540b57cec5SDimitry Andric}
65550b57cec5SDimitry Andric
// VPCMPISTRI (HasAVX, VEX-encoded) and PCMPISTRI. Both implicitly define ECX
// and EFLAGS; hasSideEffects is cleared.
// The brace-less `let Predicates` applies only to the VPCMPISTRI defm.
65560b57cec5SDimitry Andriclet Defs = [ECX, EFLAGS], hasSideEffects = 0 in {
65570b57cec5SDimitry Andric  let Predicates = [HasAVX] in
6558d409305fSDimitry Andric  defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX, VEX_WIG;
65590b57cec5SDimitry Andric  defm PCMPISTRI  : SS42AI_pcmpistri<"pcmpistri">;
65600b57cec5SDimitry Andric}
65610b57cec5SDimitry Andric
// SS42AI_pcmpestri - rr / rm records for opcode 0x61 based on SS42AI.
// `asm` is the mnemonic. Both records have empty (outs) and pattern lists; the
// rm form is marked mayLoad explicitly. Scheduling uses WritePCmpEStrI.
// NOTE(review): the explicit operands are named $src1, $src3 and $src5; the
// skipped $src2/$src4 presumably correspond to implicit register operands
// supplied at the instantiation site - confirm.
65620b57cec5SDimitry Andricmulticlass SS42AI_pcmpestri<string asm> {
65630b57cec5SDimitry Andric  def rr : SS42AI<0x61, MRMSrcReg, (outs),
65640b57cec5SDimitry Andric    (ins VR128:$src1, VR128:$src3, u8imm:$src5),
65650b57cec5SDimitry Andric    !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
65660b57cec5SDimitry Andric    []>, Sched<[WritePCmpEStrI]>;
65670b57cec5SDimitry Andric  let mayLoad = 1 in
65680b57cec5SDimitry Andric  def rm : SS42AI<0x61, MRMSrcMem, (outs),
65690b57cec5SDimitry Andric    (ins VR128:$src1, i128mem:$src3, u8imm:$src5),
65700b57cec5SDimitry Andric    !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
65710b57cec5SDimitry Andric    []>, Sched<[WritePCmpEStrI.Folded, WritePCmpEStrI.ReadAfterFold]>;
65720b57cec5SDimitry Andric}
65730b57cec5SDimitry Andric
// VPCMPESTRI (HasAVX, VEX-encoded) and PCMPESTRI. Both implicitly define ECX
// and EFLAGS and implicitly use EAX and EDX; hasSideEffects is cleared.
// The brace-less `let Predicates` applies only to the VPCMPESTRI defm.
65740b57cec5SDimitry Andriclet Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
65750b57cec5SDimitry Andric  let Predicates = [HasAVX] in
6576d409305fSDimitry Andric  defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX, VEX_WIG;
65770b57cec5SDimitry Andric  defm PCMPESTRI  : SS42AI_pcmpestri<"pcmpestri">;
65780b57cec5SDimitry Andric}
65790b57cec5SDimitry Andric
65800b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
65810b57cec5SDimitry Andric// SSE4.2 - CRC Instructions
65820b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
65830b57cec5SDimitry Andric
65840b57cec5SDimitry Andric// No CRC instructions have AVX equivalents
65850b57cec5SDimitry Andric
65860b57cec5SDimitry Andric// crc intrinsic instruction
65870b57cec5SDimitry Andric// This set of instructions is only rm; the only difference is the size
65880b57cec5SDimitry Andric// of r and m.
// SS42I_crc32r - register-source CRC32 instruction class (based on SS42FI).
//   opc   - opcode byte
//   asm   - mnemonic; operands are printed as "$src2, $src1" (AT&T order)
//   RCOut - register class of $dst and of the accumulator input $src1
//   RCIn  - register class of the data input $src2
//   Int   - operator matched as (Int $src1, $src2) to produce $dst
// The asm string never mentions $dst, so the class relies on the
// instantiation site to tie $src1 to $dst.
65890b57cec5SDimitry Andricclass SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut,
65900b57cec5SDimitry Andric                   RegisterClass RCIn, SDPatternOperator Int> :
65910b57cec5SDimitry Andric  SS42FI<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
65920b57cec5SDimitry Andric         !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
65930b57cec5SDimitry Andric         [(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))]>,
65940b57cec5SDimitry Andric         Sched<[WriteCRC32]>;
65950b57cec5SDimitry Andric
// SS42I_crc32m - memory-source CRC32 instruction class (based on SS42FI).
//   opc      - opcode byte
//   asm      - mnemonic; operands are printed as "$src2, $src1" (AT&T order)
//   RCOut    - register class of $dst and of the accumulator input $src1
//   x86memop - memory operand type of the data input $src2
//   Int      - operator matched as (Int $src1, (load addr:$src2))
// Scheduling uses WriteCRC32.Folded / WriteCRC32.ReadAfterFold.
65960b57cec5SDimitry Andricclass SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
65970b57cec5SDimitry Andric                   X86MemOperand x86memop, SDPatternOperator Int> :
65980b57cec5SDimitry Andric  SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
65990b57cec5SDimitry Andric         !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
66000b57cec5SDimitry Andric         [(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))]>,
66010b57cec5SDimitry Andric         Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>;
66020b57cec5SDimitry Andric
// Concrete CRC32 instructions; every record has $src1 tied to $dst.
// Naming is CRC32r<acc-width>{r|m}<data-width>. Opcode 0xF0 is used for the
// 8-bit data forms and 0xF1 for the 16/32/64-bit data forms, which are told
// apart by OpSize16, OpSize32 or REX_W. Each record with a 32- or 64-bit
// accumulator and matching intrinsic selects from int_x86_sse42_crc32_*.
66030b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in {
66040b57cec5SDimitry Andric  def CRC32r32m8  : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem,
66050b57cec5SDimitry Andric                                 int_x86_sse42_crc32_32_8>;
66060b57cec5SDimitry Andric  def CRC32r32r8  : SS42I_crc32r<0xF0, "crc32{b}", GR32, GR8,
66070b57cec5SDimitry Andric                                 int_x86_sse42_crc32_32_8>;
66080b57cec5SDimitry Andric  def CRC32r32m16 : SS42I_crc32m<0xF1, "crc32{w}", GR32, i16mem,
66090b57cec5SDimitry Andric                                 int_x86_sse42_crc32_32_16>, OpSize16;
66100b57cec5SDimitry Andric  def CRC32r32r16 : SS42I_crc32r<0xF1, "crc32{w}", GR32, GR16,
66110b57cec5SDimitry Andric                                 int_x86_sse42_crc32_32_16>, OpSize16;
66120b57cec5SDimitry Andric  def CRC32r32m32 : SS42I_crc32m<0xF1, "crc32{l}", GR32, i32mem,
66130b57cec5SDimitry Andric                                 int_x86_sse42_crc32_32_32>, OpSize32;
66140b57cec5SDimitry Andric  def CRC32r32r32 : SS42I_crc32r<0xF1, "crc32{l}", GR32, GR32,
66150b57cec5SDimitry Andric                                 int_x86_sse42_crc32_32_32>, OpSize32;
66160b57cec5SDimitry Andric  def CRC32r64m64 : SS42I_crc32m<0xF1, "crc32{q}", GR64, i64mem,
66170b57cec5SDimitry Andric                                 int_x86_sse42_crc32_64_64>, REX_W;
66180b57cec5SDimitry Andric  def CRC32r64r64 : SS42I_crc32r<0xF1, "crc32{q}", GR64, GR64,
66190b57cec5SDimitry Andric                                 int_x86_sse42_crc32_64_64>, REX_W;
  // The 64-bit-accumulator / 8-bit-data forms pass null_frag instead of an
  // intrinsic (presumably leaving them without a selection pattern - confirm),
  // so hasSideEffects and, for the memory form, mayLoad are set by hand.
66200b57cec5SDimitry Andric  let hasSideEffects = 0 in {
66210b57cec5SDimitry Andric    let mayLoad = 1 in
66220b57cec5SDimitry Andric    def CRC32r64m8 : SS42I_crc32m<0xF0, "crc32{b}", GR64, i8mem,
66230b57cec5SDimitry Andric                                   null_frag>, REX_W;
66240b57cec5SDimitry Andric    def CRC32r64r8 : SS42I_crc32r<0xF0, "crc32{b}", GR64, GR8,
66250b57cec5SDimitry Andric                                   null_frag>, REX_W;
66260b57cec5SDimitry Andric  }
66270b57cec5SDimitry Andric}
66280b57cec5SDimitry Andric
66290b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
66300b57cec5SDimitry Andric// SHA-NI Instructions
66310b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
66320b57cec5SDimitry Andric
66330b57cec5SDimitry Andric// FIXME: Is there a better scheduler class for SHA than WriteVecIMul?
// SHAI_binop - rr / rm records for a two-source SHA instruction, based on the
// generic `I` class with the T8PS opcode-map modifier.
//   Opc       - opcode byte
//   OpcodeStr - mnemonic
//   IntId     - intrinsic matched in the pattern
//   sched     - scheduling class; rm uses sched.Folded / sched.ReadAfterFold
//   UsesXMM0  - when set, the asm string spells out xmm0 as an extra operand
//               and the pattern passes XMM0 as a third intrinsic argument;
//               when clear (the default) both are plain two-operand forms.
// The multiclass itself does not add XMM0 to Uses or tie $src1 to $dst; the
// instantiation site is expected to provide those.
66340b57cec5SDimitry Andricmulticlass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
66350b57cec5SDimitry Andric                      X86FoldableSchedWrite sched, bit UsesXMM0 = 0> {
66360b57cec5SDimitry Andric  def rr : I<Opc, MRMSrcReg, (outs VR128:$dst),
66370b57cec5SDimitry Andric             (ins VR128:$src1, VR128:$src2),
66380b57cec5SDimitry Andric             !if(UsesXMM0,
66390b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
66400b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
66410b57cec5SDimitry Andric             [!if(UsesXMM0,
66420b57cec5SDimitry Andric                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
66430b57cec5SDimitry Andric                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>,
66445ffd83dbSDimitry Andric             T8PS, Sched<[sched]>;
66450b57cec5SDimitry Andric
66460b57cec5SDimitry Andric  def rm : I<Opc, MRMSrcMem, (outs VR128:$dst),
66470b57cec5SDimitry Andric             (ins VR128:$src1, i128mem:$src2),
66480b57cec5SDimitry Andric             !if(UsesXMM0,
66490b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
66500b57cec5SDimitry Andric                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
66510b57cec5SDimitry Andric             [!if(UsesXMM0,
66520b57cec5SDimitry Andric                  (set VR128:$dst, (IntId VR128:$src1,
66530b57cec5SDimitry Andric                    (memop addr:$src2), XMM0)),
66540b57cec5SDimitry Andric                  (set VR128:$dst, (IntId VR128:$src1,
66555ffd83dbSDimitry Andric                    (memop addr:$src2))))]>, T8PS,
66560b57cec5SDimitry Andric             Sched<[sched.Folded, sched.ReadAfterFold]>;
66570b57cec5SDimitry Andric}
66580b57cec5SDimitry Andric
// SHA instruction records. All are gated on HasSHA and have $src1 tied to
// $dst; all use SchedWriteVecIMul.XMM for scheduling.
//  * SHA1RNDS4rri / SHA1RNDS4rmi are written out directly (Ii8, TAPS) because
//    they take an extra u8imm operand, matched as (i8 timm:$src3).
//  * The remaining instructions come from SHAI_binop (T8PS). SHA1RNDS4 and
//    SHA256MSG1 both use opcode byte 0xCC but carry different map modifiers
//    (TAPS vs. T8PS).
//  * SHA256RNDS2 is the only one instantiated with UsesXMM0 = 1, and is the
//    only one wrapped in `let Uses=[XMM0]`.
66590b57cec5SDimitry Andriclet Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
66600b57cec5SDimitry Andric  def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst),
66610b57cec5SDimitry Andric                         (ins VR128:$src1, VR128:$src2, u8imm:$src3),
66620b57cec5SDimitry Andric                         "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
66630b57cec5SDimitry Andric                         [(set VR128:$dst,
66640b57cec5SDimitry Andric                           (int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
66655ffd83dbSDimitry Andric                            (i8 timm:$src3)))]>, TAPS,
66660b57cec5SDimitry Andric                         Sched<[SchedWriteVecIMul.XMM]>;
66670b57cec5SDimitry Andric  def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst),
66680b57cec5SDimitry Andric                         (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
66690b57cec5SDimitry Andric                         "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
66700b57cec5SDimitry Andric                         [(set VR128:$dst,
66710b57cec5SDimitry Andric                           (int_x86_sha1rnds4 VR128:$src1,
66720b57cec5SDimitry Andric                            (memop addr:$src2),
66735ffd83dbSDimitry Andric                            (i8 timm:$src3)))]>, TAPS,
66740b57cec5SDimitry Andric                         Sched<[SchedWriteVecIMul.XMM.Folded,
66750b57cec5SDimitry Andric                                SchedWriteVecIMul.XMM.ReadAfterFold]>;
66760b57cec5SDimitry Andric
66770b57cec5SDimitry Andric  defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte,
66780b57cec5SDimitry Andric                              SchedWriteVecIMul.XMM>;
66790b57cec5SDimitry Andric  defm SHA1MSG1  : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1,
66800b57cec5SDimitry Andric                              SchedWriteVecIMul.XMM>;
66810b57cec5SDimitry Andric  defm SHA1MSG2  : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2,
66820b57cec5SDimitry Andric                              SchedWriteVecIMul.XMM>;
66830b57cec5SDimitry Andric
66840b57cec5SDimitry Andric  let Uses=[XMM0] in
66850b57cec5SDimitry Andric  defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2,
66860b57cec5SDimitry Andric                                SchedWriteVecIMul.XMM, 1>;
66870b57cec5SDimitry Andric
66880b57cec5SDimitry Andric  defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1,
66890b57cec5SDimitry Andric                               SchedWriteVecIMul.XMM>;
66900b57cec5SDimitry Andric  defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2,
66910b57cec5SDimitry Andric                               SchedWriteVecIMul.XMM>;
66920b57cec5SDimitry Andric}
66930b57cec5SDimitry Andric
66940b57cec5SDimitry Andric// Aliases with the implicit %xmm0 argument
// The SHA256RNDS2rr / SHA256RNDS2rm asm strings spell xmm0 out; these aliases
// accept the two-operand spelling in which xmm0 is omitted.
// NOTE(review): the trailing 0 is presumably the InstAlias "emit" flag, making
// these aliases parse-only - confirm against the InstAlias class definition.
66950b57cec5SDimitry Andricdef : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
66960b57cec5SDimitry Andric                (SHA256RNDS2rr VR128:$dst, VR128:$src2), 0>;
66970b57cec5SDimitry Andricdef : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
66980b57cec5SDimitry Andric                (SHA256RNDS2rm VR128:$dst, i128mem:$src2), 0>;
66990b57cec5SDimitry Andric
67000b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
67010b57cec5SDimitry Andric// AES-NI Instructions
67020b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
67030b57cec5SDimitry Andric
// AESI_binop_rm_int - rr / rm records for a two-source AES instruction that is
// selected from an intrinsic, based on AES8I.
//   opc       - opcode byte
//   OpcodeStr - mnemonic
//   IntId     - intrinsic matched as (IntId src1, src2)
//   ld_frag   - load fragment applied to addr:$src2 in rm
//   Is2Addr   - selects the two-operand asm string when set; the default (0)
//               gives the three-operand "$src2, $src1, $dst" form
//   RC        - register class of all register operands (default VR128)
//   MemOp     - memory operand type of $src2 in rm (default i128mem)
// The asm string is injected through `let AsmString`, which is why both
// records pass "" as their own asm-string argument. Both records are scheduled
// with WriteAESDecEnc (Folded/ReadAfterFold for rm).
67040b57cec5SDimitry Andricmulticlass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
67050b57cec5SDimitry Andric                             Intrinsic IntId, PatFrag ld_frag,
67060b57cec5SDimitry Andric                             bit Is2Addr = 0, RegisterClass RC = VR128,
67070b57cec5SDimitry Andric                             X86MemOperand MemOp = i128mem> {
67085ffd83dbSDimitry Andric  let AsmString = OpcodeStr#
67090b57cec5SDimitry Andric                  !if(Is2Addr, "\t{$src2, $dst|$dst, $src2}",
67100b57cec5SDimitry Andric                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}") in {
67110b57cec5SDimitry Andric    def rr : AES8I<opc, MRMSrcReg, (outs RC:$dst),
67120b57cec5SDimitry Andric                   (ins RC:$src1, RC:$src2), "",
67130b57cec5SDimitry Andric                   [(set RC:$dst, (IntId RC:$src1, RC:$src2))]>,
67140b57cec5SDimitry Andric                   Sched<[WriteAESDecEnc]>;
67150b57cec5SDimitry Andric    def rm : AES8I<opc, MRMSrcMem, (outs RC:$dst),
67160b57cec5SDimitry Andric                   (ins RC:$src1, MemOp:$src2), "",
67170b57cec5SDimitry Andric                   [(set RC:$dst, (IntId RC:$src1, (ld_frag addr:$src2)))]>,
67180b57cec5SDimitry Andric                   Sched<[WriteAESDecEnc.Folded, WriteAESDecEnc.ReadAfterFold]>;
67190b57cec5SDimitry Andric  }
67200b57cec5SDimitry Andric}
67210b57cec5SDimitry Andric
// Perform One Round of an AES Encryption/Decryption Flow
// VEX_4V forms on the default VR128 / i128mem operands. Is2Addr keeps its
// default of 0, so these use the three-operand asm string, and the rm
// patterns fold a `load`.
let Predicates = [HasAVX, NoVLX_Or_NoVAES, HasAES] in {
  defm VAESENC          : AESI_binop_rm_int<0xDC, "vaesenc",
                         int_x86_aesni_aesenc, load>, VEX_4V, VEX_WIG;
  defm VAESENCLAST      : AESI_binop_rm_int<0xDD, "vaesenclast",
                         int_x86_aesni_aesenclast, load>, VEX_4V, VEX_WIG;
  defm VAESDEC          : AESI_binop_rm_int<0xDE, "vaesdec",
                         int_x86_aesni_aesdec, load>, VEX_4V, VEX_WIG;
  defm VAESDECLAST      : AESI_binop_rm_int<0xDF, "vaesdeclast",
                         int_x86_aesni_aesdeclast, load>, VEX_4V, VEX_WIG;
}
67330b57cec5SDimitry Andric
// VEX_L variants of the AES round instructions on VR256 / i256mem, matched
// from the *_256 intrinsics. Same opcodes and mnemonics as the VR128 forms;
// Is2Addr is passed as 0 explicitly so that RC and MemOp can be overridden.
let Predicates = [NoVLX, HasVAES] in {
  defm VAESENCY         : AESI_binop_rm_int<0xDC, "vaesenc",
                         int_x86_aesni_aesenc_256, load, 0, VR256,
                         i256mem>, VEX_4V, VEX_L, VEX_WIG;
  defm VAESENCLASTY     : AESI_binop_rm_int<0xDD, "vaesenclast",
                         int_x86_aesni_aesenclast_256, load, 0, VR256,
                         i256mem>, VEX_4V, VEX_L, VEX_WIG;
  defm VAESDECY         : AESI_binop_rm_int<0xDE, "vaesdec",
                         int_x86_aesni_aesdec_256, load, 0, VR256,
                         i256mem>, VEX_4V, VEX_L, VEX_WIG;
  defm VAESDECLASTY     : AESI_binop_rm_int<0xDF, "vaesdeclast",
                         int_x86_aesni_aesdeclast_256, load, 0, VR256,
                         i256mem>, VEX_4V, VEX_L, VEX_WIG;
}
67480b57cec5SDimitry Andric
// AES round instructions without a VEX modifier. $src1 is tied to $dst, so
// Is2Addr = 1 selects the two-operand asm string, and the rm patterns fold
// through `memop` rather than `load`.
let Constraints = "$src1 = $dst" in {
  defm AESENC          : AESI_binop_rm_int<0xDC, "aesenc",
                         int_x86_aesni_aesenc, memop, 1>;
  defm AESENCLAST      : AESI_binop_rm_int<0xDD, "aesenclast",
                         int_x86_aesni_aesenclast, memop, 1>;
  defm AESDEC          : AESI_binop_rm_int<0xDE, "aesdec",
                         int_x86_aesni_aesdec, memop, 1>;
  defm AESDECLAST      : AESI_binop_rm_int<0xDF, "aesdeclast",
                         int_x86_aesni_aesdeclast, memop, 1>;
}
67590b57cec5SDimitry Andric
// Perform the AES InvMixColumn Transformation
// VEX forms: a single source (register or folded `load`) and a separate
// destination, both matched from int_x86_aesni_aesimc.
let Predicates = [HasAVX, HasAES] in {
  def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1),
      "vaesimc\t{$src1, $dst|$dst, $src1}",
      [(set VR128:$dst,
        (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>,
      VEX, VEX_WIG;
  def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
      (ins i128mem:$src1),
      "vaesimc\t{$src1, $dst|$dst, $src1}",
      [(set VR128:$dst, (int_x86_aesni_aesimc (load addr:$src1)))]>,
      Sched<[WriteAESIMC.Folded]>, VEX, VEX_WIG;
}
// aesimc without a VEX modifier: same opcode (0xDB) and intrinsic as the
// VAESIMC defs, but the memory form folds through `memop` instead of `load`.
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
  (ins VR128:$src1),
  "aesimc\t{$src1, $dst|$dst, $src1}",
  [(set VR128:$dst,
    (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>;
def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
  (ins i128mem:$src1),
  "aesimc\t{$src1, $dst|$dst, $src1}",
  [(set VR128:$dst, (int_x86_aesni_aesimc (memop addr:$src1)))]>,
  Sched<[WriteAESIMC.Folded]>;
67840b57cec5SDimitry Andric
// AES Round Key Generation Assist
// VEX forms. $src2 is an 8-bit immediate (u8imm) that the patterns match as a
// target constant (timm); the memory form folds a `load` of $src1.
let Predicates = [HasAVX, HasAES] in {
  def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, u8imm:$src2),
      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
        (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
      Sched<[WriteAESKeyGen]>, VEX, VEX_WIG;
  def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
      (ins i128mem:$src1, u8imm:$src2),
      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
        (int_x86_aesni_aeskeygenassist (load addr:$src1), timm:$src2))]>,
      Sched<[WriteAESKeyGen.Folded]>, VEX, VEX_WIG;
}
// aeskeygenassist without a VEX modifier: same opcode (0xDF) and intrinsic as
// the VAESKEYGENASSIST128 defs, with the memory form folding through `memop`.
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
  (ins VR128:$src1, u8imm:$src2),
  "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
  [(set VR128:$dst,
    (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
  Sched<[WriteAESKeyGen]>;
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
  (ins i128mem:$src1, u8imm:$src2),
  "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
  [(set VR128:$dst,
    (int_x86_aesni_aeskeygenassist (memop addr:$src1), timm:$src2))]>,
  Sched<[WriteAESKeyGen.Folded]>;
68120b57cec5SDimitry Andric
//===----------------------------------------------------------------------===//
// PCLMUL Instructions
//===----------------------------------------------------------------------===//
68160b57cec5SDimitry Andric
// Immediate transform to help with commuting.
// Exchanges the high and low nibbles of the 8-bit immediate: (Imm >> 4) moves
// the high nibble down, (Imm << 4) moves the low nibble up, and the uint8_t
// cast discards whatever was shifted above bit 7. The patterns that swap the
// two sources apply this to $src3.
def PCLMULCommuteImm : SDNodeXForm<timm, [{
  uint8_t Imm = N->getZExtValue();
  return getI8Imm((uint8_t)((Imm >> 4) | (Imm << 4)), SDLoc(N));
}]>;
68220b57cec5SDimitry Andric
// SSE carry-less Multiplication instructions
// $src1 is tied to $dst, so the asm strings list only $src3, $src2 and $dst.
let Predicates = [NoAVX, HasPCLMUL] in {
  let Constraints = "$src1 = $dst" in {
    // Only the register form is flagged commutable; a load in the first
    // operand is handled by the explicit Pat at the end of this block.
    let isCommutable = 1 in
    def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
              (ins VR128:$src1, VR128:$src2, u8imm:$src3),
              "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
              [(set VR128:$dst,
                (int_x86_pclmulqdq VR128:$src1, VR128:$src2, timm:$src3))]>,
                Sched<[WriteCLMul]>;

    def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
              (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
              "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
              [(set VR128:$dst,
                 (int_x86_pclmulqdq VR128:$src1, (memop addr:$src2),
                  timm:$src3))]>,
              Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"

  // Load in the first intrinsic operand: select the rm form with the sources
  // swapped and the immediate rewritten by PCLMULCommuteImm.
  def : Pat<(int_x86_pclmulqdq (memop addr:$src2), VR128:$src1,
                                (i8 timm:$src3)),
            (PCLMULQDQrm VR128:$src1, addr:$src2,
                          (PCLMULCommuteImm timm:$src3))>;
} // Predicates = [NoAVX, HasPCLMUL]
68480b57cec5SDimitry Andric
// SSE aliases
// Generates pclmul{hq,lq}{hq,lq}dq for the rr and rm forms. The immediate is
// built from the mnemonic: bit 0 is set when the first selector (HI) is "hq"
// and bit 4 is set when the second selector (LO) is "hq".
foreach HI = ["hq","lq"] in
foreach LO = ["hq","lq"] in {
  def : InstAlias<"pclmul" # HI # LO # "dq\t{$src, $dst|$dst, $src}",
                  (PCLMULQDQrr VR128:$dst, VR128:$src,
                   !add(!shl(!eq(LO,"hq"),4),!eq(HI,"hq"))), 0>;
  def : InstAlias<"pclmul" # HI # LO # "dq\t{$src, $dst|$dst, $src}",
                  (PCLMULQDQrm VR128:$dst, i128mem:$src,
                   !add(!shl(!eq(LO,"hq"),4),!eq(HI,"hq"))), 0>;
}
68590b57cec5SDimitry Andric
// AVX carry-less Multiplication instructions
// Defines the three-operand rr and rm forms of vpclmulqdq plus a pattern for
// a load appearing in the first intrinsic operand.
//   RC     - register class of $dst, $src1 and (for rr) $src2.
//   MemOp  - memory operand type of $src2 in the rm form.
//   LdFrag - load fragment used to fold the memory operand.
//   IntId  - intrinsic matched by the patterns.
multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
                      PatFrag LdFrag, Intrinsic IntId> {
  let isCommutable = 1 in
  def rr : PCLMULIi8<0x44, MRMSrcReg, (outs RC:$dst),
            (ins RC:$src1, RC:$src2, u8imm:$src3),
            "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
            [(set RC:$dst,
              (IntId RC:$src1, RC:$src2, timm:$src3))]>,
            Sched<[WriteCLMul]>;

  def rm : PCLMULIi8<0x44, MRMSrcMem, (outs RC:$dst),
            (ins RC:$src1, MemOp:$src2, u8imm:$src3),
            "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
            [(set RC:$dst,
               (IntId RC:$src1, (LdFrag addr:$src2), timm:$src3))]>,
            Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;

  // We can commute a load in the first operand by swapping the sources and
  // rotating the immediate.
  def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 timm:$src3)),
            (!cast<Instruction>(NAME#"rm") RC:$src1, addr:$src2,
                                           (PCLMULCommuteImm timm:$src3))>;
}
68840b57cec5SDimitry Andric
// VR128 instantiation, matched from int_x86_pclmulqdq.
let Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in
defm VPCLMULQDQ : vpclmulqdq<VR128, i128mem, load,
                             int_x86_pclmulqdq>, VEX_4V, VEX_WIG;

// VR256 instantiation (VEX_L), matched from int_x86_pclmulqdq_256.
let Predicates = [NoVLX, HasVPCLMULQDQ] in
defm VPCLMULQDQY : vpclmulqdq<VR256, i256mem, load,
                              int_x86_pclmulqdq_256>, VEX_4V, VEX_L, VEX_WIG;
68920b57cec5SDimitry Andric
// Emits the vpclmul<Hi><Lo>dq alias pair (rr and rm) for the instruction
// family named by InstStr. The immediate is derived from the selectors: bit 0
// is set when Hi is "hq" and bit 4 is set when Lo is "hq".
multiclass vpclmulqdq_aliases_impl<string InstStr, RegisterClass RC,
                                   X86MemOperand MemOp, string Hi, string Lo> {
  def : InstAlias<"vpclmul"#Hi#Lo#"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(InstStr # "rr") RC:$dst, RC:$src1, RC:$src2,
                        !add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))), 0>;
  def : InstAlias<"vpclmul"#Hi#Lo#"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(InstStr # "rm") RC:$dst, RC:$src1, MemOp:$src2,
                        !add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))), 0>;
}
69020b57cec5SDimitry Andric
// Instantiates vpclmulqdq_aliases_impl for all four combinations of the
// "hq" / "lq" selectors.
multiclass vpclmulqdq_aliases<string InstStr, RegisterClass RC,
                              X86MemOperand MemOp> {
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "hq", "hq">;
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "hq", "lq">;
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "lq", "hq">;
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "lq", "lq">;
}
69100b57cec5SDimitry Andric
// AVX aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQ", VR128, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQY", VR256, i256mem>;
69140b57cec5SDimitry Andric
//===----------------------------------------------------------------------===//
// SSE4A Instructions
//===----------------------------------------------------------------------===//
69180b57cec5SDimitry Andric
// Everything in this block requires HasSSE4A: the extrq / insertq bit-field
// instructions and the scalar non-temporal stores movntss / movntsd.
let Predicates = [HasSSE4A] in {

let ExeDomain = SSEPackedInt in {
// $src is tied to $dst, so the asm strings name $src as the destination.
let Constraints = "$src = $dst" in {
// Immediate form: $len and $idx are 8-bit immediates.
def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst),
                 (ins VR128:$src, u8imm:$len, u8imm:$idx),
                 "extrq\t{$idx, $len, $src|$src, $len, $idx}",
                 [(set VR128:$dst, (X86extrqi VR128:$src, timm:$len,
                                    timm:$idx))]>,
                 PD, Sched<[SchedWriteVecALU.XMM]>;
// Register form: the second source is the VR128 operand $mask.
def EXTRQ  : I<0x79, MRMSrcReg, (outs VR128:$dst),
              (ins VR128:$src, VR128:$mask),
              "extrq\t{$mask, $src|$src, $mask}",
              [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src,
                                 VR128:$mask))]>,
              PD, Sched<[SchedWriteVecALU.XMM]>;

// Immediate form: $len and $idx are 8-bit immediates.
def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
                   (ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx),
                   "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
                   [(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2,
                                      timm:$len, timm:$idx))]>,
                   XD, Sched<[SchedWriteVecALU.XMM]>;
// Register form: the second source is the VR128 operand $mask.
def INSERTQ  : I<0x79, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src, VR128:$mask),
                 "insertq\t{$mask, $src|$src, $mask}",
                 [(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src,
                                    VR128:$mask))]>,
                 XD, Sched<[SchedWriteVecALU.XMM]>;
} // Constraints = "$src = $dst"
} // ExeDomain = SSEPackedInt

// Non-temporal (unaligned) scalar stores.
let AddedComplexity = 400 in { // Prefer non-temporal versions
// The defs carry no pattern ([]), so the store behaviour is stated through
// mayStore / hasSideEffects; selection goes through the Pats below.
let hasSideEffects = 0, mayStore = 1, SchedRW = [SchedWriteFMoveLSNT.Scl.MR] in {
def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
                "movntss\t{$src, $dst|$dst, $src}", []>, XS;

def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                "movntsd\t{$src, $dst|$dst, $src}", []>, XD;
} // SchedRW

// The instructions take a VR128 source, so the scalar FR32 / FR64 value is
// first copied into that register class.
def : Pat<(nontemporalstore FR32:$src, addr:$dst),
          (MOVNTSS addr:$dst, (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;

def : Pat<(nontemporalstore FR64:$src, addr:$dst),
          (MOVNTSD addr:$dst, (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;

} // AddedComplexity
} // HasSSE4A
69690b57cec5SDimitry Andric
//===----------------------------------------------------------------------===//
// AVX Instructions
//===----------------------------------------------------------------------===//
69730b57cec5SDimitry Andric
//===----------------------------------------------------------------------===//
// VBROADCAST - Load from memory and broadcast to all elements of the
//              destination operand
//
// Memory-source broadcast instruction.
//   opc, OpcodeStr - opcode byte and mnemonic.
//   RC             - destination register class.
//   x86memop       - memory operand type of $src.
//   VT             - result value type of the selection pattern.
//   bcast_frag     - fragment applied to addr:$src in the pattern.
//   Sched          - scheduling write. The parameter shares its name with the
//                    Sched<...> class it is passed to.
class avx_broadcast_rm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                           X86MemOperand x86memop, ValueType VT,
                           PatFrag bcast_frag, SchedWrite Sched> :
  AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
        [(set RC:$dst, (VT (bcast_frag addr:$src)))]>,
        Sched<[Sched]>, VEX;
69850b57cec5SDimitry Andric
// AVX2 adds register forms
// Register-source broadcast: the source is always a VR128 operand of type
// OpVT, and the X86VBroadcast result has type ResVT in register class RC.
class avx2_broadcast_rr<bits<8> opc, string OpcodeStr, RegisterClass RC,
                        ValueType ResVT, ValueType OpVT, SchedWrite Sched> :
  AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
         [(set RC:$dst, (ResVT (X86VBroadcast (OpVT VR128:$src))))]>,
         Sched<[Sched]>, VEX;
69930b57cec5SDimitry Andric
// Memory-source scalar broadcasts (HasAVX, NoVLX).
// NOTE(review): the VR256 forms also use SchedWriteFShuffle.XMM.Folded -
// presumably deliberate; confirm against the scheduling models.
let ExeDomain = SSEPackedSingle, Predicates = [HasAVX, NoVLX] in {
  def VBROADCASTSSrm  : avx_broadcast_rm<0x18, "vbroadcastss", VR128,
                                         f32mem, v4f32, X86VBroadcastld32,
                                         SchedWriteFShuffle.XMM.Folded>;
  def VBROADCASTSSYrm : avx_broadcast_rm<0x18, "vbroadcastss", VR256,
                                         f32mem, v8f32, X86VBroadcastld32,
                                         SchedWriteFShuffle.XMM.Folded>, VEX_L;
}
// vbroadcastsd is defined only with a VR256 destination here.
let ExeDomain = SSEPackedDouble, Predicates = [HasAVX, NoVLX] in
def VBROADCASTSDYrm  : avx_broadcast_rm<0x19, "vbroadcastsd", VR256, f64mem,
                                        v4f64, X86VBroadcastld64,
                                        SchedWriteFShuffle.XMM.Folded>, VEX_L;
70060b57cec5SDimitry Andric
// Register-source scalar broadcasts (HasAVX2, NoVLX). The source is a VR128
// value in every case; only the result type and register class differ.
let ExeDomain = SSEPackedSingle, Predicates = [HasAVX2, NoVLX] in {
  def VBROADCASTSSrr  : avx2_broadcast_rr<0x18, "vbroadcastss", VR128,
                                          v4f32, v4f32, SchedWriteFShuffle.XMM>;
  def VBROADCASTSSYrr : avx2_broadcast_rr<0x18, "vbroadcastss", VR256,
                                          v8f32, v4f32, WriteFShuffle256>, VEX_L;
}
let ExeDomain = SSEPackedDouble, Predicates = [HasAVX2, NoVLX] in
def VBROADCASTSDYrr  : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256,
                                         v4f64, v2f64, WriteFShuffle256>, VEX_L;
70160b57cec5SDimitry Andric
//===----------------------------------------------------------------------===//
// VBROADCAST*128 - Load from memory and broadcast 128-bit vector to both
//                  halves of a 256-bit vector.
//
// Both defs carry an empty pattern list, so mayLoad and hasSideEffects are
// stated explicitly.
let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX2] in
def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
                           (ins i128mem:$src),
                           "vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
                           Sched<[WriteShuffleLd]>, VEX, VEX_L;

let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX],
    ExeDomain = SSEPackedSingle in
def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst),
                           (ins f128mem:$src),
                           "vbroadcastf128\t{$src, $dst|$dst, $src}", []>,
                           Sched<[SchedWriteFShuffle.XMM.Folded]>, VEX, VEX_L;
70330b57cec5SDimitry Andric
// Select every 256-bit X86SubVBroadcastld128 result type, floating-point and
// integer alike, to VBROADCASTF128.
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128 addr:$src)>;
def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128 addr:$src)>;
// NOTE: We're using FP instructions here, but execution domain fixing can
// convert to integer when profitable.
def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128 addr:$src)>;
def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128 addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128 addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128 addr:$src)>;
}
70500b57cec5SDimitry Andric
//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//
// Both defs carry an empty pattern list; selection is done by the
// vinsert_lowering patterns instantiated with "VINSERTF128". $src2 is the
// 128-bit value (register or memory), $src1 the 256-bit source and $src3 an
// 8-bit immediate.
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR128:$src2, u8imm:$src3),
          "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, Sched<[WriteFShuffle256]>, VEX_4V, VEX_L;
let mayLoad = 1 in
def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, f128mem:$src2, u8imm:$src3),
          "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
}
70650b57cec5SDimitry Andric
// To create a 256-bit all ones value, we should produce VCMPTRUEPS
// with YMM register containing zero.
// FIXME: Avoid producing vxorps to clear the fake inputs.
// Both compare inputs are AVX_SET0; 0xf is the compare immediate passed to
// VCMPPSYrri (presumably the "true" predicate the comment above refers to).
let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 immAllOnesV), (VCMPPSYrri (AVX_SET0), (AVX_SET0), 0xf)>;
}
70720b57cec5SDimitry Andric
// Patterns selecting vinsert128_insert to the rr / rm forms of the
// instruction family named by InstrStr.
//   From       - type of the inserted VR128 value.
//   To         - type of the VR256 value being inserted into.
//   memop_frag - load fragment folded into the rm form.
// The instruction immediate is computed from the matched node ($ins) by
// INSERT_get_vinsert128_imm. The "rr" / "rm" suffixes are pasted as quoted
// strings so the cast does not rely on rr / rm being undefined identifiers.
multiclass vinsert_lowering<string InstrStr, ValueType From, ValueType To,
                            PatFrag memop_frag> {
  def : Pat<(vinsert128_insert:$ins (To VR256:$src1), (From VR128:$src2),
                                   (iPTR imm)),
            (!cast<Instruction>(InstrStr#"rr") VR256:$src1, VR128:$src2,
                                       (INSERT_get_vinsert128_imm VR256:$ins))>;
  def : Pat<(vinsert128_insert:$ins (To VR256:$src1),
                                    (From (memop_frag addr:$src2)),
                                    (iPTR imm)),
            (!cast<Instruction>(InstrStr#"rm") VR256:$src1, addr:$src2,
                                       (INSERT_get_vinsert128_imm VR256:$ins))>;
}
70850b57cec5SDimitry Andric
// Floating-point element types select VINSERTF128 under [HasAVX, NoVLX].
let Predicates = [HasAVX, NoVLX] in {
  defm : vinsert_lowering<"VINSERTF128", v4f32, v8f32, loadv4f32>;
  defm : vinsert_lowering<"VINSERTF128", v2f64, v4f64, loadv2f64>;
}

// Integer element types select VINSERTF128 only under HasAVX1Only.
let Predicates = [HasAVX1Only] in {
  defm : vinsert_lowering<"VINSERTF128", v2i64, v4i64,  loadv2i64>;
  defm : vinsert_lowering<"VINSERTF128", v4i32, v8i32,  loadv4i32>;
  defm : vinsert_lowering<"VINSERTF128", v8i16, v16i16, loadv8i16>;
  defm : vinsert_lowering<"VINSERTF128", v16i8, v32i8,  loadv16i8>;
}
70970b57cec5SDimitry Andric
//===----------------------------------------------------------------------===//
// VEXTRACTF128 - Extract packed floating-point values
//
// Both defs carry an empty pattern list; selection is done by the
// vextract_lowering patterns instantiated with "VEXTRACTF128". The rr form
// writes a VR128 register, the mr form stores to an f128mem destination.
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
          (ins VR256:$src1, u8imm:$src2),
          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, Sched<[WriteFShuffle256]>, VEX, VEX_L;
let mayStore = 1 in
def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
          (ins f128mem:$dst, VR256:$src1, u8imm:$src2),
          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, Sched<[WriteFStoreX]>, VEX, VEX_L;
}
71120b57cec5SDimitry Andric
// Patterns selecting vextract128_extract to the rr form, and a store of that
// extract to the mr form, of the instruction family named by InstrStr.
//   From - type of the VR256 source.
//   To   - type of the extracted 128-bit result.
// The instruction immediate is computed from the matched node ($ext) by
// EXTRACT_get_vextract128_imm. The "rr" / "mr" suffixes are pasted as quoted
// strings so the cast does not rely on rr / mr being undefined identifiers.
multiclass vextract_lowering<string InstrStr, ValueType From, ValueType To> {
  def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
            (To (!cast<Instruction>(InstrStr#"rr")
                                    (From VR256:$src1),
                                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;
  def : Pat<(store (To (vextract128_extract:$ext (From VR256:$src1),
                                                 (iPTR imm))), addr:$dst),
            (!cast<Instruction>(InstrStr#"mr") addr:$dst, VR256:$src1,
             (EXTRACT_get_vextract128_imm VR128:$ext))>;
}
71230b57cec5SDimitry Andric
// AVX1 patterns
// Floating-point element types select VEXTRACTF128 under [HasAVX, NoVLX].
let Predicates = [HasAVX, NoVLX] in {
  defm : vextract_lowering<"VEXTRACTF128", v8f32, v4f32>;
  defm : vextract_lowering<"VEXTRACTF128", v4f64, v2f64>;
}

// Integer element types select VEXTRACTF128 only under HasAVX1Only.
let Predicates = [HasAVX1Only] in {
  defm : vextract_lowering<"VEXTRACTF128", v4i64,  v2i64>;
  defm : vextract_lowering<"VEXTRACTF128", v8i32,  v4i32>;
  defm : vextract_lowering<"VEXTRACTF128", v16i16, v8i16>;
  defm : vextract_lowering<"VEXTRACTF128", v32i8,  v16i8>;
}
71360b57cec5SDimitry Andric
//===----------------------------------------------------------------------===//
// VMASKMOV - Conditional SIMD Packed Loads and Stores
//
/// avx_movmask_rm - Defines the four VEX_4V-encoded forms of a masked move:
///   rm / Yrm - load forms (opcode opc_rm) on VR128 / VR256, selected from
///              the IntLd / IntLd256 intrinsics.
///   mr / Ymr - store forms (opcode opc_mr) on VR128 / VR256, selected from
///              the IntSt / IntSt256 intrinsics.
/// schedX supplies the .RM and .MR scheduling classes for the 128-bit forms
/// and schedY those for the 256-bit (VEX_L) forms.
/// NOTE(review): $src1 is presumably the mask register in every form -
/// confirm against the intrinsic definitions.
71400b57cec5SDimitry Andricmulticlass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
71410b57cec5SDimitry Andric                          Intrinsic IntLd, Intrinsic IntLd256,
71428bcb0991SDimitry Andric                          Intrinsic IntSt, Intrinsic IntSt256,
71438bcb0991SDimitry Andric                          X86SchedWriteMaskMove schedX,
71448bcb0991SDimitry Andric                          X86SchedWriteMaskMove schedY> {
  // 128-bit load form.
71450b57cec5SDimitry Andric  def rm  : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
71460b57cec5SDimitry Andric             (ins VR128:$src1, f128mem:$src2),
71470b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
71480b57cec5SDimitry Andric             [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
71498bcb0991SDimitry Andric             VEX_4V, Sched<[schedX.RM]>;
  // 256-bit load form.
71500b57cec5SDimitry Andric  def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
71510b57cec5SDimitry Andric             (ins VR256:$src1, f256mem:$src2),
71520b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
71530b57cec5SDimitry Andric             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
71548bcb0991SDimitry Andric             VEX_4V, VEX_L, Sched<[schedY.RM]>;
  // 128-bit store form; no register result.
71550b57cec5SDimitry Andric  def mr  : AVX8I<opc_mr, MRMDestMem, (outs),
71560b57cec5SDimitry Andric             (ins f128mem:$dst, VR128:$src1, VR128:$src2),
71570b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
71580b57cec5SDimitry Andric             [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>,
71598bcb0991SDimitry Andric             VEX_4V, Sched<[schedX.MR]>;
  // 256-bit store form; no register result.
71600b57cec5SDimitry Andric  def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
71610b57cec5SDimitry Andric             (ins f256mem:$dst, VR256:$src1, VR256:$src2),
71620b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
71630b57cec5SDimitry Andric             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
71648bcb0991SDimitry Andric             VEX_4V, VEX_L, Sched<[schedY.MR]>;
71650b57cec5SDimitry Andric}
71660b57cec5SDimitry Andric
// Single-precision masked moves: load opcode 0x2C, store opcode 0x2E,
// placed in the SSEPackedSingle execution domain.
71670b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in
71680b57cec5SDimitry Andricdefm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
71690b57cec5SDimitry Andric                                 int_x86_avx_maskload_ps,
71700b57cec5SDimitry Andric                                 int_x86_avx_maskload_ps_256,
71710b57cec5SDimitry Andric                                 int_x86_avx_maskstore_ps,
71728bcb0991SDimitry Andric                                 int_x86_avx_maskstore_ps_256,
71738bcb0991SDimitry Andric                                 WriteFMaskMove32, WriteFMaskMove32Y>;
// Double-precision masked moves: load opcode 0x2D, store opcode 0x2F,
// placed in the SSEPackedDouble execution domain.
71740b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in
71750b57cec5SDimitry Andricdefm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
71760b57cec5SDimitry Andric                                 int_x86_avx_maskload_pd,
71770b57cec5SDimitry Andric                                 int_x86_avx_maskload_pd_256,
71780b57cec5SDimitry Andric                                 int_x86_avx_maskstore_pd,
71798bcb0991SDimitry Andric                                 int_x86_avx_maskstore_pd_256,
71808bcb0991SDimitry Andric                                 WriteFMaskMove64, WriteFMaskMove64Y>;
71810b57cec5SDimitry Andric
71820b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7183e8d8bef9SDimitry Andric// AVX_VNNI
7184e8d8bef9SDimitry Andric//===----------------------------------------------------------------------===//
/// avx_vnni_rm - Defines the VEX-encoded AVX-VNNI forms of one
/// multiply-accumulate instruction selected from OpNode:
///   rr  / rm  - 128-bit (VR128, v4i32) register and load-folding forms.
///   Yrr / Yrm - 256-bit (VR256, v8i32, VEX_L) register and load-folding forms.
/// $src1 is tied to $dst (Constraints) and is not printed in the assembly
/// string. IsCommutable sets isCommutable on the register forms only.
///
/// Scheduling: 128-bit forms use SchedWriteVecIMul.XMM and 256-bit forms use
/// SchedWriteVecIMul.YMM. The memory forms use the .Folded write together
/// with one .ReadAfterFold per register source ($src1, $src2), matching the
/// convention of the other load-folding definitions in this file.
7185*fe6060f1SDimitry Andriclet Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI], Constraints = "$src1 = $dst",
7186*fe6060f1SDimitry Andric    ExplicitVEXPrefix = 1, checkVEXPredicate = 1 in
7187e8d8bef9SDimitry Andricmulticlass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7188e8d8bef9SDimitry Andric                       bit IsCommutable> {
7189e8d8bef9SDimitry Andric  let isCommutable = IsCommutable in
7190e8d8bef9SDimitry Andric  def rr  : AVX8I<opc, MRMSrcReg, (outs VR128:$dst),
7191e8d8bef9SDimitry Andric             (ins VR128:$src1, VR128:$src2, VR128:$src3),
7192e8d8bef9SDimitry Andric             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7193e8d8bef9SDimitry Andric             [(set VR128:$dst, (v4i32 (OpNode VR128:$src1,
7194e8d8bef9SDimitry Andric                                       VR128:$src2, VR128:$src3)))]>,
7195e8d8bef9SDimitry Andric             VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
7196e8d8bef9SDimitry Andric
7197e8d8bef9SDimitry Andric  def rm  : AVX8I<opc, MRMSrcMem, (outs VR128:$dst),
7198e8d8bef9SDimitry Andric             (ins VR128:$src1, VR128:$src2, i128mem:$src3),
7199e8d8bef9SDimitry Andric             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7200e8d8bef9SDimitry Andric             [(set VR128:$dst, (v4i32 (OpNode VR128:$src1, VR128:$src2,
7201e8d8bef9SDimitry Andric                                      (loadv4i32 addr:$src3))))]>,
7202e8d8bef9SDimitry Andric             VEX_4V, Sched<[SchedWriteVecIMul.XMM.Folded,
                            SchedWriteVecIMul.XMM.ReadAfterFold,
                            SchedWriteVecIMul.XMM.ReadAfterFold]>;
7203e8d8bef9SDimitry Andric
7204e8d8bef9SDimitry Andric  let isCommutable = IsCommutable in
7205e8d8bef9SDimitry Andric  def Yrr  : AVX8I<opc, MRMSrcReg, (outs VR256:$dst),
7206e8d8bef9SDimitry Andric             (ins VR256:$src1, VR256:$src2, VR256:$src3),
7207e8d8bef9SDimitry Andric             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7208e8d8bef9SDimitry Andric             [(set VR256:$dst, (v8i32 (OpNode VR256:$src1,
7209e8d8bef9SDimitry Andric                                       VR256:$src2, VR256:$src3)))]>,
7210e8d8bef9SDimitry Andric             VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
7211e8d8bef9SDimitry Andric
7212e8d8bef9SDimitry Andric  def Yrm  : AVX8I<opc, MRMSrcMem, (outs VR256:$dst),
7213e8d8bef9SDimitry Andric             (ins VR256:$src1, VR256:$src2, i256mem:$src3),
7214e8d8bef9SDimitry Andric             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7215e8d8bef9SDimitry Andric             [(set VR256:$dst, (v8i32 (OpNode VR256:$src1, VR256:$src2,
7216e8d8bef9SDimitry Andric                                      (loadv8i32 addr:$src3))))]>,
7217e8d8bef9SDimitry Andric             VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM.Folded,
                                   SchedWriteVecIMul.YMM.ReadAfterFold,
                                   SchedWriteVecIMul.YMM.ReadAfterFold]>;
7218e8d8bef9SDimitry Andric}
7219e8d8bef9SDimitry Andric
// AVX-VNNI instantiations, opcodes 0x50-0x53. The last template argument is
// IsCommutable: the word forms (VPDPWSSD / VPDPWSSDS) are marked commutable,
// the byte forms (VPDPBUSD / VPDPBUSDS) are not.
// NOTE(review): presumably the byte forms are not commutable because their
// two multiplicands differ in signedness - confirm against the ISA reference.
7220*fe6060f1SDimitry Andricdefm VPDPBUSD   : avx_vnni_rm<0x50, "vpdpbusd", X86Vpdpbusd, 0>;
7221*fe6060f1SDimitry Andricdefm VPDPBUSDS  : avx_vnni_rm<0x51, "vpdpbusds", X86Vpdpbusds, 0>;
7222*fe6060f1SDimitry Andricdefm VPDPWSSD   : avx_vnni_rm<0x52, "vpdpwssd",  X86Vpdpwssd, 1>;
7223*fe6060f1SDimitry Andricdefm VPDPWSSDS  : avx_vnni_rm<0x53, "vpdpwssds", X86Vpdpwssds, 1>;
7224e8d8bef9SDimitry Andric
// X86vpmaddwd_su - Matches an X86vpmaddwd node only when that node has
// exactly one use (the predicate returns N->hasOneUse()).
7225e8d8bef9SDimitry Andricdef X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
7226e8d8bef9SDimitry Andric                             (X86vpmaddwd node:$lhs, node:$rhs), [{
7227e8d8bef9SDimitry Andric  return N->hasOneUse();
7228e8d8bef9SDimitry Andric}]>;
7229e8d8bef9SDimitry Andric
// Fold add($src1, vpmaddwd($src2, $src3)) into a single VPDPWSSD when the
// multiply result has one use (X86vpmaddwd_su). $src1 becomes the
// accumulator operand; the second multiplicand may be a register or a load.
7230e8d8bef9SDimitry Andriclet Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI] in {
  // 256-bit, register multiplicand.
7231e8d8bef9SDimitry Andric  def : Pat<(v8i32 (add VR256:$src1,
7232e8d8bef9SDimitry Andric                        (X86vpmaddwd_su VR256:$src2, VR256:$src3))),
7233e8d8bef9SDimitry Andric            (VPDPWSSDYrr VR256:$src1, VR256:$src2, VR256:$src3)>;
  // 256-bit, loaded multiplicand.
7234e8d8bef9SDimitry Andric  def : Pat<(v8i32 (add VR256:$src1,
7235e8d8bef9SDimitry Andric                        (X86vpmaddwd_su VR256:$src2, (load addr:$src3)))),
7236e8d8bef9SDimitry Andric            (VPDPWSSDYrm VR256:$src1, VR256:$src2, addr:$src3)>;
  // 128-bit, register multiplicand.
7237e8d8bef9SDimitry Andric  def : Pat<(v4i32 (add VR128:$src1,
7238e8d8bef9SDimitry Andric                        (X86vpmaddwd_su VR128:$src2, VR128:$src3))),
7239e8d8bef9SDimitry Andric            (VPDPWSSDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
  // 128-bit, loaded multiplicand.
7240e8d8bef9SDimitry Andric  def : Pat<(v4i32 (add VR128:$src1,
7241e8d8bef9SDimitry Andric                        (X86vpmaddwd_su VR128:$src2, (load addr:$src3)))),
7242e8d8bef9SDimitry Andric            (VPDPWSSDrm VR128:$src1, VR128:$src2, addr:$src3)>;
7243e8d8bef9SDimitry Andric}
7244e8d8bef9SDimitry Andric
7245e8d8bef9SDimitry Andric//===----------------------------------------------------------------------===//
72460b57cec5SDimitry Andric// VPERMIL - Permute Single and Double Floating-Point Values
72470b57cec5SDimitry Andric//
72480b57cec5SDimitry Andric
/// avx_permil - Defines the four forms of a VPERMIL instruction on register
/// class RC, all guarded by [HasAVX, NoVLX]:
///   rr / rm - control taken from a register / loaded vector of type i_vt
///             (opcode opc_rm, node X86VPermilpv, VEX_4V), scheduled with
///             varsched.
///   ri / mi - control taken from an 8-bit immediate (opcode opc_rmi, node
///             X86VPermilpi, VEX), scheduled with sched.
/// x86memop_i is the memory operand for the loaded control vector (rm) and
/// x86memop_f the memory operand for the loaded data vector (mi). f_vt is
/// the result value type.
72490b57cec5SDimitry Andricmulticlass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
72500b57cec5SDimitry Andric                      RegisterClass RC, X86MemOperand x86memop_f,
72510b57cec5SDimitry Andric                      X86MemOperand x86memop_i,
72520b57cec5SDimitry Andric                      ValueType f_vt, ValueType i_vt,
72530b57cec5SDimitry Andric                      X86FoldableSchedWrite sched,
72540b57cec5SDimitry Andric                      X86FoldableSchedWrite varsched> {
72550b57cec5SDimitry Andric  let Predicates = [HasAVX, NoVLX] in {
72560b57cec5SDimitry Andric    def rr  : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
72570b57cec5SDimitry Andric               (ins RC:$src1, RC:$src2),
72580b57cec5SDimitry Andric               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
72590b57cec5SDimitry Andric               [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt RC:$src2))))]>, VEX_4V,
72600b57cec5SDimitry Andric               Sched<[varsched]>;
    // NOTE(review): the write comes from varsched but the ReadAfterFold from
    // sched - confirm the mix is intentional.
72610b57cec5SDimitry Andric    def rm  : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
72620b57cec5SDimitry Andric               (ins RC:$src1, x86memop_i:$src2),
72630b57cec5SDimitry Andric               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
72640b57cec5SDimitry Andric               [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1,
72650b57cec5SDimitry Andric                              (i_vt (load addr:$src2)))))]>, VEX_4V,
72660b57cec5SDimitry Andric               Sched<[varsched.Folded, sched.ReadAfterFold]>;
72670b57cec5SDimitry Andric
72680b57cec5SDimitry Andric    def ri  : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
72690b57cec5SDimitry Andric             (ins RC:$src1, u8imm:$src2),
72700b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
72718bcb0991SDimitry Andric             [(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 timm:$src2))))]>, VEX,
72720b57cec5SDimitry Andric             Sched<[sched]>;
    // Here the data vector, not the control, is the loaded operand.
72730b57cec5SDimitry Andric    def mi  : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
72740b57cec5SDimitry Andric             (ins x86memop_f:$src1, u8imm:$src2),
72750b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
72760b57cec5SDimitry Andric             [(set RC:$dst,
72778bcb0991SDimitry Andric               (f_vt (X86VPermilpi (load addr:$src1), (i8 timm:$src2))))]>, VEX,
72780b57cec5SDimitry Andric             Sched<[sched.Folded]>;
72790b57cec5SDimitry Andric  }// Predicates = [HasAVX, NoVLX]
72800b57cec5SDimitry Andric}
72810b57cec5SDimitry Andric
// VPERMILPS: variable-control opcode 0x0C, immediate-control opcode 0x04.
// The 128-bit and 256-bit variants share opcodes; the 256-bit one adds VEX_L
// and uses the YMM scheduling classes.
72820b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in {
72830b57cec5SDimitry Andric  defm VPERMILPS  : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
72840b57cec5SDimitry Andric                               v4f32, v4i32, SchedWriteFShuffle.XMM,
72850b57cec5SDimitry Andric                               SchedWriteFVarShuffle.XMM>;
72860b57cec5SDimitry Andric  defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
72870b57cec5SDimitry Andric                               v8f32, v8i32, SchedWriteFShuffle.YMM,
72880b57cec5SDimitry Andric                               SchedWriteFVarShuffle.YMM>, VEX_L;
72890b57cec5SDimitry Andric}
// VPERMILPD: variable-control opcode 0x0D, immediate-control opcode 0x05.
72900b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in {
72910b57cec5SDimitry Andric  defm VPERMILPD  : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
72920b57cec5SDimitry Andric                               v2f64, v2i64, SchedWriteFShuffle.XMM,
72930b57cec5SDimitry Andric                               SchedWriteFVarShuffle.XMM>;
72940b57cec5SDimitry Andric  defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
72950b57cec5SDimitry Andric                               v4f64, v4i64, SchedWriteFShuffle.YMM,
72960b57cec5SDimitry Andric                               SchedWriteFVarShuffle.YMM>, VEX_L;
72970b57cec5SDimitry Andric}
72980b57cec5SDimitry Andric
72990b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
73000b57cec5SDimitry Andric// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
73010b57cec5SDimitry Andric//
73020b57cec5SDimitry Andric
// VPERM2F128 register and load-folding forms (opcode 0x06, VEX_4V, VEX_L).
// Both carry an empty pattern list; instruction selection reaches them
// through the vperm2x128_lowering patterns instantiated with "VPERM2F128".
// Only the register form is marked commutable.
73030b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in {
73040b57cec5SDimitry Andriclet isCommutable = 1 in
73050b57cec5SDimitry Andricdef VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
73060b57cec5SDimitry Andric          (ins VR256:$src1, VR256:$src2, u8imm:$src3),
7307e8d8bef9SDimitry Andric          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
7308e8d8bef9SDimitry Andric          VEX_4V, VEX_L, Sched<[WriteFShuffle256]>;
73090b57cec5SDimitry Andricdef VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
73100b57cec5SDimitry Andric          (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
7311e8d8bef9SDimitry Andric          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
7312e8d8bef9SDimitry Andric          VEX_4V, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
73130b57cec5SDimitry Andric}
73140b57cec5SDimitry Andric
73150b57cec5SDimitry Andric// Immediate transform to help with commuting.
// Perm2XCommuteImm - Rewrites a target immediate as an i8 with bits 1 and 5
// toggled (XOR with 0x22).
// NOTE(review): presumably those two bits choose which source operand feeds
// each 128-bit half, so toggling them compensates for swapped sources -
// confirm against the VPERM2F128 immediate encoding.
73168bcb0991SDimitry Andricdef Perm2XCommuteImm : SDNodeXForm<timm, [{
73170b57cec5SDimitry Andric  return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N));
73180b57cec5SDimitry Andric}]>;
73190b57cec5SDimitry Andric
/// vperm2x128_lowering - Emits the selection patterns that map X86VPerm2x128
/// of value type VT onto the instruction family named by InstrStr:
///   * both sources in registers            -> InstrStr#rr
///   * second source loaded via memop_frag  -> InstrStr#rm
///   * first source loaded via memop_frag   -> InstrStr#rm with the sources
///     swapped and the immediate rewritten by Perm2XCommuteImm.
7320e8d8bef9SDimitry Andricmulticlass vperm2x128_lowering<string InstrStr, ValueType VT, PatFrag memop_frag> {
7321e8d8bef9SDimitry Andric  def : Pat<(VT (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))),
7322e8d8bef9SDimitry Andric            (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR256:$src2, timm:$imm)>;
7323e8d8bef9SDimitry Andric  def : Pat<(VT (X86VPerm2x128 VR256:$src1, (memop_frag addr:$src2), (i8 timm:$imm))),
7324e8d8bef9SDimitry Andric            (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2, timm:$imm)>;
73250b57cec5SDimitry Andric  // Pattern with load in other operand.
7326e8d8bef9SDimitry Andric  def : Pat<(VT (X86VPerm2x128 (memop_frag addr:$src2), VR256:$src1, (i8 timm:$imm))),
7327e8d8bef9SDimitry Andric            (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
7328e8d8bef9SDimitry Andric                                             (Perm2XCommuteImm timm:$imm))>;
7329e8d8bef9SDimitry Andric}
7330e8d8bef9SDimitry Andric
// Floating-point element types select VPERM2F128 whenever HasAVX holds.
7331e8d8bef9SDimitry Andriclet Predicates = [HasAVX] in {
7332e8d8bef9SDimitry Andric  defm : vperm2x128_lowering<"VPERM2F128", v4f64, loadv4f64>;
7333e8d8bef9SDimitry Andric  defm : vperm2x128_lowering<"VPERM2F128", v8f32, loadv8f32>;
73340b57cec5SDimitry Andric}
73350b57cec5SDimitry Andric
// Integer element types select VPERM2F128 only under HasAVX1Only.
// NOTE(review): presumably an integer-domain instruction covers these types
// once AVX2 is available - confirm against the AVX2 section of this file.
73360b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in {
7337e8d8bef9SDimitry Andric  defm : vperm2x128_lowering<"VPERM2F128", v4i64,  loadv4i64>;
7338e8d8bef9SDimitry Andric  defm : vperm2x128_lowering<"VPERM2F128", v8i32,  loadv8i32>;
7339e8d8bef9SDimitry Andric  defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>;
7340e8d8bef9SDimitry Andric  defm : vperm2x128_lowering<"VPERM2F128", v32i8,  loadv32i8>;
73410b57cec5SDimitry Andric}
73420b57cec5SDimitry Andric
73430b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
73440b57cec5SDimitry Andric// VZERO - Zero YMM registers
73450b57cec5SDimitry Andric// Note: These instructions do not affect YMM16-YMM31.
73460b57cec5SDimitry Andric//
73470b57cec5SDimitry Andric
// VZEROALL / VZEROUPPER: operand-less instructions (RawFrm, opcode 0x77, PS,
// VEX, VEX_WIG) selected from the int_x86_avx_vzeroall /
// int_x86_avx_vzeroupper intrinsics and requiring HasAVX. The two encodings
// differ only in VEX_L, which is set on VZEROALL alone. Both are modelled as
// defining YMM0-YMM15 and are scheduled as WriteSystem.
73480b57cec5SDimitry Andriclet SchedRW = [WriteSystem] in {
73490b57cec5SDimitry Andriclet Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
73500b57cec5SDimitry Andric            YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
73510b57cec5SDimitry Andric  // Zero All YMM registers
73520b57cec5SDimitry Andric  def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
73530b57cec5SDimitry Andric                  [(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L,
73540b57cec5SDimitry Andric                  Requires<[HasAVX]>, VEX_WIG;
73550b57cec5SDimitry Andric
73560b57cec5SDimitry Andric  // Zero Upper bits of YMM registers
73570b57cec5SDimitry Andric  def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
73580b57cec5SDimitry Andric                     [(int_x86_avx_vzeroupper)]>, PS, VEX,
73590b57cec5SDimitry Andric                     Requires<[HasAVX]>, VEX_WIG;
73600b57cec5SDimitry Andric} // Defs
73610b57cec5SDimitry Andric} // SchedRW
73620b57cec5SDimitry Andric
73630b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
73640b57cec5SDimitry Andric// Half precision conversion instructions
73650b57cec5SDimitry Andric//
73660b57cec5SDimitry Andric
/// f16c_ph2ps - Defines the register (rr) and memory (rm) forms of
/// vcvtph2ps (opcode 0x13, T8PD, VEX) producing a result in RC.
///   rr - source is always VR128; selected from X86any_cvtph2ps.
///   rm - source is x86memop; it has no pattern of its own, so
///        hasSideEffects = 0 and mayLoad = 1 are stated explicitly.
/// sched is the scheduling class; the memory form uses sched.Folded.
73670b57cec5SDimitry Andricmulticlass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop,
73680b57cec5SDimitry Andric                      X86FoldableSchedWrite sched> {
73690b57cec5SDimitry Andric  def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
73700b57cec5SDimitry Andric             "vcvtph2ps\t{$src, $dst|$dst, $src}",
73715ffd83dbSDimitry Andric             [(set RC:$dst, (X86any_cvtph2ps VR128:$src))]>,
73720b57cec5SDimitry Andric             T8PD, VEX, Sched<[sched]>;
73730b57cec5SDimitry Andric  let hasSideEffects = 0, mayLoad = 1 in
73740b57cec5SDimitry Andric  def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
73750b57cec5SDimitry Andric             "vcvtph2ps\t{$src, $dst|$dst, $src}",
73765ffd83dbSDimitry Andric             []>, T8PD, VEX, Sched<[sched.Folded]>;
73770b57cec5SDimitry Andric}
73780b57cec5SDimitry Andric
/// f16c_ps2ph - Defines the register (rr) and store (mr) forms of
/// vcvtps2ph (opcode 0x1D, TAPD, VEX) converting from RC with an 8-bit
/// immediate in $src2.
///   rr - result is always VR128; selected from X86any_cvtps2ph.
///   mr - stores to x86memop; it has no pattern of its own, so
///        hasSideEffects = 0 and mayStore = 1 are stated explicitly.
/// RR and MR are the scheduling classes for the two forms.
73790b57cec5SDimitry Andricmulticlass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
73800b57cec5SDimitry Andric                      SchedWrite RR, SchedWrite MR> {
73810b57cec5SDimitry Andric  def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
73820b57cec5SDimitry Andric               (ins RC:$src1, i32u8imm:$src2),
73830b57cec5SDimitry Andric               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
73845ffd83dbSDimitry Andric               [(set VR128:$dst, (X86any_cvtps2ph RC:$src1, timm:$src2))]>,
73850b57cec5SDimitry Andric               TAPD, VEX, Sched<[RR]>;
73860b57cec5SDimitry Andric  let hasSideEffects = 0, mayStore = 1 in
73870b57cec5SDimitry Andric  def mr : Ii8<0x1D, MRMDestMem, (outs),
73880b57cec5SDimitry Andric               (ins x86memop:$dst, RC:$src1, i32u8imm:$src2),
73890b57cec5SDimitry Andric               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
73900b57cec5SDimitry Andric               TAPD, VEX, Sched<[MR]>;
73910b57cec5SDimitry Andric}
73920b57cec5SDimitry Andric
// F16C conversion instructions and the patterns that feed their memory
// forms, all guarded by [HasF16C, NoVLX]. The 128-bit variants use f64mem
// and the 256-bit (VEX_L) variants use f128mem for the half-precision side.
73930b57cec5SDimitry Andriclet Predicates = [HasF16C, NoVLX] in {
7394480093f4SDimitry Andric  defm VCVTPH2PS  : f16c_ph2ps<VR128, f64mem, WriteCvtPH2PS>, SIMD_EXC;
7395480093f4SDimitry Andric  defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, WriteCvtPH2PSY>, VEX_L, SIMD_EXC;
73960b57cec5SDimitry Andric  defm VCVTPS2PH  : f16c_ps2ph<VR128, f64mem, WriteCvtPS2PH,
7397480093f4SDimitry Andric                               WriteCvtPS2PHSt>, SIMD_EXC;
73980b57cec5SDimitry Andric  defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, WriteCvtPS2PHY,
7399480093f4SDimitry Andric                               WriteCvtPS2PHYSt>, VEX_L, SIMD_EXC;
74000b57cec5SDimitry Andric
74010b57cec5SDimitry Andric  // Pattern match vcvtph2ps of a scalar i64 load.
74025ffd83dbSDimitry Andric  def : Pat<(v4f32 (X86any_cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
74030b57cec5SDimitry Andric            (VCVTPH2PSrm addr:$src)>;
74045ffd83dbSDimitry Andric  def : Pat<(v4f32 (X86any_cvtph2ps (bc_v8i16
74050b57cec5SDimitry Andric              (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
74060b57cec5SDimitry Andric            (VCVTPH2PSrm addr:$src)>;
  // 256-bit result: fold a full v8i16 load.
74075ffd83dbSDimitry Andric  def : Pat<(v8f32 (X86any_cvtph2ps (loadv8i16 addr:$src))),
74085ffd83dbSDimitry Andric            (VCVTPH2PSYrm addr:$src)>;
74090b57cec5SDimitry Andric
  // Fold a store of element 0 of the 128-bit vcvtps2ph result, viewed as
  // either f64 or i64, into the store form.
74100b57cec5SDimitry Andric  def : Pat<(store (f64 (extractelt
74115ffd83dbSDimitry Andric                         (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128:$src1, timm:$src2))),
74120b57cec5SDimitry Andric                         (iPTR 0))), addr:$dst),
74138bcb0991SDimitry Andric            (VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>;
74140b57cec5SDimitry Andric  def : Pat<(store (i64 (extractelt
74155ffd83dbSDimitry Andric                         (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128:$src1, timm:$src2))),
74160b57cec5SDimitry Andric                         (iPTR 0))), addr:$dst),
74178bcb0991SDimitry Andric            (VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>;
  // 256-bit source: the whole v8i16 result is stored.
74185ffd83dbSDimitry Andric  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256:$src1, timm:$src2)), addr:$dst),
74198bcb0991SDimitry Andric            (VCVTPS2PHYmr addr:$dst, VR256:$src1, timm:$src2)>;
74200b57cec5SDimitry Andric}
74210b57cec5SDimitry Andric
74220b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
74230b57cec5SDimitry Andric// AVX2 Instructions
74240b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
74250b57cec5SDimitry Andric
74260b57cec5SDimitry Andric/// AVX2_blend_rmi - AVX2 blend with 8-bit immediate
/// Parameters: opc is the opcode, OpNode the selected DAG node, OpVT the
/// result value type, RC / x86memop the register class and memory operand,
/// sched the scheduling class, and commuteXForm the immediate transform
/// applied when the sources are swapped. Produces:
///   rri - register form, marked commutable.
///   rmi - form with the second source loaded from memory.
///   an anonymous pattern that selects NAME#"rmi" when the load is the first
///   source, swapping the sources and rewriting the immediate with
///   commuteXForm.
74270b57cec5SDimitry Andricmulticlass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
74280b57cec5SDimitry Andric                          ValueType OpVT, X86FoldableSchedWrite sched,
74290b57cec5SDimitry Andric                          RegisterClass RC,
74300b57cec5SDimitry Andric                          X86MemOperand x86memop, SDNodeXForm commuteXForm> {
74310b57cec5SDimitry Andric  let isCommutable = 1 in
74320b57cec5SDimitry Andric  def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
74330b57cec5SDimitry Andric        (ins RC:$src1, RC:$src2, u8imm:$src3),
74340b57cec5SDimitry Andric        !strconcat(OpcodeStr,
74350b57cec5SDimitry Andric            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
74368bcb0991SDimitry Andric        [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
74370b57cec5SDimitry Andric        Sched<[sched]>, VEX_4V;
74380b57cec5SDimitry Andric  def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
74390b57cec5SDimitry Andric        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
74400b57cec5SDimitry Andric        !strconcat(OpcodeStr,
74410b57cec5SDimitry Andric            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
74420b57cec5SDimitry Andric        [(set RC:$dst,
74438bcb0991SDimitry Andric          (OpVT (OpNode RC:$src1, (load addr:$src2), timm:$src3)))]>,
74440b57cec5SDimitry Andric        Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V;
74450b57cec5SDimitry Andric
74460b57cec5SDimitry Andric  // Pattern to commute if load is in first source.
74478bcb0991SDimitry Andric  def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, timm:$src3)),
74480b57cec5SDimitry Andric            (!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
74498bcb0991SDimitry Andric                                            (commuteXForm timm:$src3))>;
74500b57cec5SDimitry Andric}
74510b57cec5SDimitry Andric
// VPBLENDD (opcode 0x02) for v4i32 / v8i32, plus patterns that reuse it for
// X86Blendi on 64-bit-element vectors (v4i64 / v2i64) by rewriting the
// immediate with the BlendScale* transforms. The *Commute* transforms are
// used when the loaded operand is the first source and the sources are
// swapped in the selected instruction.
// NOTE(review): the BlendScale* transforms are defined elsewhere; presumably
// they widen each per-qword mask bit to two per-dword bits - confirm.
74520b57cec5SDimitry Andriclet Predicates = [HasAVX2] in {
74530b57cec5SDimitry Andricdefm VPBLENDD : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v4i32,
74540b57cec5SDimitry Andric                               SchedWriteBlend.XMM, VR128, i128mem,
74550b57cec5SDimitry Andric                               BlendCommuteImm4>;
74560b57cec5SDimitry Andricdefm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32,
74570b57cec5SDimitry Andric                                SchedWriteBlend.YMM, VR256, i256mem,
74580b57cec5SDimitry Andric                                BlendCommuteImm8>, VEX_L;
74590b57cec5SDimitry Andric
// v4i64 blends via the 256-bit VPBLENDD.
74608bcb0991SDimitry Andricdef : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3),
74618bcb0991SDimitry Andric          (VPBLENDDYrri VR256:$src1, VR256:$src2, (BlendScaleImm4 timm:$src3))>;
74628bcb0991SDimitry Andricdef : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3),
74638bcb0991SDimitry Andric          (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>;
74648bcb0991SDimitry Andricdef : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3),
74658bcb0991SDimitry Andric          (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>;
74660b57cec5SDimitry Andric
// v2i64 blends via the 128-bit VPBLENDD.
74678bcb0991SDimitry Andricdef : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
74688bcb0991SDimitry Andric          (VPBLENDDrri VR128:$src1, VR128:$src2, (BlendScaleImm2to4 timm:$src3))>;
74698bcb0991SDimitry Andricdef : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3),
74708bcb0991SDimitry Andric          (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleImm2to4 timm:$src3))>;
74718bcb0991SDimitry Andricdef : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3),
74728bcb0991SDimitry Andric          (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2to4 timm:$src3))>;
74730b57cec5SDimitry Andric}
74740b57cec5SDimitry Andric
74750b57cec5SDimitry Andric// For insertion into the zero index (low half) of a 256-bit vector, it is
74760b57cec5SDimitry Andric// more efficient to generate a blend with immediate instead of an insert*128.
74775ffd83dbSDimitry Andric// NOTE: We're using FP instructions here, but execution domain fixing should
74780b57cec5SDimitry Andric// take care of using integer instructions when profitable.
// insert_subvector at index 0 of a 256-bit integer vector, selected as
// VBLENDPSY. The 128-bit value is first widened to 256 bits with
// INSERT_SUBREG into an IMPLICIT_DEF (upper half undefined).
// NOTE(review): immediate 0xf presumably takes the low four 32-bit lanes
// from the second blend operand and 0xf0 the high four - confirm against
// the VBLENDPS immediate encoding.
74790b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
// Register destination vector: the widened 128-bit value is the second
// blend operand, immediate 0xf.
74800b57cec5SDimitry Andricdef : Pat<(insert_subvector (v8i32 VR256:$src1), (v4i32 VR128:$src2), (iPTR 0)),
74810b57cec5SDimitry Andric          (VBLENDPSYrri VR256:$src1,
74820b57cec5SDimitry Andric                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
74830b57cec5SDimitry Andric                                       VR128:$src2, sub_xmm), 0xf)>;
74840b57cec5SDimitry Andricdef : Pat<(insert_subvector (v4i64 VR256:$src1), (v2i64 VR128:$src2), (iPTR 0)),
74850b57cec5SDimitry Andric          (VBLENDPSYrri VR256:$src1,
74860b57cec5SDimitry Andric                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
74870b57cec5SDimitry Andric                                       VR128:$src2, sub_xmm), 0xf)>;
74880b57cec5SDimitry Andricdef : Pat<(insert_subvector (v16i16 VR256:$src1), (v8i16 VR128:$src2), (iPTR 0)),
74890b57cec5SDimitry Andric          (VBLENDPSYrri VR256:$src1,
74900b57cec5SDimitry Andric                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
74910b57cec5SDimitry Andric                                       VR128:$src2, sub_xmm), 0xf)>;
74920b57cec5SDimitry Andricdef : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)),
74930b57cec5SDimitry Andric          (VBLENDPSYrri VR256:$src1,
74940b57cec5SDimitry Andric                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
74950b57cec5SDimitry Andric                                       VR128:$src2, sub_xmm), 0xf)>;
74960b57cec5SDimitry Andric
// Loaded destination vector: the operands are swapped so the load can be
// folded as the second blend operand, hence immediate 0xf0.
74970b57cec5SDimitry Andricdef : Pat<(insert_subvector (loadv8i32 addr:$src2), (v4i32 VR128:$src1), (iPTR 0)),
74980b57cec5SDimitry Andric          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
74990b57cec5SDimitry Andric                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
75000b57cec5SDimitry Andricdef : Pat<(insert_subvector (loadv4i64 addr:$src2), (v2i64 VR128:$src1), (iPTR 0)),
75010b57cec5SDimitry Andric          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
75020b57cec5SDimitry Andric                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
75030b57cec5SDimitry Andricdef : Pat<(insert_subvector (loadv16i16 addr:$src2), (v8i16 VR128:$src1), (iPTR 0)),
75040b57cec5SDimitry Andric          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
75050b57cec5SDimitry Andric                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
75060b57cec5SDimitry Andricdef : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1), (iPTR 0)),
75070b57cec5SDimitry Andric          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
75080b57cec5SDimitry Andric                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
75090b57cec5SDimitry Andric}
75100b57cec5SDimitry Andric
75110b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
75120b57cec5SDimitry Andric// VPBROADCAST - Load from memory and broadcast to all elements of the
75130b57cec5SDimitry Andric//               destination operand
75140b57cec5SDimitry Andric//
/// avx2_broadcast - Defines the four forms of an AVX2 integer broadcast
/// under [HasAVX2, prd]:
///   rr  / rm  - 128-bit result (OpVT128) from a VR128 register / memory.
///   Yrr / Yrm - 256-bit result (OpVT256, VEX_L) from a VR128 register /
///               memory.
/// Register forms are selected from X86VBroadcast of an OpVT128 value and
/// memory forms from bcast_frag. An extra pattern handles X86VBroadcast of a
/// 256-bit source by extracting its sub_xmm subregister and using Yrr.
75150b57cec5SDimitry Andricmulticlass avx2_broadcast<bits<8> opc, string OpcodeStr,
75168bcb0991SDimitry Andric                          X86MemOperand x86memop, PatFrag bcast_frag,
75170b57cec5SDimitry Andric                          ValueType OpVT128, ValueType OpVT256, Predicate prd> {
75180b57cec5SDimitry Andric  let Predicates = [HasAVX2, prd] in {
75190b57cec5SDimitry Andric    def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
75200b57cec5SDimitry Andric                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
75210b57cec5SDimitry Andric                  [(set VR128:$dst,
75220b57cec5SDimitry Andric                   (OpVT128 (X86VBroadcast (OpVT128 VR128:$src))))]>,
75230b57cec5SDimitry Andric                  Sched<[SchedWriteShuffle.XMM]>, VEX;
75240b57cec5SDimitry Andric    def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
75250b57cec5SDimitry Andric                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
75260b57cec5SDimitry Andric                  [(set VR128:$dst,
75278bcb0991SDimitry Andric                   (OpVT128 (bcast_frag addr:$src)))]>,
75280b57cec5SDimitry Andric                  Sched<[SchedWriteShuffle.XMM.Folded]>, VEX;
75290b57cec5SDimitry Andric    def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
75300b57cec5SDimitry Andric                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
75310b57cec5SDimitry Andric                   [(set VR256:$dst,
75320b57cec5SDimitry Andric                    (OpVT256 (X86VBroadcast (OpVT128 VR128:$src))))]>,
75330b57cec5SDimitry Andric                   Sched<[WriteShuffle256]>, VEX, VEX_L;
    // NOTE(review): this 256-bit memory form uses the XMM folded scheduling
    // class, unlike Yrr above - confirm this is intentional.
75340b57cec5SDimitry Andric    def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
75350b57cec5SDimitry Andric                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
75360b57cec5SDimitry Andric                   [(set VR256:$dst,
75378bcb0991SDimitry Andric                    (OpVT256 (bcast_frag addr:$src)))]>,
75380b57cec5SDimitry Andric                   Sched<[SchedWriteShuffle.XMM.Folded]>, VEX, VEX_L;
75390b57cec5SDimitry Andric
75400b57cec5SDimitry Andric    // Provide aliases for broadcast from the same register class that
75410b57cec5SDimitry Andric    // automatically does the extract.
75420b57cec5SDimitry Andric    def : Pat<(OpVT256 (X86VBroadcast (OpVT256 VR256:$src))),
75430b57cec5SDimitry Andric              (!cast<Instruction>(NAME#"Yrr")
75440b57cec5SDimitry Andric                  (OpVT128 (EXTRACT_SUBREG (OpVT256 VR256:$src),sub_xmm)))>;
75450b57cec5SDimitry Andric  }
75460b57cec5SDimitry Andric}
75470b57cec5SDimitry Andric
// Instantiate avx2_broadcast once per integer element width (byte, word,
// dword, qword). Each instantiation supplies the opcode, the mnemonic, the
// scalar memory operand, the matching X86VBroadcastld* fragment, and the
// 128-bit and 256-bit vector types. The final argument is NoVLX_Or_NoBWI for
// the byte/word forms and NoVLX for the dword/qword forms.
// NOTE(review): the multiclass header lies outside this excerpt; presumably
// the final argument is the predicate guarding the records - confirm.
75488bcb0991SDimitry Andricdefm VPBROADCASTB  : avx2_broadcast<0x78, "vpbroadcastb", i8mem, X86VBroadcastld8,
75490b57cec5SDimitry Andric                                    v16i8, v32i8, NoVLX_Or_NoBWI>;
75508bcb0991SDimitry Andricdefm VPBROADCASTW  : avx2_broadcast<0x79, "vpbroadcastw", i16mem, X86VBroadcastld16,
75510b57cec5SDimitry Andric                                    v8i16, v16i16, NoVLX_Or_NoBWI>;
75528bcb0991SDimitry Andricdefm VPBROADCASTD  : avx2_broadcast<0x58, "vpbroadcastd", i32mem, X86VBroadcastld32,
75530b57cec5SDimitry Andric                                    v4i32, v8i32, NoVLX>;
75548bcb0991SDimitry Andricdefm VPBROADCASTQ  : avx2_broadcast<0x59, "vpbroadcastq", i64mem, X86VBroadcastld64,
75550b57cec5SDimitry Andric                                    v2i64, v4i64, NoVLX>;
75560b57cec5SDimitry Andric
// Floating-point broadcasts from a scalar register (HasAVX2 without VLX).
// The FR32/FR64 source is first re-classed into VR128 with COPY_TO_REGCLASS
// and then fed to the register form of VBROADCASTSS / VBROADCASTSD.
// Only v4f32, v8f32 and v4f64 results are listed in this group.
75570b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
75580b57cec5SDimitry Andric  // Provide fallback in case the load node that is used in the patterns above
75590b57cec5SDimitry Andric  // is used by additional users, which prevents the pattern selection.
75600b57cec5SDimitry Andric    def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
75610b57cec5SDimitry Andric              (VBROADCASTSSrr (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;
75620b57cec5SDimitry Andric    def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
75630b57cec5SDimitry Andric              (VBROADCASTSSYrr (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;
75640b57cec5SDimitry Andric    def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
75650b57cec5SDimitry Andric              (VBROADCASTSDYrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;
75660b57cec5SDimitry Andric}
75670b57cec5SDimitry Andric
// Byte and word broadcasts from a general-purpose register. Each pattern:
//   1. places the GR8/GR16 value into the low sub-register (sub_8bit or
//      sub_16bit) of an otherwise undefined i32 via INSERT_SUBREG,
//   2. moves that i32 into a vector register with VMOVDI2PDIrr,
//   3. broadcasts it with the register form of VPBROADCASTB / VPBROADCASTW.
// NOTE(review): the upper bits of the i32 come from IMPLICIT_DEF; presumably
// this is safe because the broadcast only consumes the low element - confirm.
75680b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
75690b57cec5SDimitry Andric  def : Pat<(v16i8 (X86VBroadcast GR8:$src)),
75708bcb0991SDimitry Andric        (VPBROADCASTBrr (VMOVDI2PDIrr
75710b57cec5SDimitry Andric                         (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
75728bcb0991SDimitry Andric                                             GR8:$src, sub_8bit))))>;
75730b57cec5SDimitry Andric  def : Pat<(v32i8 (X86VBroadcast GR8:$src)),
75748bcb0991SDimitry Andric        (VPBROADCASTBYrr (VMOVDI2PDIrr
75750b57cec5SDimitry Andric                          (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
75768bcb0991SDimitry Andric                                              GR8:$src, sub_8bit))))>;
75770b57cec5SDimitry Andric
75780b57cec5SDimitry Andric  def : Pat<(v8i16 (X86VBroadcast GR16:$src)),
75798bcb0991SDimitry Andric        (VPBROADCASTWrr (VMOVDI2PDIrr
75800b57cec5SDimitry Andric                         (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
75818bcb0991SDimitry Andric                                             GR16:$src, sub_16bit))))>;
75820b57cec5SDimitry Andric  def : Pat<(v16i16 (X86VBroadcast GR16:$src)),
75838bcb0991SDimitry Andric        (VPBROADCASTWYrr (VMOVDI2PDIrr
75840b57cec5SDimitry Andric                          (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
75858bcb0991SDimitry Andric                                              GR16:$src, sub_16bit))))>;
75860b57cec5SDimitry Andric}
// Dword and qword broadcasts from a general-purpose register. The scalar is
// moved into a vector register (VMOVDI2PDIrr for GR32, VMOV64toPQIrr for GR64)
// and then broadcast with the register form of VPBROADCASTD / VPBROADCASTQ.
75870b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
75880b57cec5SDimitry Andric  def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
75898bcb0991SDimitry Andric            (VPBROADCASTDrr (VMOVDI2PDIrr GR32:$src))>;
75900b57cec5SDimitry Andric  def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
75918bcb0991SDimitry Andric            (VPBROADCASTDYrr (VMOVDI2PDIrr GR32:$src))>;
75920b57cec5SDimitry Andric  def : Pat<(v2i64 (X86VBroadcast GR64:$src)),
75938bcb0991SDimitry Andric            (VPBROADCASTQrr (VMOV64toPQIrr GR64:$src))>;
75940b57cec5SDimitry Andric  def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
75958bcb0991SDimitry Andric            (VPBROADCASTQYrr (VMOV64toPQIrr GR64:$src))>;
75960b57cec5SDimitry Andric}
75970b57cec5SDimitry Andric
75980b57cec5SDimitry Andric// AVX1 broadcast patterns
// Under HasAVX1Only, integer broadcast-loads (v8i32, v4i64, v4i32) are
// selected to the memory forms of the floating-point VBROADCASTSS /
// VBROADCASTSD instructions.
75990b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in {
76008bcb0991SDimitry Andricdef : Pat<(v8i32 (X86VBroadcastld32 addr:$src)),
76010b57cec5SDimitry Andric          (VBROADCASTSSYrm addr:$src)>;
76028bcb0991SDimitry Andricdef : Pat<(v4i64 (X86VBroadcastld64 addr:$src)),
76030b57cec5SDimitry Andric          (VBROADCASTSDYrm addr:$src)>;
76048bcb0991SDimitry Andricdef : Pat<(v4i32 (X86VBroadcastld32 addr:$src)),
76050b57cec5SDimitry Andric          (VBROADCASTSSrm addr:$src)>;
76060b57cec5SDimitry Andric}
76070b57cec5SDimitry Andric
// v2f64 broadcasts (HasAVX without VLX) are all selected to VMOVDDUP:
//   - from an f64 scalar register (re-classed into VR128 first),
//   - from a 64-bit broadcast-load (memory form),
//   - from an existing v2f64 vector register.
76080b57cec5SDimitry Andric  // Provide fallback in case the load node that is used in the patterns above
76090b57cec5SDimitry Andric  // is used by additional users, which prevents the pattern selection.
76100b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in {
76110b57cec5SDimitry Andric  // 128bit broadcasts:
76120b57cec5SDimitry Andric  def : Pat<(v2f64 (X86VBroadcast f64:$src)),
76130b57cec5SDimitry Andric            (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;
76148bcb0991SDimitry Andric  def : Pat<(v2f64 (X86VBroadcastld64 addr:$src)),
76150b57cec5SDimitry Andric            (VMOVDDUPrm addr:$src)>;
76160b57cec5SDimitry Andric
76170b57cec5SDimitry Andric  def : Pat<(v2f64 (X86VBroadcast v2f64:$src)),
76180b57cec5SDimitry Andric            (VMOVDDUPrr VR128:$src)>;
76190b57cec5SDimitry Andric}
76200b57cec5SDimitry Andric
// Register-source broadcasts for HasAVX1Only, built from shuffles instead of
// the VBROADCAST*/VPBROADCAST* register forms used by the AVX2 patterns.
//   - 128-bit results: one shuffle of the scalar moved into VR128
//     (VPERMILPSri imm 0, VPSHUFDri imm 0, or VPSHUFDri imm 0x44 for i64).
//   - 256-bit results: the same 128-bit splat is written to the low half
//     (INSERT_SUBREG ... sub_xmm) and again inserted with VINSERTF128rr using
//     immediate 1. The splat expression is spelled out twice in each pattern.
// NOTE(review): immediate 0x44 is binary 01 00 01 00; presumably it repeats
// dwords 0 and 1, i.e. duplicates the low 64 bits - confirm against the
// VPSHUFD immediate encoding.
76210b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in {
76220b57cec5SDimitry Andric  def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
76230b57cec5SDimitry Andric            (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)>;
76240b57cec5SDimitry Andric  def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
76250b57cec5SDimitry Andric            (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
76260b57cec5SDimitry Andric              (v4f32 (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)), sub_xmm),
76270b57cec5SDimitry Andric              (v4f32 (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)), 1)>;
76280b57cec5SDimitry Andric  def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
76290b57cec5SDimitry Andric            (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
76300b57cec5SDimitry Andric              (v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), sub_xmm),
76310b57cec5SDimitry Andric              (v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), 1)>;
76320b57cec5SDimitry Andric
  // Integer scalars are moved into a vector register first
  // (VMOVDI2PDIrr / VMOV64toPQIrr) and then splatted with VPSHUFDri.
76330b57cec5SDimitry Andric  def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
76348bcb0991SDimitry Andric            (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)>;
76350b57cec5SDimitry Andric  def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
76360b57cec5SDimitry Andric            (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
76378bcb0991SDimitry Andric              (v4i32 (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)), sub_xmm),
76388bcb0991SDimitry Andric              (v4i32 (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)), 1)>;
  // NOTE(review): the 128-bit halves below are typed v4i32 although the
  // result is v4i64 - confirm the type annotation is intentional.
76390b57cec5SDimitry Andric  def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
76400b57cec5SDimitry Andric            (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
76418bcb0991SDimitry Andric              (v4i32 (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)), sub_xmm),
76428bcb0991SDimitry Andric              (v4i32 (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)), 1)>;
76430b57cec5SDimitry Andric
  // The source side names the operand i64:$src while the result side names
  // it GR64:$src; the other patterns in this group use the register class on
  // both sides.
76440b57cec5SDimitry Andric  def : Pat<(v2i64 (X86VBroadcast i64:$src)),
76458bcb0991SDimitry Andric            (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)>;
  // A 64-bit integer broadcast-load is selected to the memory form of
  // VMOVDDUP.
76468bcb0991SDimitry Andric  def : Pat<(v2i64 (X86VBroadcastld64 addr:$src)),
76470b57cec5SDimitry Andric            (VMOVDDUPrm addr:$src)>;
76480b57cec5SDimitry Andric}
76490b57cec5SDimitry Andric
76500b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
76510b57cec5SDimitry Andric// VPERM - Permute instructions
76520b57cec5SDimitry Andric//
76530b57cec5SDimitry Andric
// avx2_perm - 256-bit two-source permute matched to X86VPermv.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic; the three-operand asm string is appended to it.
//   OpVT      - vector type of the result.
//   Sched     - scheduling class; the memory form uses its .Folded and
//               .ReadAfterFold members. Note the parameter has the same name
//               as the Sched<...> class it is passed to.
//   memOp     - memory operand type for the Yrm form.
// Defines Yrr (register $src2) and Yrm ($src2 loaded from memory), both
// guarded by [HasAVX2, NoVLX] and marked VEX_4V, VEX_L.
76540b57cec5SDimitry Andricmulticlass avx2_perm<bits<8> opc, string OpcodeStr,
76550b57cec5SDimitry Andric                     ValueType OpVT, X86FoldableSchedWrite Sched,
76560b57cec5SDimitry Andric                     X86MemOperand memOp> {
76570b57cec5SDimitry Andric  let Predicates = [HasAVX2, NoVLX] in {
76580b57cec5SDimitry Andric    def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
76590b57cec5SDimitry Andric                     (ins VR256:$src1, VR256:$src2),
76600b57cec5SDimitry Andric                     !strconcat(OpcodeStr,
76610b57cec5SDimitry Andric                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
76620b57cec5SDimitry Andric                     [(set VR256:$dst,
76630b57cec5SDimitry Andric                       (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>,
76640b57cec5SDimitry Andric                     Sched<[Sched]>, VEX_4V, VEX_L;
76650b57cec5SDimitry Andric    def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
76660b57cec5SDimitry Andric                     (ins VR256:$src1, memOp:$src2),
76670b57cec5SDimitry Andric                     !strconcat(OpcodeStr,
76680b57cec5SDimitry Andric                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
76690b57cec5SDimitry Andric                     [(set VR256:$dst,
76700b57cec5SDimitry Andric                       (OpVT (X86VPermv VR256:$src1,
76710b57cec5SDimitry Andric                              (load addr:$src2))))]>,
76720b57cec5SDimitry Andric                     Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX_4V, VEX_L;
76730b57cec5SDimitry Andric  }
76740b57cec5SDimitry Andric}
76750b57cec5SDimitry Andric
// Instantiate avx2_perm for v8i32 (VPERMD, opcode 0x36) and v8f32 (VPERMPS,
// opcode 0x16). Only the VPERMPS records are placed in the SSEPackedSingle
// execution domain; the brace-less `let ... in` applies to that defm alone.
76760b57cec5SDimitry Andricdefm VPERMD : avx2_perm<0x36, "vpermd", v8i32, WriteVarShuffle256, i256mem>;
76770b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in
76780b57cec5SDimitry Andricdefm VPERMPS : avx2_perm<0x16, "vpermps", v8f32, WriteFVarShuffle256, f256mem>;
76790b57cec5SDimitry Andric
// avx2_perm_imm - 256-bit permute controlled by an 8-bit immediate, matched
// to X86VPermi.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic; the three-operand asm string is appended to it.
//   mem_frag  - load fragment used for the memory-source form.
//   OpVT      - vector type of the result.
//   Sched     - scheduling class (.Folded / .ReadAfterFold for the memory
//               form).
//   memOp     - memory operand type for the Ymi form.
// Defines Yri (register source) and Ymi (memory source). In both forms $src1
// is the vector being permuted and $src2 is the u8imm control, matched as
// (i8 timm). Both are guarded by [HasAVX2, NoVLX] and marked VEX, VEX_L.
76800b57cec5SDimitry Andricmulticlass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
76810b57cec5SDimitry Andric                         ValueType OpVT, X86FoldableSchedWrite Sched,
76820b57cec5SDimitry Andric                         X86MemOperand memOp> {
76830b57cec5SDimitry Andric  let Predicates = [HasAVX2, NoVLX] in {
76840b57cec5SDimitry Andric    def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
76850b57cec5SDimitry Andric                       (ins VR256:$src1, u8imm:$src2),
76860b57cec5SDimitry Andric                       !strconcat(OpcodeStr,
76870b57cec5SDimitry Andric                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
76880b57cec5SDimitry Andric                       [(set VR256:$dst,
76898bcb0991SDimitry Andric                         (OpVT (X86VPermi VR256:$src1, (i8 timm:$src2))))]>,
76900b57cec5SDimitry Andric                       Sched<[Sched]>, VEX, VEX_L;
76910b57cec5SDimitry Andric    def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
76920b57cec5SDimitry Andric                       (ins memOp:$src1, u8imm:$src2),
76930b57cec5SDimitry Andric                       !strconcat(OpcodeStr,
76940b57cec5SDimitry Andric                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
76950b57cec5SDimitry Andric                       [(set VR256:$dst,
76960b57cec5SDimitry Andric                         (OpVT (X86VPermi (mem_frag addr:$src1),
76978bcb0991SDimitry Andric                                (i8 timm:$src2))))]>,
76980b57cec5SDimitry Andric                       Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VEX_L;
76990b57cec5SDimitry Andric  }
77000b57cec5SDimitry Andric}
77010b57cec5SDimitry Andric
// Instantiate avx2_perm_imm for v4i64 (VPERMQ, opcode 0x00) and v4f64
// (VPERMPD, opcode 0x01). Both add VEX_W; only VPERMPD is placed in the
// SSEPackedDouble execution domain.
77020b57cec5SDimitry Andricdefm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64,
77030b57cec5SDimitry Andric                            WriteShuffle256, i256mem>, VEX_W;
77040b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in
77050b57cec5SDimitry Andricdefm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64,
77060b57cec5SDimitry Andric                             WriteFShuffle256, f256mem>, VEX_W;
77070b57cec5SDimitry Andric
77080b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
7709e8d8bef9SDimitry Andric// VPERM2I128 - Permute Integer vector Values in 128-bit chunks
77100b57cec5SDimitry Andric//
// Register and memory forms of vperm2i128 (opcode 0x46). Both take two
// 256-bit sources plus a u8imm and are defined with empty pattern lists; the
// vperm2x128_lowering instantiations that follow name "VPERM2I128".
// The brace-less `let isCommutable = 1 in` applies only to VPERM2I128rr.
// NOTE(review): the memory form uses f256mem even though this is the integer
// instruction (VINSERTI128rm uses i128mem) - confirm this is intentional.
77110b57cec5SDimitry Andriclet isCommutable = 1 in
77120b57cec5SDimitry Andricdef VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
77130b57cec5SDimitry Andric          (ins VR256:$src1, VR256:$src2, u8imm:$src3),
7714e8d8bef9SDimitry Andric          "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
7715e8d8bef9SDimitry Andric          Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
77160b57cec5SDimitry Andricdef VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
77170b57cec5SDimitry Andric          (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
7718e8d8bef9SDimitry Andric          "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
77190b57cec5SDimitry Andric          Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
77200b57cec5SDimitry Andric
// Instantiate vperm2x128_lowering for VPERM2I128 once per 256-bit integer
// vector type, each paired with its load fragment. Guarded by HasAVX2 only
// (no NoVLX here, unlike the VINSERTI128/VEXTRACTI128 lowerings).
// NOTE(review): vperm2x128_lowering is defined outside this excerpt.
7721e8d8bef9SDimitry Andriclet Predicates = [HasAVX2] in {
7722e8d8bef9SDimitry Andric  defm : vperm2x128_lowering<"VPERM2I128", v4i64,  loadv4i64>;
7723e8d8bef9SDimitry Andric  defm : vperm2x128_lowering<"VPERM2I128", v8i32,  loadv8i32>;
7724e8d8bef9SDimitry Andric  defm : vperm2x128_lowering<"VPERM2I128", v16i16, loadv16i16>;
7725e8d8bef9SDimitry Andric  defm : vperm2x128_lowering<"VPERM2I128", v32i8,  loadv32i8>;
7726e8d8bef9SDimitry Andric}
77270b57cec5SDimitry Andric
77280b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
77290b57cec5SDimitry Andric// VINSERTI128 - Insert packed integer values
77300b57cec5SDimitry Andric//
// Register and memory forms of vinserti128 (opcode 0x38): a 256-bit source,
// a 128-bit source (VR128 or i128mem) and a u8imm. Both are defined with
// empty pattern lists, so hasSideEffects = 0 is set explicitly for both and
// mayLoad = 1 for the memory form.
77310b57cec5SDimitry Andriclet hasSideEffects = 0 in {
77320b57cec5SDimitry Andricdef VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
77330b57cec5SDimitry Andric          (ins VR256:$src1, VR128:$src2, u8imm:$src3),
77340b57cec5SDimitry Andric          "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
77350b57cec5SDimitry Andric          []>, Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
77360b57cec5SDimitry Andriclet mayLoad = 1 in
77370b57cec5SDimitry Andricdef VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
77380b57cec5SDimitry Andric          (ins VR256:$src1, i128mem:$src2, u8imm:$src3),
77390b57cec5SDimitry Andric          "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
77400b57cec5SDimitry Andric          []>, Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
77410b57cec5SDimitry Andric}
77420b57cec5SDimitry Andric
// Instantiate vinsert_lowering for VINSERTI128 once per integer element
// width: (128-bit type, 256-bit type, 128-bit load fragment).
// NOTE(review): vinsert_lowering is defined outside this excerpt.
77430b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
77440b57cec5SDimitry Andric  defm : vinsert_lowering<"VINSERTI128", v2i64, v4i64,  loadv2i64>;
77450b57cec5SDimitry Andric  defm : vinsert_lowering<"VINSERTI128", v4i32, v8i32,  loadv4i32>;
77460b57cec5SDimitry Andric  defm : vinsert_lowering<"VINSERTI128", v8i16, v16i16, loadv8i16>;
77470b57cec5SDimitry Andric  defm : vinsert_lowering<"VINSERTI128", v16i8, v32i8,  loadv16i8>;
77480b57cec5SDimitry Andric}
77490b57cec5SDimitry Andric
77500b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
77510b57cec5SDimitry Andric// VEXTRACTI128 - Extract packed integer values
77520b57cec5SDimitry Andric//
// Register-destination and memory-destination forms of vextracti128 (opcode
// 0x39): a 256-bit source and a u8imm, producing a 128-bit result in VR128 or
// storing it to i128mem. Both are defined with empty pattern lists.
// NOTE(review): only the mr form sets hasSideEffects = 0 (with mayStore = 1);
// the rr form has no explicit hasSideEffects setting - confirm intended.
77530b57cec5SDimitry Andricdef VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
77540b57cec5SDimitry Andric          (ins VR256:$src1, u8imm:$src2),
77550b57cec5SDimitry Andric          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
77560b57cec5SDimitry Andric          Sched<[WriteShuffle256]>, VEX, VEX_L;
77570b57cec5SDimitry Andriclet hasSideEffects = 0, mayStore = 1 in
77580b57cec5SDimitry Andricdef VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
77590b57cec5SDimitry Andric          (ins i128mem:$dst, VR256:$src1, u8imm:$src2),
77600b57cec5SDimitry Andric          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
77610b57cec5SDimitry Andric          Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_L;
77620b57cec5SDimitry Andric
// Instantiate vextract_lowering for VEXTRACTI128 once per integer element
// width: (256-bit source type, 128-bit result type).
// NOTE(review): vextract_lowering is defined outside this excerpt.
77630b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
77640b57cec5SDimitry Andric  defm : vextract_lowering<"VEXTRACTI128", v4i64,  v2i64>;
77650b57cec5SDimitry Andric  defm : vextract_lowering<"VEXTRACTI128", v8i32,  v4i32>;
77660b57cec5SDimitry Andric  defm : vextract_lowering<"VEXTRACTI128", v16i16, v8i16>;
77670b57cec5SDimitry Andric  defm : vextract_lowering<"VEXTRACTI128", v32i8,  v16i8>;
77680b57cec5SDimitry Andric}
77690b57cec5SDimitry Andric
77700b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
77710b57cec5SDimitry Andric// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
77720b57cec5SDimitry Andric//
// avx2_pmovmask - masked integer load/store instructions selected directly
// from intrinsics.
//   OpcodeStr          - mnemonic; the three-operand asm string is appended.
//   IntLd128/IntLd256  - load intrinsics, matched as (Int addr, $src1).
//   IntSt128/IntSt256  - store intrinsics, matched as (Int addr, $src1, $src2).
//   schedX / schedY    - mask-move scheduling classes for the 128-bit and
//                        256-bit forms; .RM is used for loads, .MR for stores.
// Defines rm / Yrm (opcode 0x8c, loads) and mr / Ymr (opcode 0x8e, stores).
// All four are VEX_4V; the Y forms add VEX_L.
77730b57cec5SDimitry Andricmulticlass avx2_pmovmask<string OpcodeStr,
77740b57cec5SDimitry Andric                         Intrinsic IntLd128, Intrinsic IntLd256,
77755ffd83dbSDimitry Andric                         Intrinsic IntSt128, Intrinsic IntSt256,
77765ffd83dbSDimitry Andric                         X86SchedWriteMaskMove schedX,
77775ffd83dbSDimitry Andric                         X86SchedWriteMaskMove schedY> {
77780b57cec5SDimitry Andric  def rm  : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
77790b57cec5SDimitry Andric             (ins VR128:$src1, i128mem:$src2),
77800b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
77810b57cec5SDimitry Andric             [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>,
77825ffd83dbSDimitry Andric             VEX_4V, Sched<[schedX.RM]>;
77830b57cec5SDimitry Andric  def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
77840b57cec5SDimitry Andric             (ins VR256:$src1, i256mem:$src2),
77850b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
77860b57cec5SDimitry Andric             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
77875ffd83dbSDimitry Andric             VEX_4V, VEX_L, Sched<[schedY.RM]>;
77880b57cec5SDimitry Andric  def mr  : AVX28I<0x8e, MRMDestMem, (outs),
77890b57cec5SDimitry Andric             (ins i128mem:$dst, VR128:$src1, VR128:$src2),
77900b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
77910b57cec5SDimitry Andric             [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>,
77925ffd83dbSDimitry Andric             VEX_4V, Sched<[schedX.MR]>;
77930b57cec5SDimitry Andric  def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
77940b57cec5SDimitry Andric             (ins i256mem:$dst, VR256:$src1, VR256:$src2),
77950b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
77960b57cec5SDimitry Andric             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
77975ffd83dbSDimitry Andric             VEX_4V, VEX_L, Sched<[schedY.MR]>;
77980b57cec5SDimitry Andric}
77990b57cec5SDimitry Andric
// Instantiate avx2_pmovmask for dword (VPMASKMOVD) and qword (VPMASKMOVQ)
// elements, wiring in the int_x86_avx2_maskload_* / maskstore_* intrinsics
// and the 32-bit / 64-bit mask-move scheduling classes. The qword variant
// additionally sets VEX_W.
78000b57cec5SDimitry Andricdefm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
78010b57cec5SDimitry Andric                                int_x86_avx2_maskload_d,
78020b57cec5SDimitry Andric                                int_x86_avx2_maskload_d_256,
78030b57cec5SDimitry Andric                                int_x86_avx2_maskstore_d,
78045ffd83dbSDimitry Andric                                int_x86_avx2_maskstore_d_256,
78055ffd83dbSDimitry Andric                                WriteVecMaskMove32, WriteVecMaskMove32Y>;
78060b57cec5SDimitry Andricdefm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
78070b57cec5SDimitry Andric                                int_x86_avx2_maskload_q,
78080b57cec5SDimitry Andric                                int_x86_avx2_maskload_q_256,
78090b57cec5SDimitry Andric                                int_x86_avx2_maskstore_q,
78105ffd83dbSDimitry Andric                                int_x86_avx2_maskstore_q_256,
78115ffd83dbSDimitry Andric                                WriteVecMaskMove64, WriteVecMaskMove64Y>, VEX_W;
78120b57cec5SDimitry Andric
// maskmov_lowering - select generic masked_store / masked_load nodes to a
// named mask-move instruction.
//   InstrStr - instruction name prefix; "mr" (store) or "rm" (load) is
//              appended and the result is cast to an Instruction.
//   RC       - register class used for the data and the mask.
//   VT       - data vector type.
//   MaskVT   - mask vector type.
// The store maps to (mr ptr, mask, src); the load maps to (rm mask, ptr).
// Loads are matched only when the pass-through operand is undef or all zeros;
// this multiclass has no pattern for other pass-through values.
78130b57cec5SDimitry Andricmulticlass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT,
78148bcb0991SDimitry Andric                          ValueType MaskVT> {
78150b57cec5SDimitry Andric    // masked store
78160b57cec5SDimitry Andric    def: Pat<(masked_store (VT RC:$src), addr:$ptr, (MaskVT RC:$mask)),
78170b57cec5SDimitry Andric             (!cast<Instruction>(InstrStr#"mr") addr:$ptr, RC:$mask, RC:$src)>;
78180b57cec5SDimitry Andric    // masked load
78190b57cec5SDimitry Andric    def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), undef)),
78200b57cec5SDimitry Andric             (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
78210b57cec5SDimitry Andric    def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask),
78220b57cec5SDimitry Andric                              (VT immAllZerosV))),
78230b57cec5SDimitry Andric             (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
78240b57cec5SDimitry Andric}
// Instantiations of maskmov_lowering, grouped by feature predicate:
//   HasAVX      - float data (v4f32/v2f64/v8f32/v4f64) with integer masks,
//                 via VMASKMOVPS / VMASKMOVPD.
//   HasAVX1Only - integer data routed through the same VMASKMOVPS / VMASKMOVPD
//                 instructions.
//   HasAVX2     - integer data via VPMASKMOVD / VPMASKMOVQ.
78250b57cec5SDimitry Andriclet Predicates = [HasAVX] in {
78268bcb0991SDimitry Andric  defm : maskmov_lowering<"VMASKMOVPS", VR128, v4f32, v4i32>;
78278bcb0991SDimitry Andric  defm : maskmov_lowering<"VMASKMOVPD", VR128, v2f64, v2i64>;
78288bcb0991SDimitry Andric  defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8f32, v8i32>;
78298bcb0991SDimitry Andric  defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4f64, v4i64>;
78300b57cec5SDimitry Andric}
78310b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in {
78320b57cec5SDimitry Andric  // load/store i32/i64 not supported use ps/pd version
78338bcb0991SDimitry Andric  defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8i32, v8i32>;
78348bcb0991SDimitry Andric  defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4i64, v4i64>;
78358bcb0991SDimitry Andric  defm : maskmov_lowering<"VMASKMOVPS", VR128, v4i32, v4i32>;
78368bcb0991SDimitry Andric  defm : maskmov_lowering<"VMASKMOVPD", VR128, v2i64, v2i64>;
78370b57cec5SDimitry Andric}
78380b57cec5SDimitry Andriclet Predicates = [HasAVX2] in {
78398bcb0991SDimitry Andric  defm : maskmov_lowering<"VPMASKMOVDY", VR256, v8i32, v8i32>;
78408bcb0991SDimitry Andric  defm : maskmov_lowering<"VPMASKMOVQY", VR256, v4i64, v4i64>;
78418bcb0991SDimitry Andric  defm : maskmov_lowering<"VPMASKMOVD", VR128, v4i32, v4i32>;
78428bcb0991SDimitry Andric  defm : maskmov_lowering<"VPMASKMOVQ", VR128, v2i64, v2i64>;
78430b57cec5SDimitry Andric}
78440b57cec5SDimitry Andric
78450b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
78460b57cec5SDimitry Andric// Variable Bit Shifts
78470b57cec5SDimitry Andric//
// avx2_var_shift - two-source vector shift where the second operand is itself
// a vector (register or loaded from memory).
//   opc       - opcode byte.
//   OpcodeStr - mnemonic; the three-operand asm string is appended to it.
//   OpNode    - SelectionDAG node matched by the patterns.
//   vt128     - vector type for the 128-bit forms (rr, rm).
//   vt256     - vector type for the 256-bit forms (Yrr, Yrm).
// All four forms are VEX_4V; the Y forms add VEX_L. Scheduling uses
// SchedWriteVarVecShift.XMM / .YMM, with .Folded and .ReadAfterFold for the
// memory forms. No Predicates are set inside the multiclass itself.
78480b57cec5SDimitry Andricmulticlass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
78490b57cec5SDimitry Andric                          ValueType vt128, ValueType vt256> {
78500b57cec5SDimitry Andric  def rr  : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
78510b57cec5SDimitry Andric             (ins VR128:$src1, VR128:$src2),
78520b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
78530b57cec5SDimitry Andric             [(set VR128:$dst,
78540b57cec5SDimitry Andric               (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>,
78550b57cec5SDimitry Andric             VEX_4V, Sched<[SchedWriteVarVecShift.XMM]>;
78560b57cec5SDimitry Andric  def rm  : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
78570b57cec5SDimitry Andric             (ins VR128:$src1, i128mem:$src2),
78580b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
78590b57cec5SDimitry Andric             [(set VR128:$dst,
78600b57cec5SDimitry Andric               (vt128 (OpNode VR128:$src1,
78610b57cec5SDimitry Andric                       (vt128 (load addr:$src2)))))]>,
78620b57cec5SDimitry Andric             VEX_4V, Sched<[SchedWriteVarVecShift.XMM.Folded,
78630b57cec5SDimitry Andric                            SchedWriteVarVecShift.XMM.ReadAfterFold]>;
78640b57cec5SDimitry Andric  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
78650b57cec5SDimitry Andric             (ins VR256:$src1, VR256:$src2),
78660b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
78670b57cec5SDimitry Andric             [(set VR256:$dst,
78680b57cec5SDimitry Andric               (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>,
78690b57cec5SDimitry Andric             VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM]>;
78700b57cec5SDimitry Andric  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
78710b57cec5SDimitry Andric             (ins VR256:$src1, i256mem:$src2),
78720b57cec5SDimitry Andric             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
78730b57cec5SDimitry Andric             [(set VR256:$dst,
78740b57cec5SDimitry Andric               (vt256 (OpNode VR256:$src1,
78750b57cec5SDimitry Andric                       (vt256 (load addr:$src2)))))]>,
78760b57cec5SDimitry Andric             VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded,
78770b57cec5SDimitry Andric                                   SchedWriteVarVecShift.YMM.ReadAfterFold]>;
78780b57cec5SDimitry Andric}
78790b57cec5SDimitry Andric
// Instantiate avx2_var_shift for left (X86vshlv, 0x47), logical right
// (X86vsrlv, 0x45) and arithmetic right (X86vsrav, 0x46) shifts. The D and Q
// variants share an opcode and differ by VEX_W on the Q form. Only a dword
// arithmetic-right variant (VPSRAVD) is instantiated here.
78800b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in {
78810b57cec5SDimitry Andric  defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", X86vshlv, v4i32, v8i32>;
78820b57cec5SDimitry Andric  defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", X86vshlv, v2i64, v4i64>, VEX_W;
78830b57cec5SDimitry Andric  defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", X86vsrlv, v4i32, v8i32>;
78840b57cec5SDimitry Andric  defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", X86vsrlv, v2i64, v4i64>, VEX_W;
78850b57cec5SDimitry Andric  defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", X86vsrav, v4i32, v8i32>;
78860b57cec5SDimitry Andric}
78870b57cec5SDimitry Andric
78880b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
78890b57cec5SDimitry Andric// VGATHER - GATHER Operations
78900b57cec5SDimitry Andric
78910b57cec5SDimitry Andric// FIXME: Improve scheduling of gather instructions.
// avx2_gather - gather-load instruction pair with no selection patterns.
//   opc                 - opcode byte.
//   OpcodeStr           - mnemonic; the asm string prints $mask, $src2, $dst.
//   VTx, VTy, MTx, MTy  - value types; not referenced in the body below
//                         (both pattern lists are empty).
//   RC256               - register class used by the Yrm form.
//   memop128 / memop256 - memory operand types for the rm / Yrm forms.
// Each form has two outputs ($dst and $mask_wb) and takes $src1, the memory
// operand $src2 and $mask as inputs. Both are marked mayLoad with no side
// effects and are scheduled as [WriteLoad, WriteVecMaskedGatherWriteback].
78920b57cec5SDimitry Andricmulticlass avx2_gather<bits<8> opc, string OpcodeStr, ValueType VTx,
78935ffd83dbSDimitry Andric                       ValueType VTy, RegisterClass RC256,
78940b57cec5SDimitry Andric                       X86MemOperand memop128, X86MemOperand memop256,
78950b57cec5SDimitry Andric                       ValueType MTx = VTx, ValueType MTy = VTy> {
78965ffd83dbSDimitry Andriclet mayLoad = 1, hasSideEffects = 0 in {
78970b57cec5SDimitry Andric  def rm  : AVX28I<opc, MRMSrcMem4VOp3, (outs VR128:$dst, VR128:$mask_wb),
78980b57cec5SDimitry Andric            (ins VR128:$src1, memop128:$src2, VR128:$mask),
78990b57cec5SDimitry Andric            !strconcat(OpcodeStr,
79000b57cec5SDimitry Andric              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
7901*fe6060f1SDimitry Andric            []>, VEX, Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
79020b57cec5SDimitry Andric  def Yrm : AVX28I<opc, MRMSrcMem4VOp3, (outs RC256:$dst, RC256:$mask_wb),
79030b57cec5SDimitry Andric            (ins RC256:$src1, memop256:$src2, RC256:$mask),
79040b57cec5SDimitry Andric            !strconcat(OpcodeStr,
79050b57cec5SDimitry Andric              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
7906*fe6060f1SDimitry Andric            []>, VEX, VEX_L, Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
79075ffd83dbSDimitry Andric}
79080b57cec5SDimitry Andric}
79090b57cec5SDimitry Andric
// Instantiate avx2_gather for the integer (VPGATHER*) and floating-point
// (VGATHER*) gathers under HasAVX2.
// The Constraints string makes $dst and $mask_wb early-clobber outputs and
// ties them to the inputs $src1 and $mask respectively.
// mayLoad / hasSideEffects are set here as well as inside avx2_gather.
// The DQ/QQ/DPD/QPD variants add VEX_W. The QD and QPS variants pass VR128 as
// RC256 and a 128-bit type as VTy, so their Yrm form uses 128-bit registers.
79100b57cec5SDimitry Andriclet Predicates = [HasAVX2] in {
79110b57cec5SDimitry Andric  let mayLoad = 1, hasSideEffects = 0, Constraints
79120b57cec5SDimitry Andric    = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
79130b57cec5SDimitry Andric    in {
79145ffd83dbSDimitry Andric    defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", v2i64, v4i64,
79155ffd83dbSDimitry Andric                        VR256, vx128mem, vx256mem>, VEX_W;
79165ffd83dbSDimitry Andric    defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", v2i64, v4i64,
79175ffd83dbSDimitry Andric                        VR256, vx128mem, vy256mem>, VEX_W;
79185ffd83dbSDimitry Andric    defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", v4i32, v8i32,
79195ffd83dbSDimitry Andric                        VR256, vx128mem, vy256mem>;
79205ffd83dbSDimitry Andric    defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", v4i32, v4i32,
79215ffd83dbSDimitry Andric                        VR128, vx64mem, vy128mem>;
79220b57cec5SDimitry Andric
79230b57cec5SDimitry Andric    let ExeDomain = SSEPackedDouble in {
79245ffd83dbSDimitry Andric      defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", v2f64, v4f64,
79255ffd83dbSDimitry Andric                          VR256, vx128mem, vx256mem, v2i64, v4i64>, VEX_W;
79265ffd83dbSDimitry Andric      defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", v2f64, v4f64,
79275ffd83dbSDimitry Andric                          VR256, vx128mem, vy256mem, v2i64, v4i64>, VEX_W;
79280b57cec5SDimitry Andric    }
79290b57cec5SDimitry Andric
79300b57cec5SDimitry Andric    let ExeDomain = SSEPackedSingle in {
79315ffd83dbSDimitry Andric      defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", v4f32, v8f32,
79325ffd83dbSDimitry Andric                          VR256, vx128mem, vy256mem, v4i32, v8i32>;
79335ffd83dbSDimitry Andric      defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", v4f32, v4f32,
79345ffd83dbSDimitry Andric                          VR128, vx64mem, vy128mem, v4i32, v4i32>;
79350b57cec5SDimitry Andric    }
79360b57cec5SDimitry Andric  }
79370b57cec5SDimitry Andric}
79380b57cec5SDimitry Andric
79390b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
79400b57cec5SDimitry Andric// GFNI instructions
79410b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
79420b57cec5SDimitry Andric
// GF2P8MULB_rm - register and memory forms of the X86GF2P8mulb operation
// (opcode 0xCF, T8PD).
//   OpcodeStr - mnemonic used to build AsmString.
//   OpVT      - vector type of the result.
//   RC        - register class for the destination and register sources.
//   MemOpFrag - load fragment for the memory form.
//   X86MemOp  - memory operand type for the memory form.
//   Is2Addr   - when set, the two-operand asm string ($src2, $dst) is used;
//               otherwise the three-operand string is used.
// Both defs pass "" as their asm string; the actual text comes from the
// enclosing `let AsmString`. Only the rr form is marked commutable.
79430b57cec5SDimitry Andricmulticlass GF2P8MULB_rm<string OpcodeStr, ValueType OpVT,
79440b57cec5SDimitry Andric                        RegisterClass RC, PatFrag MemOpFrag,
79450b57cec5SDimitry Andric                        X86MemOperand X86MemOp, bit Is2Addr = 0> {
79460b57cec5SDimitry Andric  let ExeDomain = SSEPackedInt,
79470b57cec5SDimitry Andric      AsmString = !if(Is2Addr,
79485ffd83dbSDimitry Andric        OpcodeStr#"\t{$src2, $dst|$dst, $src2}",
79495ffd83dbSDimitry Andric        OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}") in {
79500b57cec5SDimitry Andric    let isCommutable = 1 in
79510b57cec5SDimitry Andric    def rr : PDI<0xCF, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), "",
79520b57cec5SDimitry Andric                 [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, RC:$src2)))]>,
79530b57cec5SDimitry Andric             Sched<[SchedWriteVecALU.XMM]>, T8PD;
79540b57cec5SDimitry Andric
79550b57cec5SDimitry Andric    def rm : PDI<0xCF, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, X86MemOp:$src2), "",
79560b57cec5SDimitry Andric                 [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1,
79570b57cec5SDimitry Andric                                 (MemOpFrag addr:$src2))))]>,
79580b57cec5SDimitry Andric             Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>, T8PD;
79590b57cec5SDimitry Andric  }
79600b57cec5SDimitry Andric}
79610b57cec5SDimitry Andric
// GF2P8AFFINE_rmi - register and memory forms of a GFNI operation that takes
// two vector sources and an 8-bit immediate.
//   Op        - opcode byte.
//   OpStr     - mnemonic used to build AsmString.
//   OpVT      - vector type of the result.
//   OpNode    - SelectionDAG node matched by the patterns.
//   RC        - register class for the destination and register sources.
//   MemOpFrag - load fragment for the memory form.
//   X86MemOp  - memory operand type for the memory form.
//   Is2Addr   - when set, the asm string omits $src1; otherwise the full
//               four-operand string is used.
// Both defs pass "" as their asm string; the actual text comes from the
// enclosing `let AsmString`. The immediate $src3 is matched as timm.
79620b57cec5SDimitry Andricmulticlass GF2P8AFFINE_rmi<bits<8> Op, string OpStr, ValueType OpVT,
79630b57cec5SDimitry Andric                           SDNode OpNode, RegisterClass RC, PatFrag MemOpFrag,
79640b57cec5SDimitry Andric                           X86MemOperand X86MemOp, bit Is2Addr = 0> {
79650b57cec5SDimitry Andric  let AsmString = !if(Is2Addr,
79665ffd83dbSDimitry Andric      OpStr#"\t{$src3, $src2, $dst|$dst, $src2, $src3}",
79675ffd83dbSDimitry Andric      OpStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in {
79680b57cec5SDimitry Andric  def rri : Ii8<Op, MRMSrcReg, (outs RC:$dst),
79690b57cec5SDimitry Andric              (ins RC:$src1, RC:$src2, u8imm:$src3), "",
79708bcb0991SDimitry Andric              [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))],
79710b57cec5SDimitry Andric              SSEPackedInt>, Sched<[SchedWriteVecALU.XMM]>;
79720b57cec5SDimitry Andric  def rmi : Ii8<Op, MRMSrcMem, (outs RC:$dst),
79730b57cec5SDimitry Andric              (ins RC:$src1, X86MemOp:$src2, u8imm:$src3), "",
79740b57cec5SDimitry Andric              [(set RC:$dst, (OpVT (OpNode RC:$src1,
79750b57cec5SDimitry Andric                                    (MemOpFrag addr:$src2),
79768bcb0991SDimitry Andric                              timm:$src3)))], SSEPackedInt>,
79770b57cec5SDimitry Andric              Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>;
79780b57cec5SDimitry Andric  }
79790b57cec5SDimitry Andric}
79800b57cec5SDimitry Andric
/// GF2P8AFFINE_common - Instantiates GF2P8AFFINE_rmi three times for one
/// opcode / mnemonic / SDNode triple:
///   NAME       - v16i8 on VR128 / i128mem, two-address asm form, with $src1
///                tied to $dst (predicates: HasGFNI, UseSSE2).
///   V#NAME     - v16i8 on VR128 / i128mem, "v"-prefixed mnemonic, VEX_4V and
///                VEX_W.
///   V#NAME#Y   - v32i8 on VR256 / i256mem, "v"-prefixed mnemonic, VEX_4V,
///                VEX_L and VEX_W.
/// The two "V" forms share the predicates HasGFNI, HasAVX, NoVLX_Or_NoBWI.
///
///   Op     - opcode byte forwarded to GF2P8AFFINE_rmi.
///   OpStr  - base mnemonic; the "V" forms prepend "v".
///   OpNode - SDNode forwarded to GF2P8AFFINE_rmi.
multiclass GF2P8AFFINE_common<bits<8> Op, string OpStr, SDNode OpNode> {
  // Brace-less 'let': the tie constraint and SSE predicates apply only to the
  // NAME instantiation that follows.
  // The non-VEX form folds memory through memop, like the non-VEX GF2P8MULB
  // definition in this file, instead of plain load: legacy SSE encodings need
  // an aligned 128-bit memory operand, so unaligned loads must not be folded.
  let Constraints = "$src1 = $dst",
      Predicates  = [HasGFNI, UseSSE2] in
  defm NAME         : GF2P8AFFINE_rmi<Op, OpStr, v16i8, OpNode,
                                      VR128, memop, i128mem, 1>;
  // VEX-encoded forms keep plain load and the three-operand asm string
  // (Is2Addr left at its default of 0).
  let Predicates  = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
    defm V#NAME    : GF2P8AFFINE_rmi<Op, "v"#OpStr, v16i8, OpNode, VR128,
                                      load, i128mem>, VEX_4V, VEX_W;
    defm V#NAME#Y : GF2P8AFFINE_rmi<Op, "v"#OpStr, v32i8, OpNode, VR256,
                                      load, i256mem>, VEX_4V, VEX_L, VEX_W;
  }
}
79930b57cec5SDimitry Andric
// GF2P8MULB
// Non-VEX form: $src1 is tied to $dst, the two-address asm string is selected
// (Is2Addr = 1) and memory is folded through memop. The brace-less 'let'
// applies only to this one defm.
let Constraints = "$src1 = $dst",
    Predicates  = [HasGFNI, UseSSE2] in
defm GF2P8MULB      : GF2P8MULB_rm<"gf2p8mulb", v16i8, VR128, memop,
                                    i128mem, 1>;
// VEX forms: three-operand asm string (Is2Addr defaults to 0), plain load,
// VEX_4V; the VR256 / v32i8 variant additionally sets VEX_L.
let Predicates  = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
  defm VGF2P8MULB   : GF2P8MULB_rm<"vgf2p8mulb", v16i8, VR128, load,
                                   i128mem>, VEX_4V;
  defm VGF2P8MULBY  : GF2P8MULB_rm<"vgf2p8mulb", v32i8, VR256, load,
                                   i256mem>, VEX_4V, VEX_L;
}
// GF2P8AFFINEINVQB, GF2P8AFFINEQB
// Both families are explicitly non-commutable and carry the TAPD prefix;
// they differ only in opcode byte (0xCF vs 0xCE), mnemonic and SDNode.
let isCommutable = 0 in {
  defm GF2P8AFFINEINVQB : GF2P8AFFINE_common<0xCF, "gf2p8affineinvqb",
                                             X86GF2P8affineinvqb>, TAPD;
  defm GF2P8AFFINEQB    : GF2P8AFFINE_common<0xCE, "gf2p8affineqb",
                                             X86GF2P8affineqb>, TAPD;
}
80120b57cec5SDimitry Andric
8013