//===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 SSE instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//

/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
///
/// Defines a register-register form (rr) and a register-memory form (rm) of a
/// two-source scalar operation. Both forms are isCodeGenOnly; only rr is marked
/// commutable. The rm form matches a plain `load` of the second operand.
///
///   opc       - opcode byte shared by both forms.
///   OpcodeStr - mnemonic placed in front of the operand string.
///   OpNode    - SDNode matched by the selection patterns.
///   RC        - register class of $dst and $src1 (and of $src2 in rr).
///   x86memop  - memory operand type of $src2 in rm.
///   d         - execution domain passed to the instruction class.
///   sched     - scheduling info; rm uses sched.Folded + sched.ReadAfterFold.
///   Is2Addr   - when set, the two-operand asm string is used; otherwise the
///               three-operand string that also prints $src1.
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, X86MemOperand x86memop,
                           Domain d, X86FoldableSchedWrite sched,
                           bit Is2Addr = 1> {
let isCodeGenOnly = 1 in {
  let isCommutable = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], d>,
       Sched<[sched]>;
  }
  def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], d>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
///
/// Defines rr_Int and rm_Int forms built on SI_Int. Both are marked
/// hasSideEffects = 0 and rm_Int is additionally marked mayLoad. The memory
/// operand of rm_Int is matched through the `mem_cpat` complex pattern.
/// Note: the asm string is built from `asm`; `OpcodeStr` is not referenced in
/// the body of this multiclass.
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr,
                               SDPatternOperator OpNode, RegisterClass RC,
                               ValueType VT, string asm, Operand memopr,
                               ComplexPattern mem_cpat, Domain d,
                               X86FoldableSchedWrite sched, bit Is2Addr = 1> {
let hasSideEffects = 0 in {
  def rr_Int : SI_Int<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (VT (OpNode RC:$src1, RC:$src2)))], d>,
       Sched<[sched]>;
  let mayLoad = 1 in
  def rm_Int : SI_Int<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (VT (OpNode RC:$src1, mem_cpat:$src2)))], d>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

/// sse12_fp_packed - SSE 1 & 2 packed instructions class
///
/// Defines rr (commutable) and rm (mayLoad) forms built on PI. The rm form
/// loads its second operand through the `mem_frag` pattern fragment.
multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, ValueType vt,
                           X86MemOperand x86memop, PatFrag mem_frag,
                           Domain d, X86FoldableSchedWrite sched,
                           bit Is2Addr = 1> {
  let isCommutable = 1 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>,
       Sched<[sched]>;
  let mayLoad = 1 in
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
          d>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}

/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
///
/// Same rr/rm shape as sse12_fp_packed, but the selection patterns are supplied
/// by the caller as `pat_rr` / `pat_rm` instead of being built from an SDNode.
/// Both forms are marked hasSideEffects = 0.
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
                                      string OpcodeStr, X86MemOperand x86memop,
                                      X86FoldableSchedWrite sched,
                                      list<dag> pat_rr, list<dag> pat_rm,
                                      bit Is2Addr = 1> {
  let isCommutable = 1, hasSideEffects = 0 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rr, d>,
       Sched<[sched]>;
  let hasSideEffects = 0, mayLoad = 1 in
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rm, d>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}


// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
  // Scalar f32 zero in an FR32 register.
  def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
                   [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoAVX512]>;
  // Scalar f64 zero in an FR64 register.
  def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
                   [(set FR64:$dst, fp64imm0)]>, Requires<[HasSSE2, NoAVX512]>;
  // f128 zero held in a VR128 register.
  def FsFLD0F128 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                     [(set VR128:$dst, fp128imm0)]>, Requires<[HasSSE1, NoAVX512]>;
}

//===----------------------------------------------------------------------===//
// AVX & SSE - Zero/One Vectors
//===----------------------------------------------------------------------===//

// Alias instruction that maps zero vector to pxor / xorp* for sse.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
               [(set VR128:$dst, (v4f32 immAllZerosV))]>;
}

// V_SET0 itself only carries the v4f32 pattern; map the remaining 128-bit
// all-zeros vector types onto the same pseudo.
let Predicates = [NoAVX512] in {
def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
}


// The same as done above but for AVX. The 256-bit AVX1 ISA doesn't support PI,
// and doesn't need it because on sandy bridge the register is set to zero
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                 [(set VR256:$dst, (v8i32 immAllZerosV))]>;
}

// AVX_SET0 itself only carries the v8i32 pattern; map the remaining 256-bit
// all-zeros vector types onto the same pseudo.
let Predicates = [NoAVX512] in {
def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;
}

// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
  def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                       [(set VR128:$dst, (v4i32 immAllOnesV))]>;
  let Predicates = [HasAVX1Only, OptForMinSize] in {
  def AVX1_SETALLONES: I<0, Pseudo, (outs VR256:$dst), (ins), "",
                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
  }
  let Predicates = [HasAVX2] in
  def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move FP Scalar Instructions
//
// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
// register copies because it's a partial register update; Register-to-register
// movss/movsd is not modeled as an INSERT_SUBREG because INSERT_SUBREG requires
// that the insert be implementable in terms of a copy, and, as just mentioned,
// we don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//

/// sse12_move_rr - register-register scalar move plus its reversed encoding.
///
/// `rr` uses opcode 0x10 (MRMSrcReg) and carries the selection pattern.
/// `rr_REV` uses opcode 0x11 (MRMDestReg), has no pattern, and exists for the
/// disassembler; FoldGenData ties it back to the `rr` record named Name#rr.
/// The asm string is base_opc followed by asm_opr.
multiclass sse12_move_rr<SDNode OpNode, ValueType vt,
                         X86MemOperand x86memop, string base_opc,
                         string asm_opr, Domain d, string Name> {
  let isCommutable = 1 in
  def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
              (ins VR128:$src1, VR128:$src2),
              !strconcat(base_opc, asm_opr),
              [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], d>,
              Sched<[SchedWriteFShuffle.XMM]>;

  // For the disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                  (ins VR128:$src1, VR128:$src2),
                  !strconcat(base_opc, asm_opr), []>,
                  Sched<[SchedWriteFShuffle.XMM]>, FoldGenData<Name#rr>;
}

/// sse12_move - AVX and SSE register moves and stores for one scalar type.
///
/// Instantiates sse12_move_rr twice (V-prefixed three-operand AVX form and the
/// tied two-operand SSE form), defines the matching `mr` store records, and
/// adds the ".s" assembler aliases that select the rr_REV encodings.
///
///   RC        - scalar register class stored by the `mr` forms.
///   OpcodeStr - mnemonic without the "v" prefix.
///   Name      - record-name stem handed to sse12_move_rr for FoldGenData.
///   pred      - predicate combined with NoSSE41_Or_OptForSize for the SSE rr.
multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
                      X86MemOperand x86memop, string OpcodeStr,
                      Domain d, string Name, Predicate pred> {
  // AVX
  let Predicates = [UseAVX, OptForSize] in
  defm V#NAME : sse12_move_rr<OpNode, vt, x86memop, OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d,
                              "V"#Name>,
                              VEX_4V, VEX_LIG, VEX_WIG;

  def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(store RC:$src, addr:$dst)], d>,
                     VEX, VEX_LIG, Sched<[WriteFStore]>, VEX_WIG;
  // SSE1 & 2
  let Constraints = "$src1 = $dst" in {
    let Predicates = [pred, NoSSE41_Or_OptForSize] in
    defm NAME : sse12_move_rr<OpNode, vt, x86memop, OpcodeStr,
                              "\t{$src2, $dst|$dst, $src2}", d, Name>;
  }

  def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(store RC:$src, addr:$dst)], d>,
                   Sched<[WriteFStore]>;

  // ".s" suffixed mnemonics map onto the reversed (rr_REV) encodings.
  def : InstAlias<"v"#OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>("V"#NAME#"rr_REV")
                   VR128:$dst, VR128:$src1, VR128:$src2), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src2, $dst|$dst, $src2}",
                  (!cast<Instruction>(NAME#"rr_REV")
                   VR128:$dst, VR128:$src2), 0>;
}

// Loading from memory automatically zeroing upper bits.
/// sse12_move_rm - scalar loads (opcode 0x10, MRMSrcMem) in AVX and SSE forms.
///
/// The `rm` records produce a VR128 result and match `vzloadfrag`; the
/// isCodeGenOnly `rm_alt` records produce an RC (FR32/FR64) result and match
/// `mem_pat`.
multiclass sse12_move_rm<RegisterClass RC, ValueType vt, X86MemOperand x86memop,
                         PatFrag mem_pat, PatFrag vzloadfrag, string OpcodeStr,
                         Domain d> {
  def V#NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
                     VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
  def NAME#rm   : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
                     Sched<[WriteFLoad]>;

  // _alt version uses FR32/FR64 register class.
  let isCodeGenOnly = 1 in {
  def V#NAME#rm_alt : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                         [(set RC:$dst, (mem_pat addr:$src))], d>,
                         VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
  def NAME#rm_alt   : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                         [(set RC:$dst, (mem_pat addr:$src))], d>,
                         Sched<[WriteFLoad]>;
  }
}

defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
                        SSEPackedSingle, "MOVSS", UseSSE1>, XS;
defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
                        SSEPackedDouble, "MOVSD", UseSSE2>, XD;

let canFoldAsLoad = 1, isReMaterializable = 1 in {
  defm MOVSS : sse12_move_rm<FR32, v4f32, f32mem, loadf32, X86vzload32, "movss",
                             SSEPackedSingle>, XS;
  defm MOVSD : sse12_move_rm<FR64, v2f64, f64mem, loadf64, X86vzload64, "movsd",
                             SSEPackedDouble>, XD;
}

// Patterns
let Predicates = [UseAVX] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
}

let Predicates = [UseAVX, OptForSize] in {
  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
  // MOVSS to the lower bits.
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (VMOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (VMOVSSrr (v4i32 (V_SET0)), VR128:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSrr (v4f32 (V_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSrr (v4i32 (V_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)))), sub_xmm)>;
}

let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVSS to the lower bits.
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
          (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
          (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
}

let Predicates = [UseSSE2] in
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
          (MOVSDrm addr:$src)>;

let Predicates = [UseSSE1] in
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
          (MOVSSrm addr:$src)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
//===----------------------------------------------------------------------===//

/// sse12_mov_packed - packed register move (rr) and load (rm) for one opcode.
///
/// `rr` has no selection pattern and is marked isMoveReg with no side effects;
/// `rm` loads through `ld_frag` and is foldable / rematerializable.
/// Scheduling comes from sched.RR and sched.RM respectively.
multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
                            X86MemOperand x86memop, PatFrag ld_frag,
                            string asm, Domain d,
                            X86SchedWriteMoveLS sched> {
let hasSideEffects = 0, isMoveReg = 1 in
  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>,
           Sched<[sched.RR]>;
let canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                   [(set RC:$dst, (ld_frag addr:$src))], d>,
           Sched<[sched.RM]>;
}

let Predicates = [HasAVX, NoVLX] in {
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
                                SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                                PS, VEX, VEX_WIG;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
                                SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                                PD, VEX, VEX_WIG;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
                                SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                                PS, VEX, VEX_WIG;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
                                SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                                PD, VEX, VEX_WIG;

defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, "movaps",
                                 SSEPackedSingle, SchedWriteFMoveLS.YMM>,
                                 PS, VEX, VEX_L, VEX_WIG;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd",
                                 SSEPackedDouble, SchedWriteFMoveLS.YMM>,
                                 PD, VEX, VEX_L, VEX_WIG;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups",
                                 SSEPackedSingle, SchedWriteFMoveLS.YMM>,
                                 PS, VEX, VEX_L, VEX_WIG;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd",
                                 SSEPackedDouble, SchedWriteFMoveLS.YMM>,
                                 PD, VEX, VEX_L, VEX_WIG;
}

let Predicates = [UseSSE1] in {
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
                               SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                               PS;
defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
                               SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                               PS;
}
let Predicates = [UseSSE2] in {
defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
                               SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                               PD;
defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
                               SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                               PD;
}

// VEX-encoded packed stores: 128-bit (XMM) then 256-bit (YMM).
let Predicates = [HasAVX, NoVLX] in {
let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>,
                   VEX, VEX_WIG;
def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>,
                   VEX, VEX_WIG;
def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)]>,
                   VEX, VEX_WIG;
def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)]>,
                   VEX, VEX_WIG;
} // SchedRW

let SchedRW = [SchedWriteFMoveLS.YMM.MR] in {
def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v8f32 VR256:$src), addr:$dst)]>,
                   VEX, VEX_L, VEX_WIG;
def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f64 VR256:$src), addr:$dst)]>,
                   VEX, VEX_L, VEX_WIG;
def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v8f32 VR256:$src), addr:$dst)]>,
                   VEX, VEX_L, VEX_WIG;
def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v4f64 VR256:$src), addr:$dst)]>,
                   VEX, VEX_L, VEX_WIG;
} // SchedRW
} // Predicate

// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    isMoveReg = 1 in {
let SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
  def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
                          (ins VR128:$src),
                          "movaps\t{$src, $dst|$dst, $src}", []>,
                          VEX, VEX_WIG, FoldGenData<"VMOVAPSrr">;
  def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movapd\t{$src, $dst|$dst, $src}", []>,
                           VEX, VEX_WIG, FoldGenData<"VMOVAPDrr">;
  def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movups\t{$src, $dst|$dst, $src}", []>,
                           VEX, VEX_WIG, FoldGenData<"VMOVUPSrr">;
  def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movupd\t{$src, $dst|$dst, $src}", []>,
                           VEX, VEX_WIG, FoldGenData<"VMOVUPDrr">;
} // SchedRW

let SchedRW = [SchedWriteFMoveLS.YMM.RR] in {
  def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movaps\t{$src, $dst|$dst, $src}", []>,
                            VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVAPSYrr">;
  def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movapd\t{$src, $dst|$dst, $src}", []>,
                            VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVAPDYrr">;
  def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movups\t{$src, $dst|$dst, $src}", []>,
                            VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPSYrr">;
  def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movupd\t{$src, $dst|$dst, $src}", []>,
                            VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPDYrr">;
} // SchedRW
} // isCodeGenOnly, ForceDisassemble, hasSideEffects, isMoveReg

// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
                (VMOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}",
                (VMOVAPDrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}",
                (VMOVUPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}",
                (VMOVUPDrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
                (VMOVAPSYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}",
                (VMOVAPDYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}",
                (VMOVUPSYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}",
                (VMOVUPDYrr_REV VR256:$dst, VR256:$src), 0>;

// Legacy-encoded (non-VEX) 128-bit packed stores.
let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)]>;
def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)]>;
} // SchedRW

// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    isMoveReg = 1, SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
  def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movaps\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVAPSrr">;
  def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movapd\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVAPDrr">;
  def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movups\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVUPSrr">;
  def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movupd\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVUPDrr">;
}

// Reversed version with ".s" suffix for GAS
compatibility. 5280b57cec5SDimitry Andricdef : InstAlias<"movaps.s\t{$src, $dst|$dst, $src}", 5290b57cec5SDimitry Andric (MOVAPSrr_REV VR128:$dst, VR128:$src), 0>; 5300b57cec5SDimitry Andricdef : InstAlias<"movapd.s\t{$src, $dst|$dst, $src}", 5310b57cec5SDimitry Andric (MOVAPDrr_REV VR128:$dst, VR128:$src), 0>; 5320b57cec5SDimitry Andricdef : InstAlias<"movups.s\t{$src, $dst|$dst, $src}", 5330b57cec5SDimitry Andric (MOVUPSrr_REV VR128:$dst, VR128:$src), 0>; 5340b57cec5SDimitry Andricdef : InstAlias<"movupd.s\t{$src, $dst|$dst, $src}", 5350b57cec5SDimitry Andric (MOVUPDrr_REV VR128:$dst, VR128:$src), 0>; 5360b57cec5SDimitry Andric 5370b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 5380b57cec5SDimitry Andric // 256-bit load/store need to use floating point load/store in case we don't 5390b57cec5SDimitry Andric // have AVX2. Execution domain fixing will convert to integer if AVX2 is 5400b57cec5SDimitry Andric // available and changing the domain is beneficial. 5410b57cec5SDimitry Andric def : Pat<(alignedloadv4i64 addr:$src), 5420b57cec5SDimitry Andric (VMOVAPSYrm addr:$src)>; 5430b57cec5SDimitry Andric def : Pat<(alignedloadv8i32 addr:$src), 5440b57cec5SDimitry Andric (VMOVAPSYrm addr:$src)>; 5450b57cec5SDimitry Andric def : Pat<(alignedloadv16i16 addr:$src), 5460b57cec5SDimitry Andric (VMOVAPSYrm addr:$src)>; 5470b57cec5SDimitry Andric def : Pat<(alignedloadv32i8 addr:$src), 5480b57cec5SDimitry Andric (VMOVAPSYrm addr:$src)>; 5490b57cec5SDimitry Andric def : Pat<(loadv4i64 addr:$src), 5500b57cec5SDimitry Andric (VMOVUPSYrm addr:$src)>; 5510b57cec5SDimitry Andric def : Pat<(loadv8i32 addr:$src), 5520b57cec5SDimitry Andric (VMOVUPSYrm addr:$src)>; 5530b57cec5SDimitry Andric def : Pat<(loadv16i16 addr:$src), 5540b57cec5SDimitry Andric (VMOVUPSYrm addr:$src)>; 5550b57cec5SDimitry Andric def : Pat<(loadv32i8 addr:$src), 5560b57cec5SDimitry Andric (VMOVUPSYrm addr:$src)>; 5570b57cec5SDimitry Andric 5580b57cec5SDimitry Andric def : Pat<(alignedstore (v4i64 
VR256:$src), addr:$dst), 5590b57cec5SDimitry Andric (VMOVAPSYmr addr:$dst, VR256:$src)>; 5600b57cec5SDimitry Andric def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst), 5610b57cec5SDimitry Andric (VMOVAPSYmr addr:$dst, VR256:$src)>; 5620b57cec5SDimitry Andric def : Pat<(alignedstore (v16i16 VR256:$src), addr:$dst), 5630b57cec5SDimitry Andric (VMOVAPSYmr addr:$dst, VR256:$src)>; 5640b57cec5SDimitry Andric def : Pat<(alignedstore (v32i8 VR256:$src), addr:$dst), 5650b57cec5SDimitry Andric (VMOVAPSYmr addr:$dst, VR256:$src)>; 5660b57cec5SDimitry Andric def : Pat<(store (v4i64 VR256:$src), addr:$dst), 5670b57cec5SDimitry Andric (VMOVUPSYmr addr:$dst, VR256:$src)>; 5680b57cec5SDimitry Andric def : Pat<(store (v8i32 VR256:$src), addr:$dst), 5690b57cec5SDimitry Andric (VMOVUPSYmr addr:$dst, VR256:$src)>; 5700b57cec5SDimitry Andric def : Pat<(store (v16i16 VR256:$src), addr:$dst), 5710b57cec5SDimitry Andric (VMOVUPSYmr addr:$dst, VR256:$src)>; 5720b57cec5SDimitry Andric def : Pat<(store (v32i8 VR256:$src), addr:$dst), 5730b57cec5SDimitry Andric (VMOVUPSYmr addr:$dst, VR256:$src)>; 5740b57cec5SDimitry Andric} 5750b57cec5SDimitry Andric 5760b57cec5SDimitry Andric// Use movaps / movups for SSE integer load / store (one byte shorter). 5770b57cec5SDimitry Andric// The instructions selected below are then converted to MOVDQA/MOVDQU 5780b57cec5SDimitry Andric// during the SSE domain pass. 
let Predicates = [UseSSE1] in {
  // 128-bit integer vector loads: aligned accesses select MOVAPS, every other
  // access selects MOVUPS.
  foreach LdFrag = [alignedloadv2i64, alignedloadv4i32,
                    alignedloadv8i16, alignedloadv16i8] in
    def : Pat<(LdFrag addr:$src), (MOVAPSrm addr:$src)>;
  foreach LdFrag = [loadv2i64, loadv4i32, loadv8i16, loadv16i8] in
    def : Pat<(LdFrag addr:$src), (MOVUPSrm addr:$src)>;

  // 128-bit integer vector stores, with the same aligned/unaligned split.
  foreach VT = [v2i64, v4i32, v8i16, v16i8] in
    def : Pat<(alignedstore (VT VR128:$src), addr:$dst),
              (MOVAPSmr addr:$dst, VR128:$src)>;
  foreach VT = [v2i64, v4i32, v8i16, v16i8] in
    def : Pat<(store (VT VR128:$src), addr:$dst),
              (MOVUPSmr addr:$dst, VR128:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//

/// sse12_mov_hilo_packed_base - Emits the register <- (register, 64-bit
/// memory) forms PSrm and PDrm for one opcode.
///   opc      - opcode byte shared by both forms.
///   pdnode   - node matched by the PDrm selection pattern.
///   base_opc - mnemonic stem; "s" / "d" is appended per form.
///   asm_opr  - operand portion of the assembly string.
multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode pdnode,
                                      string base_opc, string asm_opr> {
  // The single-precision form carries no pattern here: it has to be special
  // cased between the high and the low variants.
  let hasSideEffects = 0, mayLoad = 1 in
  def PSrm : PI<opc, MRMSrcMem,
                (outs VR128:$dst),
                (ins VR128:$src1, f64mem:$src2),
                base_opc#"s"#asm_opr,
                [], SSEPackedSingle>,
             PS,
             Sched<[SchedWriteFShuffle.XMM.Folded,
                    SchedWriteFShuffle.XMM.ReadAfterFold]>;

  def PDrm : PI<opc, MRMSrcMem,
                (outs VR128:$dst),
                (ins VR128:$src1, f64mem:$src2),
                base_opc#"d"#asm_opr,
                [(set VR128:$dst,
                      (v2f64 (pdnode VR128:$src1,
                                     (scalar_to_vector
                                       (loadf64 addr:$src2)))))],
                SSEPackedDouble>,
             PD,
             Sched<[SchedWriteFShuffle.XMM.Folded,
                    SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

/// sse12_mov_hilo_packed - Instantiates sse12_mov_hilo_packed_base twice:
/// a three-operand V#NAME variant under UseAVX, and a two-operand NAME
/// variant whose $src1 is tied to $dst.
multiclass sse12_mov_hilo_packed<bits<8>opc, SDPatternOperator pdnode,
                                 string base_opc> {
  let Predicates = [UseAVX] in
    defm V#NAME : sse12_mov_hilo_packed_base<
                      opc, pdnode, base_opc,
                      "\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
                  VEX_4V, VEX_WIG;

  let Constraints = "$src1 = $dst" in
    defm NAME : sse12_mov_hilo_packed_base<
                    opc, pdnode, base_opc,
                    "\t{$src2, $dst|$dst, $src2}">;
}

defm MOVL : sse12_mov_hilo_packed<0x12, X86Movsd, "movlp">;

// Store forms of MOVLPS / MOVLPD. The PS stores have no selection pattern, so
// mayStore / hasSideEffects are spelled out for them; the PD stores match a
// store of element 0 of a v2f64.
let SchedRW = [WriteFStore] in {
  let Predicates = [UseAVX] in {
    let mayStore = 1, hasSideEffects = 0 in
    def VMOVLPSmr : VPSI<0x13, MRMDestMem,
                         (outs), (ins f64mem:$dst, VR128:$src),
                         "movlps\t{$src, $dst|$dst, $src}",
                         []>,
                    VEX, VEX_WIG;
    def VMOVLPDmr : VPDI<0x13, MRMDestMem,
                         (outs), (ins f64mem:$dst, VR128:$src),
                         "movlpd\t{$src, $dst|$dst, $src}",
                         [(store (f64 (extractelt (v2f64 VR128:$src),
                                                  (iPTR 0))),
                                 addr:$dst)]>,
                    VEX, VEX_WIG;
  } // UseAVX

  let mayStore = 1, hasSideEffects = 0 in
  def MOVLPSmr : PSI<0x13, MRMDestMem,
                     (outs), (ins f64mem:$dst, VR128:$src),
                     "movlps\t{$src, $dst|$dst, $src}",
                     []>;
  def MOVLPDmr : PDI<0x13, MRMDestMem,
                     (outs), (ins f64mem:$dst, VR128:$src),
                     "movlpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (extractelt (v2f64 VR128:$src),
                                              (iPTR 0))),
                             addr:$dst)]>;
} // SchedRW

let Predicates = [UseSSE1] in {
  // Lets MOVLPS be selected on SSE1-only targets; with SSE2 the same shuffle
  // ends up as a movsd or blend rather than a shufp. An aligned load is not
  // required because only 64 bits are read.
  def : Pat<(X86Shufp (v4f32 (simple_load addr:$src2)), VR128:$src1,
                      (i8 -28)),
            (MOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Shufp (v4f32 (X86vzload64 addr:$src2)), VR128:$src1,
                      (i8 -28)),
            (MOVLPSrm VR128:$src1, addr:$src2)>;

  def : Pat<(v4f32 (X86vzload64 addr:$src)),
            (MOVLPSrm (v4f32 (V_SET0)), addr:$src)>;
  def : Pat<(X86vextractstore64 (v4f32 VR128:$src), addr:$dst),
            (MOVLPSmr addr:$dst, VR128:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Hi packed FP Instructions
//===----------------------------------------------------------------------===//

defm MOVH : sse12_mov_hilo_packed<0x16, X86Unpckl, "movhp">;

let SchedRW = [WriteFStore] in {
  // Extracting element 1 of a v2f64 is always custom lowered to "unpack high
  // to low, then extract element 0", which is the form the PD store patterns
  // below match; the non-store version is therefore not too horrible.
  let Predicates = [UseAVX] in {
    let mayStore = 1, hasSideEffects = 0 in
    def VMOVHPSmr : VPSI<0x17, MRMDestMem,
                         (outs), (ins f64mem:$dst, VR128:$src),
                         "movhps\t{$src, $dst|$dst, $src}",
                         []>,
                    VEX, VEX_WIG;
    def VMOVHPDmr : VPDI<0x17, MRMDestMem,
                         (outs), (ins f64mem:$dst, VR128:$src),
                         "movhpd\t{$src, $dst|$dst, $src}",
                         [(store (f64 (extractelt
                                        (v2f64 (X86Unpckh VR128:$src,
                                                          VR128:$src)),
                                        (iPTR 0))),
                                 addr:$dst)]>,
                    VEX, VEX_WIG;
  } // UseAVX

  let mayStore = 1, hasSideEffects = 0 in
  def MOVHPSmr : PSI<0x17, MRMDestMem,
                     (outs), (ins f64mem:$dst, VR128:$src),
                     "movhps\t{$src, $dst|$dst, $src}",
                     []>;
  def MOVHPDmr : PDI<0x17, MRMDestMem,
                     (outs), (ins f64mem:$dst, VR128:$src),
                     "movhpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (extractelt
                                    (v2f64 (X86Unpckh VR128:$src,
                                                      VR128:$src)),
                                    (iPTR 0))),
                             addr:$dst)]>;
} // SchedRW

let Predicates = [UseAVX] in {
  // An i64 load is matched as well, since it may get selected as a faster way
  // to load the data.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                      (bc_v2f64 (v2i64 (scalar_to_vector
                                         (loadi64 addr:$src2)))))),
            (VMOVHPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
            (VMOVHPDrm VR128:$src1, addr:$src2)>;

  def : Pat<(store (f64 (extractelt
                          (v2f64 (X86VPermilpi VR128:$src, (i8 1))),
                          (iPTR 0))),
                   addr:$dst),
            (VMOVHPDmr addr:$dst, VR128:$src)>;

  // MOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;
}

let Predicates = [UseSSE1] in {
  // Lets MOVHPS be selected on SSE1-only targets; with SSE2 the same shuffle
  // ends up as a movsd or blend rather than a shufp. An aligned load is not
  // required because only 64 bits are read.
  def : Pat<(X86Movlhps VR128:$src1, (v4f32 (simple_load addr:$src2))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128:$src1, (v4f32 (X86vzload64 addr:$src2))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;

  def : Pat<(X86vextractstore64
              (v4f32 (X86Movhlps VR128:$src, VR128:$src)),
              addr:$dst),
            (MOVHPSmr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  // MOVHPD patterns

  // An i64 load is matched as well, since it may get selected as a faster way
  // to load the data.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                      (bc_v2f64 (v2i64 (scalar_to_vector
                                         (loadi64 addr:$src2)))))),
            (MOVHPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
            (MOVHPDrm VR128:$src1, addr:$src2)>;

  def : Pat<(store (f64 (extractelt
                          (v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))),
                          (iPTR 0))),
                   addr:$dst),
            (MOVHPDmr addr:$dst, VR128:$src)>;

  // MOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
}

let Predicates = [UseSSE2, NoSSE41_Or_OptForSize] in {
  // Load into the low bits from a full vector with MOVLPD, unless BLENDPD can
  // be used instead.
  def : Pat<(X86Movsd VR128:$src1, (v2f64 (simple_load addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

let Predicates = [UseAVX] in {
  def VMOVLHPSrr : VPSI<0x16, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                        "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst,
                              (v4f32 (X86Movlhps VR128:$src1,
                                                 VR128:$src2)))]>,
                   VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG;
  let isCommutable = 1 in
  def VMOVHLPSrr : VPSI<0x12, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                        "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst,
                              (v4f32 (X86Movhlps VR128:$src1,
                                                 VR128:$src2)))]>,
                   VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG,
                   NotMemoryFoldable;
}
let Constraints = "$src1 = $dst" in {
  def MOVLHPSrr : PSI<0x16, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "movlhps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                            (v4f32 (X86Movlhps VR128:$src1,
                                               VR128:$src2)))]>,
                  Sched<[SchedWriteFShuffle.XMM]>;
  let isCommutable = 1 in
  def MOVHLPSrr : PSI<0x12, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "movhlps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                            (v4f32 (X86Movhlps VR128:$src1,
                                               VR128:$src2)))]>,
                  Sched<[SchedWriteFShuffle.XMM]>, NotMemoryFoldable;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Conversion Instructions
//===----------------------------------------------------------------------===//

/// sse12_cvt_s - Scalar conversion with a register form (rr) and a
/// load-folding form (rm), both matching OpNode applied to the source.
///   asm / mem - mnemonic used for the rr form / the rm form.
///   Int2Fpu   - extra scheduling read attached to the rr form
///               (ReadDefault unless overridden).
multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                     SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
                     string asm, string mem, X86FoldableSchedWrite sched,
                     SchedRead Int2Fpu = ReadDefault> {
  def rr : SI<opc, MRMSrcReg,
              (outs DstRC:$dst), (ins SrcRC:$src),
              asm#"\t{$src, $dst|$dst, $src}",
              [(set DstRC:$dst, (OpNode SrcRC:$src))]>,
           Sched<[sched, Int2Fpu]>;
  def rm : SI<opc, MRMSrcMem,
              (outs DstRC:$dst), (ins x86memop:$src),
              !strconcat(mem, "\t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
           Sched<[sched.Folded]>;
}

/// sse12_cvt_p - Packed sint_to_fp conversion from SrcTy to DstTy, with a
/// register form (rr) and a load-folding form (rm). The caller supplies the
/// complete assembly string in asm.
multiclass sse12_cvt_p<bits<8> opc, RegisterClass RC, X86MemOperand x86memop,
                       ValueType DstTy, ValueType SrcTy, PatFrag ld_frag,
                       string asm, Domain d, X86FoldableSchedWrite sched> {
  let hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg,
               (outs RC:$dst), (ins RC:$src), asm,
               [(set RC:$dst, (DstTy (sint_to_fp (SrcTy RC:$src))))], d>,
             Sched<[sched]>;
    let mayLoad = 1 in
    def rm : I<opc, MRMSrcMem,
               (outs RC:$dst), (ins x86memop:$src), asm,
               [(set RC:$dst,
                     (DstTy (sint_to_fp (SrcTy (ld_frag addr:$src)))))], d>,
             Sched<[sched.Folded]>;
  }
}

/// sse12_vcvt_avx - Pattern-less three-operand conversion forms, guarded by
/// UseAVX. $src1 is an extra DstRC input; the rm form's mnemonic carries the
/// optional "{mem}" suffix.
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          X86MemOperand x86memop, string asm, string mem,
                          X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, Predicates = [UseAVX] in {
    def rr : SI<opc, MRMSrcReg,
                (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
                asm#"\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
             Sched<[sched, ReadDefault, ReadInt2Fpu]>;
    let mayLoad = 1 in
    def rm : SI<opc, MRMSrcMem,
                (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src),
                !strconcat(asm, "{", mem,
                           "}\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // hasSideEffects = 0, Predicates = [UseAVX]
}

// VEX-encoded truncating FP -> signed integer conversions (codegen only).
let isCodeGenOnly = 1, Predicates = [UseAVX] in {
  defm VCVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint,
                                  f32mem, loadf32,
                                  "cvttss2si", "cvttss2si", WriteCvtSS2I>,
                      XS, VEX, VEX_LIG;
  defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint,
                                  f32mem, loadf32,
                                  "cvttss2si", "cvttss2si", WriteCvtSS2I>,
                      XS, VEX, VEX_W, VEX_LIG;
  defm VCVTTSD2SI   : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint,
                                  f64mem, loadf64,
                                  "cvttsd2si", "cvttsd2si", WriteCvtSD2I>,
                      XD, VEX, VEX_LIG;
  defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint,
                                  f64mem, loadf64,
                                  "cvttsd2si", "cvttsd2si", WriteCvtSD2I>,
                      XD, VEX, VEX_W, VEX_LIG;
}

// The assembler can recognize rr 64-bit instructions by seeing a rxx
// register, but the same isn't true when only memory operands are used, so
// explicit "l" and "q" assembly forms are provided where appropriate.
8900b57cec5SDimitry Andriclet isCodeGenOnly = 1 in { 8910b57cec5SDimitry Andricdefm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss", "l", 8920b57cec5SDimitry Andric WriteCvtI2SS>, XS, VEX_4V, VEX_LIG; 8930b57cec5SDimitry Andricdefm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss", "q", 8940b57cec5SDimitry Andric WriteCvtI2SS>, XS, VEX_4V, VEX_W, VEX_LIG; 8950b57cec5SDimitry Andricdefm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd", "l", 8960b57cec5SDimitry Andric WriteCvtI2SD>, XD, VEX_4V, VEX_LIG; 8970b57cec5SDimitry Andricdefm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q", 8980b57cec5SDimitry Andric WriteCvtI2SD>, XD, VEX_4V, VEX_W, VEX_LIG; 8990b57cec5SDimitry Andric} // isCodeGenOnly = 1 9000b57cec5SDimitry Andric 9010b57cec5SDimitry Andriclet Predicates = [UseAVX] in { 9020b57cec5SDimitry Andric def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), 9030b57cec5SDimitry Andric (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; 9040b57cec5SDimitry Andric def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), 9050b57cec5SDimitry Andric (VCVTSI642SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; 9060b57cec5SDimitry Andric def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))), 9070b57cec5SDimitry Andric (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>; 9080b57cec5SDimitry Andric def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))), 9090b57cec5SDimitry Andric (VCVTSI642SDrm (f64 (IMPLICIT_DEF)), addr:$src)>; 9100b57cec5SDimitry Andric 9110b57cec5SDimitry Andric def : Pat<(f32 (sint_to_fp GR32:$src)), 9120b57cec5SDimitry Andric (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>; 9130b57cec5SDimitry Andric def : Pat<(f32 (sint_to_fp GR64:$src)), 9140b57cec5SDimitry Andric (VCVTSI642SSrr (f32 (IMPLICIT_DEF)), GR64:$src)>; 9150b57cec5SDimitry Andric def : Pat<(f64 (sint_to_fp GR32:$src)), 9160b57cec5SDimitry Andric (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>; 9170b57cec5SDimitry Andric def : Pat<(f64 (sint_to_fp GR64:$src)), 
9180b57cec5SDimitry Andric (VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>; 9190b57cec5SDimitry Andric} 9200b57cec5SDimitry Andric 9210b57cec5SDimitry Andriclet isCodeGenOnly = 1 in { 9220b57cec5SDimitry Andricdefm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, 9230b57cec5SDimitry Andric "cvttss2si", "cvttss2si", 9240b57cec5SDimitry Andric WriteCvtSS2I>, XS; 9250b57cec5SDimitry Andricdefm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, 9260b57cec5SDimitry Andric "cvttss2si", "cvttss2si", 9270b57cec5SDimitry Andric WriteCvtSS2I>, XS, REX_W; 9280b57cec5SDimitry Andricdefm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, 9290b57cec5SDimitry Andric "cvttsd2si", "cvttsd2si", 9300b57cec5SDimitry Andric WriteCvtSD2I>, XD; 9310b57cec5SDimitry Andricdefm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, 9320b57cec5SDimitry Andric "cvttsd2si", "cvttsd2si", 9330b57cec5SDimitry Andric WriteCvtSD2I>, XD, REX_W; 9340b57cec5SDimitry Andricdefm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32, 9350b57cec5SDimitry Andric "cvtsi2ss", "cvtsi2ss{l}", 9360b57cec5SDimitry Andric WriteCvtI2SS, ReadInt2Fpu>, XS; 9370b57cec5SDimitry Andricdefm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64, 9380b57cec5SDimitry Andric "cvtsi2ss", "cvtsi2ss{q}", 9390b57cec5SDimitry Andric WriteCvtI2SS, ReadInt2Fpu>, XS, REX_W; 9400b57cec5SDimitry Andricdefm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32, 9410b57cec5SDimitry Andric "cvtsi2sd", "cvtsi2sd{l}", 9420b57cec5SDimitry Andric WriteCvtI2SD, ReadInt2Fpu>, XD; 9430b57cec5SDimitry Andricdefm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64, 9440b57cec5SDimitry Andric "cvtsi2sd", "cvtsi2sd{q}", 9450b57cec5SDimitry Andric WriteCvtI2SD, ReadInt2Fpu>, XD, REX_W; 9460b57cec5SDimitry Andric} // isCodeGenOnly = 1 9470b57cec5SDimitry Andric 9480b57cec5SDimitry Andric// Conversion 
// Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).

/// sse12_cvt_sint - Single-source conversion selected through OpNode.
/// Produces a register form (rr_Int, source in SrcRC) and a memory form
/// (rm_Int, source printed as memop and matched through mem_cpat); both
/// forms define DstRC:$dst.
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          ValueType DstVT, ValueType SrcVT, SDNode OpNode,
                          Operand memop, ComplexPattern mem_cpat, string asm,
                          X86FoldableSchedWrite sched> {
  def rr_Int : SI<opc, MRMSrcReg,
                  (outs DstRC:$dst),
                  (ins SrcRC:$src),
                  !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                  [(set DstRC:$dst, (DstVT (OpNode (SrcVT SrcRC:$src))))]>,
               Sched<[sched]>;
  def rm_Int : SI<opc, MRMSrcMem,
                  (outs DstRC:$dst),
                  (ins memop:$src),
                  !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                  [(set DstRC:$dst, (DstVT (OpNode (SrcVT mem_cpat:$src))))]>,
               Sched<[sched.Folded]>;
}

/// sse12_cvt_sint_3addr - Two-source conversion records without selection
/// patterns.  $src1 lives in DstRC, $src2 is either SrcRC (rr_Int) or
/// x86memop (rm_Int).  Is2Addr chooses between the two-operand and the
/// three-operand assembly string; the memory form additionally appends the
/// optional "{mem}" suffix to the mnemonic.
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
                                RegisterClass DstRC, X86MemOperand x86memop,
                                string asm, string mem,
                                X86FoldableSchedWrite sched,
                                bit Is2Addr = 1> {
  let hasSideEffects = 0 in {
    def rr_Int : SI<opc, MRMSrcReg,
                    (outs DstRC:$dst),
                    (ins DstRC:$src1, SrcRC:$src2),
                    !if(Is2Addr,
                        !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                        !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                    []>,
                 Sched<[sched, ReadDefault, ReadInt2Fpu]>;
    let mayLoad = 1 in
    def rm_Int : SI<opc, MRMSrcMem,
                    (outs DstRC:$dst),
                    (ins DstRC:$src1, x86memop:$src2),
                    !if(Is2Addr,
                        asm#"{"#mem#"}\t{$src2, $dst|$dst, $src2}",
                        asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>,
                 Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// cvtsd2si: VEX-encoded records are limited to UseAVX, the legacy-encoded
// records carry no extra predicate here.
let Predicates = [UseAVX] in {
  defm VCVTSD2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64,
                                    X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
                                    WriteCvtSD2I>,
                     XD, VEX, VEX_LIG;
  defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64,
                                    X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
                                    WriteCvtSD2I>,
                     XD, VEX, VEX_W, VEX_LIG;
}
defm CVTSD2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si,
                                 sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>,
                  XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si,
                                 sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>,
                  XD, REX_W;


// cvtsi2ss / cvtsi2sd: the VEX records pass Is2Addr = 0 (three-operand
// syntax); the legacy records keep the default and tie $src1 to $dst.
let Predicates = [UseAVX] in {
  defm VCVTSI2SS   : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                                          i32mem, "cvtsi2ss", "l",
                                          WriteCvtI2SS, 0>,
                     XS, VEX_4V, VEX_LIG;
  defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                                          i64mem, "cvtsi2ss", "q",
                                          WriteCvtI2SS, 0>,
                     XS, VEX_4V, VEX_LIG, VEX_W;
  defm VCVTSI2SD   : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                                          i32mem, "cvtsi2sd", "l",
                                          WriteCvtI2SD, 0>,
                     XD, VEX_4V, VEX_LIG;
  defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                                          i64mem, "cvtsi2sd", "q",
                                          WriteCvtI2SD, 0>,
                     XD, VEX_4V, VEX_LIG, VEX_W;
}
let Constraints = "$src1 = $dst" in {
  defm CVTSI2SS   : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                                         i32mem, "cvtsi2ss", "l", WriteCvtI2SS>,
                    XS;
  defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                                         i64mem, "cvtsi2ss", "q", WriteCvtI2SS>,
                    XS, REX_W;
  defm CVTSI2SD   : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                                         i32mem, "cvtsi2sd", "l", WriteCvtI2SD>,
                    XD;
  defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                                         i64mem, "cvtsi2sd", "q", WriteCvtI2SD>,
                    XD, REX_W;
}

// AT&T aliases: register forms of vcvtsi2ss/vcvtsi2sd with an optional
// {l}/{q} suffix.
def : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI2SSrr_Int VR128:$dst, VR128:$src1, GR32:$src2),
                0, "att">;
def : InstAlias<"vcvtsi2ss{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI642SSrr_Int VR128:$dst, VR128:$src1, GR64:$src2),
                0, "att">;
def : InstAlias<"vcvtsi2sd{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI2SDrr_Int VR128:$dst, VR128:$src1, GR32:$src2),
                0, "att">;
def : InstAlias<"vcvtsi2sd{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI642SDrr_Int VR128:$dst, VR128:$src1, GR64:$src2),
                0, "att">;

// Unsuffixed memory forms map onto the 32-bit memory records.
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SSrm_Int VR128:$dst, VR128:$src1, i32mem:$src),
                0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SDrm_Int VR128:$dst, VR128:$src1, i32mem:$src),
                0, "att">;

// Same aliases for the legacy-encoded records.
def : InstAlias<"cvtsi2ss{l}\t{$src, $dst|$dst, $src}",
                (CVTSI2SSrr_Int VR128:$dst, GR32:$src), 0, "att">;
def : InstAlias<"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
                (CVTSI642SSrr_Int VR128:$dst, GR64:$src), 0, "att">;
def : InstAlias<"cvtsi2sd{l}\t{$src, $dst|$dst, $src}",
                (CVTSI2SDrr_Int VR128:$dst, GR32:$src), 0, "att">;
def : InstAlias<"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
                (CVTSI642SDrr_Int VR128:$dst, GR64:$src), 0, "att">;

def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
                (CVTSI2SSrm_Int VR128:$dst, i32mem:$src), 0, "att">;
def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
                (CVTSI2SDrm_Int VR128:$dst, i32mem:$src), 0, "att">;

/// SSE 1 Only

// Aliases for intrinsics
let Predicates = [UseAVX] in {
  defm VCVTTSS2SI   : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
                                     ssmem, sse_load_f32, "cvttss2si",
                                     WriteCvtSS2I>,
                      XS, VEX, VEX_LIG;
  defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
                                     X86cvtts2Int, ssmem, sse_load_f32,
                                     "cvttss2si", WriteCvtSS2I>,
                      XS, VEX, VEX_LIG, VEX_W;
  // The double-precision source variants use the SD->int scheduling class,
  // in line with CVTTSD2SI/CVTTSD2SI64 and VCVTSD2SI/VCVTSD2SI64 (they were
  // previously tagged with the SS->int class).
  defm VCVTTSD2SI   : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
                                     sdmem, sse_load_f64, "cvttsd2si",
                                     WriteCvtSD2I>,
                      XD, VEX, VEX_LIG;
  defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
                                     X86cvtts2Int, sdmem, sse_load_f64,
                                     "cvttsd2si", WriteCvtSD2I>,
                      XD, VEX, VEX_LIG, VEX_W;
}
defm CVTTSS2SI   : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
                                  ssmem, sse_load_f32, "cvttss2si",
                                  WriteCvtSS2I>,
                   XS;
defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
                                  X86cvtts2Int, ssmem, sse_load_f32,
                                  "cvttss2si", WriteCvtSS2I>,
                   XS, REX_W;
defm CVTTSD2SI   : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
                                  sdmem, sse_load_f64, "cvttsd2si",
                                  WriteCvtSD2I>,
                   XD;
defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
                                  X86cvtts2Int, sdmem, sse_load_f64,
                                  "cvttsd2si", WriteCvtSD2I>,
                   XD, REX_W;

// AT&T aliases accepting an optional {l}/{q} suffix on the VEX records.
def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">;
def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">;
def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">;

// The same set for the legacy-encoded records.
def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">;
def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">;
def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">;
def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">;
// cvtss2si: scalar single -> integer, instantiated from sse12_cvt_sint.
let Predicates = [UseAVX] in {
  defm VCVTSS2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
                                    ssmem, sse_load_f32, "cvtss2si",
                                    WriteCvtSS2I>,
                     XS, VEX, VEX_LIG;
  defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
                                    ssmem, sse_load_f32, "cvtss2si",
                                    WriteCvtSS2I>,
                     XS, VEX, VEX_W, VEX_LIG;
}
defm CVTSS2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
                                 ssmem, sse_load_f32, "cvtss2si",
                                 WriteCvtSS2I>,
                  XS;
defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
                                 ssmem, sse_load_f32, "cvtss2si",
                                 WriteCvtSS2I>,
                  XS, REX_W;

// cvtdq2ps: packed doubleword -> packed single, 128-bit and 256-bit VEX
// records followed by the legacy-encoded 128-bit record.
defm VCVTDQ2PS  : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load,
                              "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                              SSEPackedSingle, WriteCvtI2PS>,
                  PS, VEX, Requires<[HasAVX, NoVLX]>, VEX_WIG;
defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, load,
                              "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                              SSEPackedSingle, WriteCvtI2PSY>,
                  PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, VEX_WIG;

defm CVTDQ2PS   : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memop,
                              "cvtdq2ps\t{$src, $dst|$dst, $src}",
                              SSEPackedSingle, WriteCvtI2PS>,
                  PS, Requires<[UseSSE2]>;

// AVX aliases
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SIrm_Int GR32:$dst, ssmem:$src), 0, "att">;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SIrm_Int GR32:$dst, sdmem:$src), 0, "att">;
def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SI64rm_Int GR64:$dst, ssmem:$src), 0, "att">;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0, "att">;

// SSE aliases
def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSS2SIrm_Int GR32:$dst, ssmem:$src), 0, "att">;
def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSD2SIrm_Int GR32:$dst, sdmem:$src), 0, "att">;
def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSS2SI64rm_Int GR64:$dst, ssmem:$src), 0, "att">;
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0, "att">;

/// SSE 2 Only

// Convert scalar double to scalar single
// (pattern-less FR32/FR64 records; the register form is selected by the
// explicit fpround Pat that follows them).
let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX] in {
  def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg,
                         (outs FR32:$dst),
                         (ins FR32:$src1, FR64:$src2),
                         "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         []>,
                    VEX_4V, VEX_LIG, VEX_WIG,
                    Sched<[WriteCvtSD2SS]>;
  let mayLoad = 1 in
  def VCVTSD2SSrm : I<0x5A, MRMSrcMem,
                      (outs FR32:$dst),
                      (ins FR32:$src1, f64mem:$src2),
                      "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      []>,
                    XD, VEX_4V, VEX_LIG, VEX_WIG,
                    Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
}

// $src1 of the VEX form is fed an IMPLICIT_DEF.
def : Pat<(f32 (fpround FR64:$src)),
          (VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>,
      Requires<[UseAVX]>;
// Legacy-encoded scalar double -> single on FR32/FR64, selected directly
// from fpround.
let isCodeGenOnly = 1 in {
  def CVTSD2SSrr : SDI<0x5A, MRMSrcReg,
                       (outs FR32:$dst),
                       (ins FR64:$src),
                       "cvtsd2ss\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (fpround FR64:$src))]>,
                   Sched<[WriteCvtSD2SS]>;
  def CVTSD2SSrm : I<0x5A, MRMSrcMem,
                     (outs FR32:$dst),
                     (ins f64mem:$src),
                     "cvtsd2ss\t{$src, $dst|$dst, $src}",
                     [(set FR32:$dst, (fpround (loadf64 addr:$src)))]>,
                   XD, Requires<[UseSSE2, OptForSize]>,
                   Sched<[WriteCvtSD2SS.Folded]>;
}

// VR128 (intrinsic) forms of cvtsd2ss, matched through X86frounds.
def VCVTSD2SSrr_Int : I<0x5A, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst,
                              (v4f32 (X86frounds VR128:$src1,
                                                 (v2f64 VR128:$src2))))]>,
                      XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>,
                      Sched<[WriteCvtSD2SS]>;
def VCVTSD2SSrm_Int : I<0x5A, MRMSrcMem,
                        (outs VR128:$dst),
                        (ins VR128:$src1, sdmem:$src2),
                        "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst,
                              (v4f32 (X86frounds VR128:$src1,
                                                 sse_load_f64:$src2)))]>,
                      XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>,
                      Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in {
  def CVTSD2SSrr_Int : I<0x5A, MRMSrcReg,
                         (outs VR128:$dst),
                         (ins VR128:$src1, VR128:$src2),
                         "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                               (v4f32 (X86frounds VR128:$src1,
                                                  (v2f64 VR128:$src2))))]>,
                       XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>;
  def CVTSD2SSrm_Int : I<0x5A, MRMSrcMem,
                         (outs VR128:$dst),
                         (ins VR128:$src1, sdmem:$src2),
                         "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                               (v4f32 (X86frounds VR128:$src1,
                                                  sse_load_f64:$src2)))]>,
                       XD, Requires<[UseSSE2]>,
                       Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
}

// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
let isCodeGenOnly = 1, hasSideEffects = 0 in {
  def VCVTSS2SDrr : I<0x5A, MRMSrcReg,
                      (outs FR64:$dst),
                      (ins FR64:$src1, FR32:$src2),
                      "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      []>,
                    XS, VEX_4V, VEX_LIG, VEX_WIG,
                    Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>;
  let mayLoad = 1 in
  def VCVTSS2SDrm : I<0x5A, MRMSrcMem,
                      (outs FR64:$dst),
                      (ins FR64:$src1, f32mem:$src2),
                      "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      []>,
                    XS, VEX_4V, VEX_LIG, VEX_WIG,
                    Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>,
                    Requires<[UseAVX, OptForSize]>;
} // isCodeGenOnly = 1, hasSideEffects = 0

// fpextend selection for the VEX records; $src1 is fed an IMPLICIT_DEF.
def : Pat<(f64 (fpextend FR32:$src)),
          (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>,
      Requires<[UseAVX]>;
def : Pat<(fpextend (loadf32 addr:$src)),
          (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>,
      Requires<[UseAVX, OptForSize]>;

let isCodeGenOnly = 1 in {
  def CVTSS2SDrr : I<0x5A, MRMSrcReg,
                     (outs FR64:$dst),
                     (ins FR32:$src),
                     "cvtss2sd\t{$src, $dst|$dst, $src}",
                     [(set FR64:$dst, (fpextend FR32:$src))]>,
                   XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>;
  def CVTSS2SDrm : I<0x5A, MRMSrcMem,
                     (outs FR64:$dst),
                     (ins f32mem:$src),
                     "cvtss2sd\t{$src, $dst|$dst, $src}",
                     [(set FR64:$dst, (fpextend (loadf32 addr:$src)))]>,
                   XS, Requires<[UseSSE2, OptForSize]>,
                   Sched<[WriteCvtSS2SD.Folded]>;
} // isCodeGenOnly = 1

// VR128 (intrinsic) forms of cvtss2sd; these records carry no patterns.
let hasSideEffects = 0 in {
  def VCVTSS2SDrr_Int : I<0x5A, MRMSrcReg,
                          (outs VR128:$dst),
                          (ins VR128:$src1, VR128:$src2),
                          "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          []>,
                        XS, VEX_4V, VEX_LIG, VEX_WIG,
                        Requires<[HasAVX]>, Sched<[WriteCvtSS2SD]>;
  let mayLoad = 1 in
  def VCVTSS2SDrm_Int : I<0x5A, MRMSrcMem,
                          (outs VR128:$dst),
                          (ins VR128:$src1, ssmem:$src2),
                          "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          []>,
                        XS, VEX_4V, VEX_LIG, VEX_WIG, Requires<[HasAVX]>,
                        Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
  let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
    def CVTSS2SDrr_Int : I<0x5A, MRMSrcReg,
                           (outs VR128:$dst),
                           (ins VR128:$src1, VR128:$src2),
                           "cvtss2sd\t{$src2, $dst|$dst, $src2}",
                           []>,
                         XS, Requires<[UseSSE2]>,
                         Sched<[WriteCvtSS2SD]>;
    let mayLoad = 1 in
    def CVTSS2SDrm_Int : I<0x5A, MRMSrcMem,
                           (outs VR128:$dst),
                           (ins VR128:$src1, ssmem:$src2),
                           "cvtss2sd\t{$src2, $dst|$dst, $src2}",
                           []>,
                         XS, Requires<[UseSSE2]>,
                         Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
  }
} // hasSideEffects = 0

// Patterns used for matching (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss and
// (v)cvtss2sd intrinsic sequences from clang which produce unnecessary
// vmovs{s,d} instructions
let Predicates = [UseAVX] in {
  def : Pat<(v4f32 (X86Movss
                     (v4f32 VR128:$dst),
                     (v4f32 (scalar_to_vector
                       (f32 (fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
            (VCVTSD2SSrr_Int VR128:$dst, VR128:$src)>;

  def : Pat<(v2f64 (X86Movsd
                     (v2f64 VR128:$dst),
                     (v2f64 (scalar_to_vector
                       (f64 (fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
            (VCVTSS2SDrr_Int VR128:$dst, VR128:$src)>;

  def : Pat<(v4f32 (X86Movss
                     (v4f32 VR128:$dst),
                     (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
            (VCVTSI642SSrr_Int VR128:$dst, GR64:$src)>;

  def : Pat<(v4f32 (X86Movss
                     (v4f32 VR128:$dst),
                     (v4f32 (scalar_to_vector
                       (f32 (sint_to_fp (loadi64 addr:$src))))))),
            (VCVTSI642SSrm_Int VR128:$dst, addr:$src)>;

  def : Pat<(v4f32 (X86Movss
                     (v4f32 VR128:$dst),
                     (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
            (VCVTSI2SSrr_Int VR128:$dst, GR32:$src)>;

  def : Pat<(v4f32 (X86Movss
                     (v4f32 VR128:$dst),
                     (v4f32 (scalar_to_vector
                       (f32 (sint_to_fp (loadi32 addr:$src))))))),
            (VCVTSI2SSrm_Int VR128:$dst, addr:$src)>;

  def : Pat<(v2f64 (X86Movsd
                     (v2f64 VR128:$dst),
                     (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
            (VCVTSI642SDrr_Int VR128:$dst, GR64:$src)>;

  def : Pat<(v2f64 (X86Movsd
                     (v2f64 VR128:$dst),
                     (v2f64 (scalar_to_vector
                       (f64 (sint_to_fp (loadi64 addr:$src))))))),
            (VCVTSI642SDrm_Int VR128:$dst, addr:$src)>;

  def : Pat<(v2f64 (X86Movsd
                     (v2f64 VR128:$dst),
                     (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
            (VCVTSI2SDrr_Int VR128:$dst, GR32:$src)>;

  def : Pat<(v2f64 (X86Movsd
                     (v2f64 VR128:$dst),
                     (v2f64 (scalar_to_vector
                       (f64 (sint_to_fp (loadi32 addr:$src))))))),
            (VCVTSI2SDrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseAVX]

let Predicates = [UseSSE2] in {
  def : Pat<(v4f32 (X86Movss
                     (v4f32 VR128:$dst),
                     (v4f32 (scalar_to_vector
                       (f32 (fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
            (CVTSD2SSrr_Int VR128:$dst, VR128:$src)>;

  def : Pat<(v2f64 (X86Movsd
                     (v2f64 VR128:$dst),
                     (v2f64 (scalar_to_vector
                       (f64 (fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
            (CVTSS2SDrr_Int VR128:$dst, VR128:$src)>;

  def : Pat<(v2f64 (X86Movsd
                     (v2f64 VR128:$dst),
                     (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
            (CVTSI642SDrr_Int VR128:$dst, GR64:$src)>;

  def : Pat<(v2f64 (X86Movsd
                     (v2f64 VR128:$dst),
                     (v2f64 (scalar_to_vector
                       (f64 (sint_to_fp (loadi64 addr:$src))))))),
            (CVTSI642SDrm_Int VR128:$dst, addr:$src)>;

  def : Pat<(v2f64 (X86Movsd
                     (v2f64 VR128:$dst),
                     (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
            (CVTSI2SDrr_Int VR128:$dst, GR32:$src)>;

  def : Pat<(v2f64 (X86Movsd
                     (v2f64 VR128:$dst),
                     (v2f64 (scalar_to_vector
                       (f64 (sint_to_fp (loadi32 addr:$src))))))),
            (CVTSI2SDrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseSSE2]

let Predicates = [UseSSE1] in {
  def : Pat<(v4f32 (X86Movss
                     (v4f32 VR128:$dst),
                     (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
            (CVTSI642SSrr_Int VR128:$dst, GR64:$src)>;

  def : Pat<(v4f32 (X86Movss
                     (v4f32 VR128:$dst),
                     (v4f32 (scalar_to_vector
                       (f32 (sint_to_fp (loadi64 addr:$src))))))),
            (CVTSI642SSrm_Int VR128:$dst, addr:$src)>;

  def : Pat<(v4f32 (X86Movss
                     (v4f32 VR128:$dst),
                     (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
            (CVTSI2SSrr_Int VR128:$dst, GR32:$src)>;

  def : Pat<(v4f32 (X86Movss
                     (v4f32 VR128:$dst),
                     (v4f32 (scalar_to_vector
                       (f32 (sint_to_fp (loadi32 addr:$src))))))),
            (CVTSI2SSrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseSSE1]

let Predicates = [HasAVX, NoVLX] in {
  // Convert packed single/double fp to doubleword
  def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg,
                         (outs VR128:$dst),
                         (ins VR128:$src),
                         "cvtps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                               (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>,
                    VEX, Sched<[WriteCvtPS2I]>, VEX_WIG;
  def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem,
                         (outs VR128:$dst),
                         (ins f128mem:$src),
                         "cvtps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                               (v4i32 (X86cvtp2Int (loadv4f32 addr:$src))))]>,
                    VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG;
  def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg,
                          (outs VR256:$dst),
                          (ins VR256:$src),
                          "cvtps2dq\t{$src, $dst|$dst, $src}",
                          [(set VR256:$dst,
                                (v8i32 (X86cvtp2Int (v8f32 VR256:$src))))]>,
                     VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG;
  def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem,
                          (outs VR256:$dst),
                          (ins f256mem:$src),
                          "cvtps2dq\t{$src, $dst|$dst, $src}",
                          [(set VR256:$dst,
                                (v8i32 (X86cvtp2Int (loadv8f32 addr:$src))))]>,
                     VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG;
}
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg,
                     (outs VR128:$dst),
                     (ins VR128:$src),
                     "cvtps2dq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                           (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>,
                 Sched<[WriteCvtPS2I]>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem,
                     (outs VR128:$dst),
                     (ins f128mem:$src),
                     "cvtps2dq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                           (v4i32 (X86cvtp2Int (memopv4f32 addr:$src))))]>,
                 Sched<[WriteCvtPS2ILd]>;


// Convert Packed Double FP to Packed DW Integers
let Predicates = [HasAVX, NoVLX] in {
  // A ymm register operand is enough for the assembler to pick the 256-bit
  // rr form, but a memory operand gives no such hint.  The memory forms
  // therefore carry an explicit {x}/{y} mnemonic suffix.
  def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src),
                        "vcvtpd2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                              (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>,
                    VEX, Sched<[WriteCvtPD2I]>, VEX_WIG;

  // XMM only
  def VCVTPD2DQrm : SDI<0xE6, MRMSrcMem,
                        (outs VR128:$dst),
                        (ins f128mem:$src),
                        "vcvtpd2dq{x}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                              (v4i32 (X86cvtp2Int (loadv2f64 addr:$src))))]>,
                    VEX, Sched<[WriteCvtPD2ILd]>, VEX_WIG;

  // YMM only
  def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg,
                         (outs VR128:$dst),
                         (ins VR256:$src),
                         "vcvtpd2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                               (v4i32 (X86cvtp2Int (v4f64 VR256:$src))))]>,
                     VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG;
  def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem,
                         (outs VR128:$dst),
                         (ins f256mem:$src),
                         "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                               (v4i32 (X86cvtp2Int (loadv4f64 addr:$src))))]>,
                     VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG;
}
// Explicitly suffixed AT&T spellings for the register forms of vcvtpd2dq.
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
                (VCVTPD2DQrr VR128:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}",
                (VCVTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">;

// Legacy-encoded cvtpd2dq (memory form first, then register form).
def CVTPD2DQrm : SDI<0xE6, MRMSrcMem,
                     (outs VR128:$dst),
                     (ins f128mem:$src),
                     "cvtpd2dq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                           (v4i32 (X86cvtp2Int (memopv2f64 addr:$src))))]>,
                 Sched<[WriteCvtPD2ILd]>;
def CVTPD2DQrr : SDI<0xE6, MRMSrcReg,
                     (outs VR128:$dst),
                     (ins VR128:$src),
                     "cvtpd2dq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                           (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>,
                 Sched<[WriteCvtPD2I]>;

// Convert with truncation packed single/double fp to doubleword
// SSE2 packed instructions with XS prefix
let Predicates = [HasAVX, NoVLX] in {
  def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg,
                           (outs VR128:$dst),
                           (ins VR128:$src),
                           "cvttps2dq\t{$src, $dst|$dst, $src}",
                           [(set VR128:$dst,
                                 (v4i32 (X86cvttp2si (v4f32 VR128:$src))))]>,
                     VEX, Sched<[WriteCvtPS2I]>, VEX_WIG;
  def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem,
                           (outs VR128:$dst),
                           (ins f128mem:$src),
                           "cvttps2dq\t{$src, $dst|$dst, $src}",
                           [(set VR128:$dst,
                                 (v4i32 (X86cvttp2si (loadv4f32 addr:$src))))]>,
                     VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG;
  def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg,
                            (outs VR256:$dst),
                            (ins VR256:$src),
                            "cvttps2dq\t{$src, $dst|$dst, $src}",
                            [(set VR256:$dst,
                                  (v8i32 (X86cvttp2si (v8f32 VR256:$src))))]>,
                      VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG;
  def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem,
                            (outs VR256:$dst),
                            (ins f256mem:$src),
                            "cvttps2dq\t{$src, $dst|$dst, $src}",
                            [(set VR256:$dst,
                                  (v8i32 (X86cvttp2si (loadv8f32 addr:$src))))]>,
                      VEX, VEX_L,
                      Sched<[WriteCvtPS2IYLd]>, VEX_WIG;
}

def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg,
                       (outs VR128:$dst),
                       (ins VR128:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                             (v4i32 (X86cvttp2si (v4f32 VR128:$src))))]>,
                  Sched<[WriteCvtPS2I]>;
def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem,
                       (outs VR128:$dst),
                       (ins f128mem:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                             (v4i32 (X86cvttp2si (memopv4f32 addr:$src))))]>,
                  Sched<[WriteCvtPS2ILd]>;

// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
15360b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 15370b57cec5SDimitry Andric// XMM only 15380b57cec5SDimitry Andricdef VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 15390b57cec5SDimitry Andric "cvttpd2dq\t{$src, $dst|$dst, $src}", 15400b57cec5SDimitry Andric [(set VR128:$dst, 15410b57cec5SDimitry Andric (v4i32 (X86cvttp2si (v2f64 VR128:$src))))]>, 15420b57cec5SDimitry Andric VEX, Sched<[WriteCvtPD2I]>, VEX_WIG; 15430b57cec5SDimitry Andricdef VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 15440b57cec5SDimitry Andric "cvttpd2dq{x}\t{$src, $dst|$dst, $src}", 15450b57cec5SDimitry Andric [(set VR128:$dst, 15460b57cec5SDimitry Andric (v4i32 (X86cvttp2si (loadv2f64 addr:$src))))]>, 15470b57cec5SDimitry Andric VEX, Sched<[WriteCvtPD2ILd]>, VEX_WIG; 15480b57cec5SDimitry Andric 15490b57cec5SDimitry Andric// YMM only 15500b57cec5SDimitry Andricdef VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), 15510b57cec5SDimitry Andric "cvttpd2dq\t{$src, $dst|$dst, $src}", 15520b57cec5SDimitry Andric [(set VR128:$dst, 15530b57cec5SDimitry Andric (v4i32 (X86cvttp2si (v4f64 VR256:$src))))]>, 15540b57cec5SDimitry Andric VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG; 15550b57cec5SDimitry Andricdef VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), 15560b57cec5SDimitry Andric "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", 15570b57cec5SDimitry Andric [(set VR128:$dst, 15580b57cec5SDimitry Andric (v4i32 (X86cvttp2si (loadv4f64 addr:$src))))]>, 15590b57cec5SDimitry Andric VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG; 15600b57cec5SDimitry Andric} // Predicates = [HasAVX, NoVLX] 15610b57cec5SDimitry Andric 15620b57cec5SDimitry Andricdef : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}", 15630b57cec5SDimitry Andric (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0, "att">; 15640b57cec5SDimitry Andricdef : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}", 15650b57cec5SDimitry Andric 
(VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">; 15660b57cec5SDimitry Andric 15670b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 15680b57cec5SDimitry Andric def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))), 15690b57cec5SDimitry Andric (VCVTTPD2DQYrr VR256:$src)>; 15700b57cec5SDimitry Andric def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))), 15710b57cec5SDimitry Andric (VCVTTPD2DQYrm addr:$src)>; 15720b57cec5SDimitry Andric} 15730b57cec5SDimitry Andric 15740b57cec5SDimitry Andricdef CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 15750b57cec5SDimitry Andric "cvttpd2dq\t{$src, $dst|$dst, $src}", 15760b57cec5SDimitry Andric [(set VR128:$dst, 15770b57cec5SDimitry Andric (v4i32 (X86cvttp2si (v2f64 VR128:$src))))]>, 15780b57cec5SDimitry Andric Sched<[WriteCvtPD2I]>; 15790b57cec5SDimitry Andricdef CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), 15800b57cec5SDimitry Andric "cvttpd2dq\t{$src, $dst|$dst, $src}", 15810b57cec5SDimitry Andric [(set VR128:$dst, 15820b57cec5SDimitry Andric (v4i32 (X86cvttp2si (memopv2f64 addr:$src))))]>, 15830b57cec5SDimitry Andric Sched<[WriteCvtPD2ILd]>; 15840b57cec5SDimitry Andric 15850b57cec5SDimitry Andric// Convert packed single to packed double 15860b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 15870b57cec5SDimitry Andric // SSE2 instructions without OpSize prefix 15880b57cec5SDimitry Andricdef VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 15890b57cec5SDimitry Andric "vcvtps2pd\t{$src, $dst|$dst, $src}", 15900b57cec5SDimitry Andric [(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>, 15910b57cec5SDimitry Andric PS, VEX, Sched<[WriteCvtPS2PD]>, VEX_WIG; 15920b57cec5SDimitry Andricdef VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), 15930b57cec5SDimitry Andric "vcvtps2pd\t{$src, $dst|$dst, $src}", 15940b57cec5SDimitry Andric [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>, 15950b57cec5SDimitry Andric 
PS, VEX, Sched<[WriteCvtPS2PD.Folded]>, VEX_WIG; 15960b57cec5SDimitry Andricdef VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), 15970b57cec5SDimitry Andric "vcvtps2pd\t{$src, $dst|$dst, $src}", 15980b57cec5SDimitry Andric [(set VR256:$dst, (v4f64 (fpextend (v4f32 VR128:$src))))]>, 15990b57cec5SDimitry Andric PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, VEX_WIG; 16000b57cec5SDimitry Andricdef VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), 16010b57cec5SDimitry Andric "vcvtps2pd\t{$src, $dst|$dst, $src}", 16020b57cec5SDimitry Andric [(set VR256:$dst, (v4f64 (extloadv4f32 addr:$src)))]>, 16030b57cec5SDimitry Andric PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, VEX_WIG; 16040b57cec5SDimitry Andric} 16050b57cec5SDimitry Andric 16060b57cec5SDimitry Andriclet Predicates = [UseSSE2] in { 16070b57cec5SDimitry Andricdef CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 16080b57cec5SDimitry Andric "cvtps2pd\t{$src, $dst|$dst, $src}", 16090b57cec5SDimitry Andric [(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>, 16100b57cec5SDimitry Andric PS, Sched<[WriteCvtPS2PD]>; 16110b57cec5SDimitry Andricdef CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), 16120b57cec5SDimitry Andric "cvtps2pd\t{$src, $dst|$dst, $src}", 16130b57cec5SDimitry Andric [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>, 16140b57cec5SDimitry Andric PS, Sched<[WriteCvtPS2PD.Folded]>; 16150b57cec5SDimitry Andric} 16160b57cec5SDimitry Andric 16170b57cec5SDimitry Andric// Convert Packed DW Integers to Packed Double FP 16180b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 16190b57cec5SDimitry Andriclet hasSideEffects = 0, mayLoad = 1 in 16200b57cec5SDimitry Andricdef VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 16210b57cec5SDimitry Andric "vcvtdq2pd\t{$src, $dst|$dst, $src}", 16220b57cec5SDimitry Andric [(set VR128:$dst, 16230b57cec5SDimitry Andric (v2f64 (X86VSintToFP 
16240b57cec5SDimitry Andric (bc_v4i32 16250b57cec5SDimitry Andric (v2i64 (scalar_to_vector 16260b57cec5SDimitry Andric (loadi64 addr:$src)))))))]>, 16270b57cec5SDimitry Andric VEX, Sched<[WriteCvtI2PDLd]>, VEX_WIG; 16280b57cec5SDimitry Andricdef VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 16290b57cec5SDimitry Andric "vcvtdq2pd\t{$src, $dst|$dst, $src}", 16300b57cec5SDimitry Andric [(set VR128:$dst, 16310b57cec5SDimitry Andric (v2f64 (X86VSintToFP (v4i32 VR128:$src))))]>, 16320b57cec5SDimitry Andric VEX, Sched<[WriteCvtI2PD]>, VEX_WIG; 16330b57cec5SDimitry Andricdef VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), 16340b57cec5SDimitry Andric "vcvtdq2pd\t{$src, $dst|$dst, $src}", 16350b57cec5SDimitry Andric [(set VR256:$dst, 16360b57cec5SDimitry Andric (v4f64 (sint_to_fp (loadv4i32 addr:$src))))]>, 16370b57cec5SDimitry Andric VEX, VEX_L, Sched<[WriteCvtI2PDYLd]>, 16380b57cec5SDimitry Andric VEX_WIG; 16390b57cec5SDimitry Andricdef VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), 16400b57cec5SDimitry Andric "vcvtdq2pd\t{$src, $dst|$dst, $src}", 16410b57cec5SDimitry Andric [(set VR256:$dst, 16420b57cec5SDimitry Andric (v4f64 (sint_to_fp (v4i32 VR128:$src))))]>, 16430b57cec5SDimitry Andric VEX, VEX_L, Sched<[WriteCvtI2PDY]>, VEX_WIG; 16440b57cec5SDimitry Andric} 16450b57cec5SDimitry Andric 16460b57cec5SDimitry Andriclet hasSideEffects = 0, mayLoad = 1 in 16470b57cec5SDimitry Andricdef CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 16480b57cec5SDimitry Andric "cvtdq2pd\t{$src, $dst|$dst, $src}", 16490b57cec5SDimitry Andric [(set VR128:$dst, 16500b57cec5SDimitry Andric (v2f64 (X86VSintToFP 16510b57cec5SDimitry Andric (bc_v4i32 16520b57cec5SDimitry Andric (v2i64 (scalar_to_vector 16530b57cec5SDimitry Andric (loadi64 addr:$src)))))))]>, 16540b57cec5SDimitry Andric Sched<[WriteCvtI2PDLd]>; 16550b57cec5SDimitry Andricdef CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs 
VR128:$dst), (ins VR128:$src), 16560b57cec5SDimitry Andric "cvtdq2pd\t{$src, $dst|$dst, $src}", 16570b57cec5SDimitry Andric [(set VR128:$dst, 16580b57cec5SDimitry Andric (v2f64 (X86VSintToFP (v4i32 VR128:$src))))]>, 16590b57cec5SDimitry Andric Sched<[WriteCvtI2PD]>; 16600b57cec5SDimitry Andric 16610b57cec5SDimitry Andric// AVX register conversion intrinsics 16620b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 16630b57cec5SDimitry Andric def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), 16640b57cec5SDimitry Andric (VCVTDQ2PDrm addr:$src)>; 16650b57cec5SDimitry Andric} // Predicates = [HasAVX, NoVLX] 16660b57cec5SDimitry Andric 16670b57cec5SDimitry Andric// SSE2 register conversion intrinsics 16680b57cec5SDimitry Andriclet Predicates = [UseSSE2] in { 16690b57cec5SDimitry Andric def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), 16700b57cec5SDimitry Andric (CVTDQ2PDrm addr:$src)>; 16710b57cec5SDimitry Andric} // Predicates = [UseSSE2] 16720b57cec5SDimitry Andric 16730b57cec5SDimitry Andric// Convert packed double to packed single 16740b57cec5SDimitry Andric// The assembler can recognize rr 256-bit instructions by seeing a ymm 16750b57cec5SDimitry Andric// register, but the same isn't true when using memory operands instead. 16760b57cec5SDimitry Andric// Provide other assembly rr and rm forms to address this explicitly. 
16770b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 16780b57cec5SDimitry Andric// XMM only 16790b57cec5SDimitry Andricdef VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 16800b57cec5SDimitry Andric "cvtpd2ps\t{$src, $dst|$dst, $src}", 16810b57cec5SDimitry Andric [(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>, 16820b57cec5SDimitry Andric VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG; 16830b57cec5SDimitry Andricdef VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 16840b57cec5SDimitry Andric "cvtpd2ps{x}\t{$src, $dst|$dst, $src}", 16850b57cec5SDimitry Andric [(set VR128:$dst, (X86vfpround (loadv2f64 addr:$src)))]>, 16860b57cec5SDimitry Andric VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG; 16870b57cec5SDimitry Andric 16880b57cec5SDimitry Andricdef VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), 16890b57cec5SDimitry Andric "cvtpd2ps\t{$src, $dst|$dst, $src}", 16900b57cec5SDimitry Andric [(set VR128:$dst, (X86vfpround VR256:$src))]>, 16910b57cec5SDimitry Andric VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG; 16920b57cec5SDimitry Andricdef VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), 16930b57cec5SDimitry Andric "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", 16940b57cec5SDimitry Andric [(set VR128:$dst, (X86vfpround (loadv4f64 addr:$src)))]>, 16950b57cec5SDimitry Andric VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG; 16960b57cec5SDimitry Andric} // Predicates = [HasAVX, NoVLX] 16970b57cec5SDimitry Andric 16980b57cec5SDimitry Andricdef : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}", 16990b57cec5SDimitry Andric (VCVTPD2PSrr VR128:$dst, VR128:$src), 0, "att">; 17000b57cec5SDimitry Andricdef : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}", 17010b57cec5SDimitry Andric (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0, "att">; 17020b57cec5SDimitry Andric 17030b57cec5SDimitry Andricdef CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 
17040b57cec5SDimitry Andric "cvtpd2ps\t{$src, $dst|$dst, $src}", 17050b57cec5SDimitry Andric [(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>, 17060b57cec5SDimitry Andric Sched<[WriteCvtPD2PS]>; 17070b57cec5SDimitry Andricdef CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 17080b57cec5SDimitry Andric "cvtpd2ps\t{$src, $dst|$dst, $src}", 17090b57cec5SDimitry Andric [(set VR128:$dst, (X86vfpround (memopv2f64 addr:$src)))]>, 17100b57cec5SDimitry Andric Sched<[WriteCvtPD2PS.Folded]>; 17110b57cec5SDimitry Andric 17120b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 17130b57cec5SDimitry Andric def : Pat<(v4f32 (fpround (v4f64 VR256:$src))), 17140b57cec5SDimitry Andric (VCVTPD2PSYrr VR256:$src)>; 17150b57cec5SDimitry Andric def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))), 17160b57cec5SDimitry Andric (VCVTPD2PSYrm addr:$src)>; 17170b57cec5SDimitry Andric} 17180b57cec5SDimitry Andric 17190b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 17200b57cec5SDimitry Andric// SSE 1 & 2 - Compare Instructions 17210b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 17220b57cec5SDimitry Andric 17230b57cec5SDimitry Andric// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions 17240b57cec5SDimitry Andricmulticlass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop, 17250b57cec5SDimitry Andric SDNode OpNode, ValueType VT, 17260b57cec5SDimitry Andric PatFrag ld_frag, string asm, 17270b57cec5SDimitry Andric X86FoldableSchedWrite sched> { 17280b57cec5SDimitry Andric let isCommutable = 1 in 17290b57cec5SDimitry Andric def rr : SIi8<0xC2, MRMSrcReg, 17300b57cec5SDimitry Andric (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm, 1731*8bcb0991SDimitry Andric [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, timm:$cc))]>, 17320b57cec5SDimitry Andric Sched<[sched]>; 17330b57cec5SDimitry Andric def rm : SIi8<0xC2, MRMSrcMem, 
17340b57cec5SDimitry Andric (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm, 17350b57cec5SDimitry Andric [(set RC:$dst, (OpNode (VT RC:$src1), 1736*8bcb0991SDimitry Andric (ld_frag addr:$src2), timm:$cc))]>, 17370b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 17380b57cec5SDimitry Andric} 17390b57cec5SDimitry Andric 17400b57cec5SDimitry Andriclet isCodeGenOnly = 1 in { 17410b57cec5SDimitry Andric let ExeDomain = SSEPackedSingle in 17420b57cec5SDimitry Andric defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmps, f32, loadf32, 17430b57cec5SDimitry Andric "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 17440b57cec5SDimitry Andric SchedWriteFCmpSizes.PS.Scl>, XS, VEX_4V, VEX_LIG, VEX_WIG; 17450b57cec5SDimitry Andric let ExeDomain = SSEPackedDouble in 17460b57cec5SDimitry Andric defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmps, f64, loadf64, 17470b57cec5SDimitry Andric "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 17480b57cec5SDimitry Andric SchedWriteFCmpSizes.PD.Scl>, 17490b57cec5SDimitry Andric XD, VEX_4V, VEX_LIG, VEX_WIG; 17500b57cec5SDimitry Andric 17510b57cec5SDimitry Andric let Constraints = "$src1 = $dst" in { 17520b57cec5SDimitry Andric let ExeDomain = SSEPackedSingle in 17530b57cec5SDimitry Andric defm CMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmps, f32, loadf32, 17540b57cec5SDimitry Andric "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", 17550b57cec5SDimitry Andric SchedWriteFCmpSizes.PS.Scl>, XS; 17560b57cec5SDimitry Andric let ExeDomain = SSEPackedDouble in 17570b57cec5SDimitry Andric defm CMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmps, f64, loadf64, 17580b57cec5SDimitry Andric "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}", 17590b57cec5SDimitry Andric SchedWriteFCmpSizes.PD.Scl>, XD; 17600b57cec5SDimitry Andric } 17610b57cec5SDimitry Andric} 17620b57cec5SDimitry Andric 17630b57cec5SDimitry Andricmulticlass sse12_cmp_scalar_int<Operand memop, 17640b57cec5SDimitry Andric Intrinsic Int, string asm, 
X86FoldableSchedWrite sched, 17650b57cec5SDimitry Andric ComplexPattern mem_cpat> { 17660b57cec5SDimitry Andric def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst), 17670b57cec5SDimitry Andric (ins VR128:$src1, VR128:$src, u8imm:$cc), asm, 17680b57cec5SDimitry Andric [(set VR128:$dst, (Int VR128:$src1, 1769*8bcb0991SDimitry Andric VR128:$src, timm:$cc))]>, 17700b57cec5SDimitry Andric Sched<[sched]>; 17710b57cec5SDimitry Andriclet mayLoad = 1 in 17720b57cec5SDimitry Andric def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst), 17730b57cec5SDimitry Andric (ins VR128:$src1, memop:$src, u8imm:$cc), asm, 17740b57cec5SDimitry Andric [(set VR128:$dst, (Int VR128:$src1, 1775*8bcb0991SDimitry Andric mem_cpat:$src, timm:$cc))]>, 17760b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 17770b57cec5SDimitry Andric} 17780b57cec5SDimitry Andric 17790b57cec5SDimitry Andric// Aliases to match intrinsics which expect XMM operand(s). 17800b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in 17810b57cec5SDimitry Andricdefm VCMPSS : sse12_cmp_scalar_int<ssmem, int_x86_sse_cmp_ss, 17820b57cec5SDimitry Andric "cmpss\t{$cc, $src, $src1, $dst|$dst, $src1, $src, $cc}", 17830b57cec5SDimitry Andric SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, 17840b57cec5SDimitry Andric XS, VEX_4V, VEX_LIG, VEX_WIG; 17850b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in 17860b57cec5SDimitry Andricdefm VCMPSD : sse12_cmp_scalar_int<sdmem, int_x86_sse2_cmp_sd, 17870b57cec5SDimitry Andric "cmpsd\t{$cc, $src, $src1, $dst|$dst, $src1, $src, $cc}", 17880b57cec5SDimitry Andric SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, 17890b57cec5SDimitry Andric XD, VEX_4V, VEX_LIG, VEX_WIG; 17900b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { 17910b57cec5SDimitry Andric let ExeDomain = SSEPackedSingle in 17920b57cec5SDimitry Andric defm CMPSS : sse12_cmp_scalar_int<ssmem, int_x86_sse_cmp_ss, 17930b57cec5SDimitry Andric "cmpss\t{$cc, $src, $dst|$dst, $src, $cc}", 17940b57cec5SDimitry Andric 
SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS; 17950b57cec5SDimitry Andric let ExeDomain = SSEPackedDouble in 17960b57cec5SDimitry Andric defm CMPSD : sse12_cmp_scalar_int<sdmem, int_x86_sse2_cmp_sd, 17970b57cec5SDimitry Andric "cmpsd\t{$cc, $src, $dst|$dst, $src, $cc}", 17980b57cec5SDimitry Andric SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, XD; 17990b57cec5SDimitry Andric} 18000b57cec5SDimitry Andric 18010b57cec5SDimitry Andric 18020b57cec5SDimitry Andric// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS 18030b57cec5SDimitry Andricmulticlass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode, 18040b57cec5SDimitry Andric ValueType vt, X86MemOperand x86memop, 18050b57cec5SDimitry Andric PatFrag ld_frag, string OpcodeStr, 18060b57cec5SDimitry Andric X86FoldableSchedWrite sched> { 18070b57cec5SDimitry Andriclet hasSideEffects = 0 in { 18080b57cec5SDimitry Andric def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), 18090b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 18100b57cec5SDimitry Andric [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>, 18110b57cec5SDimitry Andric Sched<[sched]>; 18120b57cec5SDimitry Andriclet mayLoad = 1 in 18130b57cec5SDimitry Andric def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), 18140b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 18150b57cec5SDimitry Andric [(set EFLAGS, (OpNode (vt RC:$src1), 18160b57cec5SDimitry Andric (ld_frag addr:$src2)))]>, 18170b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 18180b57cec5SDimitry Andric} 18190b57cec5SDimitry Andric} 18200b57cec5SDimitry Andric 18210b57cec5SDimitry Andric// sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp 18220b57cec5SDimitry Andricmulticlass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode, 18230b57cec5SDimitry Andric ValueType vt, Operand memop, 18240b57cec5SDimitry Andric ComplexPattern mem_cpat, string OpcodeStr, 
18250b57cec5SDimitry Andric X86FoldableSchedWrite sched> { 18260b57cec5SDimitry Andric def rr_Int: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), 18270b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 18280b57cec5SDimitry Andric [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>, 18290b57cec5SDimitry Andric Sched<[sched]>; 18300b57cec5SDimitry Andriclet mayLoad = 1 in 18310b57cec5SDimitry Andric def rm_Int: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2), 18320b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 18330b57cec5SDimitry Andric [(set EFLAGS, (OpNode (vt RC:$src1), 18340b57cec5SDimitry Andric mem_cpat:$src2))]>, 18350b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 18360b57cec5SDimitry Andric} 18370b57cec5SDimitry Andric 18380b57cec5SDimitry Andriclet Defs = [EFLAGS] in { 18390b57cec5SDimitry Andric defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, 18400b57cec5SDimitry Andric "ucomiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG; 18410b57cec5SDimitry Andric defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, 18420b57cec5SDimitry Andric "ucomisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG; 18430b57cec5SDimitry Andric let Pattern = []<dag> in { 18440b57cec5SDimitry Andric defm VCOMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32, 18450b57cec5SDimitry Andric "comiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG; 18460b57cec5SDimitry Andric defm VCOMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64, 18470b57cec5SDimitry Andric "comisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG; 18480b57cec5SDimitry Andric } 18490b57cec5SDimitry Andric 18500b57cec5SDimitry Andric let isCodeGenOnly = 1 in { 18510b57cec5SDimitry Andric defm VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, 18520b57cec5SDimitry Andric sse_load_f32, "ucomiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG; 18530b57cec5SDimitry Andric defm VUCOMISD : 
sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, 18540b57cec5SDimitry Andric sse_load_f64, "ucomisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG; 18550b57cec5SDimitry Andric 18560b57cec5SDimitry Andric defm VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, 18570b57cec5SDimitry Andric sse_load_f32, "comiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG; 18580b57cec5SDimitry Andric defm VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, 18590b57cec5SDimitry Andric sse_load_f64, "comisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG; 18600b57cec5SDimitry Andric } 18610b57cec5SDimitry Andric defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, 18620b57cec5SDimitry Andric "ucomiss", WriteFCom>, PS; 18630b57cec5SDimitry Andric defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, 18640b57cec5SDimitry Andric "ucomisd", WriteFCom>, PD; 18650b57cec5SDimitry Andric 18660b57cec5SDimitry Andric let Pattern = []<dag> in { 18670b57cec5SDimitry Andric defm COMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32, 18680b57cec5SDimitry Andric "comiss", WriteFCom>, PS; 18690b57cec5SDimitry Andric defm COMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64, 18700b57cec5SDimitry Andric "comisd", WriteFCom>, PD; 18710b57cec5SDimitry Andric } 18720b57cec5SDimitry Andric 18730b57cec5SDimitry Andric let isCodeGenOnly = 1 in { 18740b57cec5SDimitry Andric defm UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, 18750b57cec5SDimitry Andric sse_load_f32, "ucomiss", WriteFCom>, PS; 18760b57cec5SDimitry Andric defm UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, 18770b57cec5SDimitry Andric sse_load_f64, "ucomisd", WriteFCom>, PD; 18780b57cec5SDimitry Andric 18790b57cec5SDimitry Andric defm COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, 18800b57cec5SDimitry Andric sse_load_f32, "comiss", WriteFCom>, PS; 18810b57cec5SDimitry Andric defm COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, 
v2f64, sdmem, 18820b57cec5SDimitry Andric sse_load_f64, "comisd", WriteFCom>, PD; 18830b57cec5SDimitry Andric } 18840b57cec5SDimitry Andric} // Defs = [EFLAGS] 18850b57cec5SDimitry Andric 18860b57cec5SDimitry Andric// sse12_cmp_packed - sse 1 & 2 compare packed instructions 18870b57cec5SDimitry Andricmulticlass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, 18880b57cec5SDimitry Andric ValueType VT, string asm, 18890b57cec5SDimitry Andric X86FoldableSchedWrite sched, 18900b57cec5SDimitry Andric Domain d, PatFrag ld_frag> { 18910b57cec5SDimitry Andric let isCommutable = 1 in 18920b57cec5SDimitry Andric def rri : PIi8<0xC2, MRMSrcReg, 18930b57cec5SDimitry Andric (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm, 1894*8bcb0991SDimitry Andric [(set RC:$dst, (VT (X86cmpp RC:$src1, RC:$src2, timm:$cc)))], d>, 18950b57cec5SDimitry Andric Sched<[sched]>; 18960b57cec5SDimitry Andric def rmi : PIi8<0xC2, MRMSrcMem, 18970b57cec5SDimitry Andric (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm, 18980b57cec5SDimitry Andric [(set RC:$dst, 1899*8bcb0991SDimitry Andric (VT (X86cmpp RC:$src1, (ld_frag addr:$src2), timm:$cc)))], d>, 19000b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 19010b57cec5SDimitry Andric} 19020b57cec5SDimitry Andric 19030b57cec5SDimitry Andricdefm VCMPPS : sse12_cmp_packed<VR128, f128mem, v4f32, 19040b57cec5SDimitry Andric "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 19050b57cec5SDimitry Andric SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG; 19060b57cec5SDimitry Andricdefm VCMPPD : sse12_cmp_packed<VR128, f128mem, v2f64, 19070b57cec5SDimitry Andric "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 19080b57cec5SDimitry Andric SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG; 19090b57cec5SDimitry Andricdefm VCMPPSY : sse12_cmp_packed<VR256, f256mem, v8f32, 19100b57cec5SDimitry Andric "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, 
$src2, $cc}", 19110b57cec5SDimitry Andric SchedWriteFCmpSizes.PS.YMM, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L, VEX_WIG; 19120b57cec5SDimitry Andricdefm VCMPPDY : sse12_cmp_packed<VR256, f256mem, v4f64, 19130b57cec5SDimitry Andric "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 19140b57cec5SDimitry Andric SchedWriteFCmpSizes.PD.YMM, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L, VEX_WIG; 19150b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { 19160b57cec5SDimitry Andric defm CMPPS : sse12_cmp_packed<VR128, f128mem, v4f32, 19170b57cec5SDimitry Andric "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}", 19180b57cec5SDimitry Andric SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, memopv4f32>, PS; 19190b57cec5SDimitry Andric defm CMPPD : sse12_cmp_packed<VR128, f128mem, v2f64, 19200b57cec5SDimitry Andric "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}", 19210b57cec5SDimitry Andric SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD; 19220b57cec5SDimitry Andric} 19230b57cec5SDimitry Andric 1924*8bcb0991SDimitry Andricdef CommutableCMPCC : PatLeaf<(timm), [{ 19250b57cec5SDimitry Andric uint64_t Imm = N->getZExtValue() & 0x7; 19260b57cec5SDimitry Andric return (Imm == 0x00 || Imm == 0x03 || Imm == 0x04 || Imm == 0x07); 19270b57cec5SDimitry Andric}]>; 19280b57cec5SDimitry Andric 19290b57cec5SDimitry Andric// Patterns to select compares with loads in first operand. 
// Commuted-load forms of the FP compares: the load appears as the FIRST
// operand of the DAG node, and the selected instruction takes the register
// first and the memory operand second. Only condition codes accepted by
// CommutableCMPCC are matched.
let Predicates = [HasAVX] in {
  def : Pat<(v4f64 (X86cmpp (loadv4f64 addr:$src2), VR256:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPPDYrmi VR256:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(v8f32 (X86cmpp (loadv8f32 addr:$src2), VR256:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPPSYrmi VR256:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(v2f64 (X86cmpp (loadv2f64 addr:$src2), VR128:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(v4f32 (X86cmpp (loadv4f32 addr:$src2), VR128:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
                          CommutableCMPCC:$cc)),
            (VCMPSDrm FR64:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
                          CommutableCMPCC:$cc)),
            (VCMPSSrm FR32:$src1, addr:$src2, timm:$cc)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(v2f64 (X86cmpp (memopv2f64 addr:$src2), VR128:$src1,
                            CommutableCMPCC:$cc)),
            (CMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
                          CommutableCMPCC:$cc)),
            (CMPSDrm FR64:$src1, addr:$src2, timm:$cc)>;
}

let Predicates = [UseSSE1] in {
  def : Pat<(v4f32 (X86cmpp (memopv4f32 addr:$src2), VR128:$src1,
                            CommutableCMPCC:$cc)),
            (CMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
                          CommutableCMPCC:$cc)),
            (CMPSSrm FR32:$src1, addr:$src2, timm:$cc)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Shuffle Instructions
//===----------------------------------------------------------------------===//

/// sse12_shuffle - SSE 1 & 2 FP shuffle instructions (opcode 0xC6).
///
/// Emits two records per instantiation:
///   rmi - register, memory, 8-bit immediate; the memory operand is matched
///         through \p mem_frag.
///   rri - register, register, 8-bit immediate; marked commutable only when
///         \p IsCommutable is set.
multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
                         ValueType vt, string asm, PatFrag mem_frag,
                         X86FoldableSchedWrite sched, Domain d,
                         bit IsCommutable = 0> {
  def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
                 (ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
                 [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
                                     (i8 timm:$src3))))], d>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = IsCommutable in
  def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
                 (ins RC:$src1, RC:$src2, u8imm:$src3), asm,
                 [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
                                     (i8 timm:$src3))))], d>,
            Sched<[sched]>;
}

// VEX-encoded, three-operand forms.
let Predicates = [HasAVX, NoVLX] in {
  defm VSHUFPS  : sse12_shuffle<VR128, f128mem, v4f32,
           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>,
           PS, VEX_4V, VEX_WIG;
  defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv8f32, SchedWriteFShuffle.YMM, SSEPackedSingle>,
           PS, VEX_4V, VEX_L, VEX_WIG;
  defm VSHUFPD  : sse12_shuffle<VR128, f128mem, v2f64,
           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>,
           PD, VEX_4V, VEX_WIG;
  defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv4f64, SchedWriteFShuffle.YMM, SSEPackedDouble>,
           PD, VEX_4V, VEX_L, VEX_WIG;
}
// Legacy forms: $src1 is tied to $dst.
let Constraints = "$src1 = $dst" in {
  defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
           "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
           memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
  defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
           "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
           memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Unpack FP Instructions
//===----------------------------------------------------------------------===//

/// sse12_unpack_interleave - SSE 1 & 2 FP unpack and interleave.
///
/// Emits an `rr` (register/register) and an `rm` (register/memory) record for
/// \p OpNode on type \p vt. Only `rr` receives the \p IsCommutable flag.
multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
                                   PatFrag mem_frag, RegisterClass RC,
                                   X86MemOperand x86memop, string asm,
                                   X86FoldableSchedWrite sched, Domain d,
                                   bit IsCommutable = 0> {
  let isCommutable = IsCommutable in
  def rr : PI<opc, MRMSrcReg,
              (outs RC:$dst), (ins RC:$src1, RC:$src2),
              asm, [(set RC:$dst,
                         (vt (OpNode RC:$src1, RC:$src2)))], d>,
           Sched<[sched]>;
  def rm : PI<opc, MRMSrcMem,
              (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
              asm, [(set RC:$dst,
                         (vt (OpNode RC:$src1,
                                     (mem_frag addr:$src2))))], d>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

let Predicates = [HasAVX, NoVLX] in {
  // 128-bit VEX forms.
  defm VUNPCKHPS : sse12_unpack_interleave<0x15, X86Unpckh, v4f32, load,
        VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
  defm VUNPCKHPD : sse12_unpack_interleave<0x15, X86Unpckh, v2f64, load,
        VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD, VEX_4V, VEX_WIG;
  defm VUNPCKLPS : sse12_unpack_interleave<0x14, X86Unpckl, v4f32, load,
        VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
  defm VUNPCKLPD : sse12_unpack_interleave<0x14, X86Unpckl, v2f64, load,
        VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        SchedWriteFShuffle.XMM, SSEPackedDouble>, PD, VEX_4V, VEX_WIG;

  // 256-bit VEX forms.
  defm VUNPCKHPSY : sse12_unpack_interleave<0x15, X86Unpckh, v8f32, load,
        VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
  defm VUNPCKHPDY : sse12_unpack_interleave<0x15, X86Unpckh, v4f64, load,
        VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
  defm VUNPCKLPSY : sse12_unpack_interleave<0x14, X86Unpckl, v8f32, load,
        VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
  defm VUNPCKLPDY : sse12_unpack_interleave<0x14, X86Unpckl, v4f64, load,
        VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
} // Predicates = [HasAVX, NoVLX]

let Constraints = "$src1 = $dst" in {
  defm UNPCKHPS : sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memop,
        VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
        SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
  defm UNPCKHPD : sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memop,
        VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
        SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
  defm UNPCKLPS : sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memop,
        VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
        SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
  defm UNPCKLPD : sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memop,
        VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
        SchedWriteFShuffle.XMM, SSEPackedDouble>, PD;
} // Constraints = "$src1 = $dst"

// With AVX1 only, 256-bit integer unpacks are selected to the FP unpack
// instructions of matching element width.
let Predicates = [HasAVX1Only] in {
  def : Pat<(v8i32 (X86Unpckl VR256:$src1, (loadv8i32 addr:$src2))),
            (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
            (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (X86Unpckh VR256:$src1, (loadv8i32 addr:$src2))),
            (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
            (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v4i64 (X86Unpckl VR256:$src1, (loadv4i64 addr:$src2))),
            (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
            (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v4i64 (X86Unpckh VR256:$src1, (loadv4i64 addr:$src2))),
            (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
            (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
}

let Predicates = [UseSSE2] in {
  // Use MOVHPD if the load isn't aligned enough for UNPCKLPD.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                              (v2f64 (simple_load addr:$src2)))),
            (MOVHPDrm VR128:$src1, addr:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Extract Floating-Point Sign mask
//===----------------------------------------------------------------------===//

/// sse12_extr_sign_mask - SSE 1 & 2 sign-mask extraction (opcode 0x50).
///
/// Emits a single `rr` record that selects X86movmsk of a \p vt value held in
/// \p RC into a GR32orGR64 destination.
multiclass sse12_extr_sign_mask<RegisterClass RC, ValueType vt,
                                string asm, Domain d> {
  def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set GR32orGR64:$dst, (X86movmsk (vt RC:$src)))], d>,
           Sched<[WriteFMOVMSK]>;
}

let Predicates = [HasAVX] in {
  defm VMOVMSKPS  : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
                                         SSEPackedSingle>, PS, VEX, VEX_WIG;
  defm VMOVMSKPD  : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
                                         SSEPackedDouble>, PD, VEX, VEX_WIG;
  defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, v8f32, "movmskps",
                                         SSEPackedSingle>, PS, VEX, VEX_L,
                                         VEX_WIG;
  defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, v4f64, "movmskpd",
                                         SSEPackedDouble>, PD, VEX, VEX_L,
                                         VEX_WIG;

  // Also support integer VTs to avoid an int->fp bitcast in the DAG.
  def : Pat<(X86movmsk (v4i32 VR128:$src)),
            (VMOVMSKPSrr VR128:$src)>;
  def : Pat<(X86movmsk (v2i64 VR128:$src)),
            (VMOVMSKPDrr VR128:$src)>;
  def : Pat<(X86movmsk (v8i32 VR256:$src)),
            (VMOVMSKPSYrr VR256:$src)>;
  def : Pat<(X86movmsk (v4i64 VR256:$src)),
            (VMOVMSKPDYrr VR256:$src)>;
}

defm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
                                     SSEPackedSingle>, PS;
defm MOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
                                     SSEPackedDouble>, PD;

let Predicates = [UseSSE2] in {
  // Also support integer VTs to avoid an int->fp bitcast in the DAG.
  def : Pat<(X86movmsk (v4i32 VR128:$src)),
            (MOVMSKPSrr VR128:$src)>;
  def : Pat<(X86movmsk (v2i64 VR128:$src)),
            (MOVMSKPDrr VR128:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in { // SSE integer instructions

/// PDI_binop_rm - Simple SSE2 binary operator.
///
/// Emits `rr` and `rm` records for \p OpNode on \p OpVT. \p Is2Addr selects
/// between the two-operand and three-operand assembly strings; only `rr`
/// receives the \p IsCommutable flag.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                        X86MemOperand x86memop, X86FoldableSchedWrite sched,
                        bit IsCommutable, bit Is2Addr> {
  let isCommutable = IsCommutable in
  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
               (ins RC:$src1, RC:$src2),
               !if(Is2Addr,
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
               [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
           Sched<[sched]>;
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86memop:$src2),
               !if(Is2Addr,
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
               [(set RC:$dst, (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // ExeDomain = SSEPackedInt

/// PDI_binop_all - instantiate PDI_binop_rm three times for one opcode:
/// V#NAME (128-bit, under HasAVX), NAME (128-bit, $src1 tied to $dst) and
/// V#NAME#Y (256-bit, under HasAVX2). \p prd is added to the predicate list
/// of both V-prefixed forms.
multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
                         ValueType OpVT128, ValueType OpVT256,
                         X86SchedWriteWidths sched, bit IsCommutable,
                         Predicate prd> {
let Predicates = [HasAVX, prd] in
  defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
                             VR128, load, i128mem, sched.XMM,
                             IsCommutable, 0>, VEX_4V, VEX_WIG;

let Constraints = "$src1 = $dst" in
  defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
                           memop, i128mem, sched.XMM, IsCommutable, 1>;

let Predicates = [HasAVX2, prd] in
  defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
                               OpVT256, VR256, load, i256mem, sched.YMM,
                               IsCommutable, 0>, VEX_4V, VEX_L, VEX_WIG;
}

// These are ordered here for pattern ordering requirements with the fp versions

defm PAND  : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64,
                           SchedWriteVecLogic, 1, NoVLX>;
defm POR   : PDI_binop_all<0xEB, "por", or, v2i64, v4i64,
                           SchedWriteVecLogic, 1, NoVLX>;
defm PXOR  : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64,
                           SchedWriteVecLogic, 1, NoVLX>;
defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
                           SchedWriteVecLogic, 0, NoVLX>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Logical Instructions
//===----------------------------------------------------------------------===//

/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
///
/// There are no patterns here because isel prefers integer versions for SSE2
/// and later. There are SSE1 v4f32 patterns later.
///
/// Every instantiation below passes empty pattern lists, so \p OpNode is not
/// referenced inside the body.
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX, NoVLX] in {
    defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
          !strconcat(OpcodeStr, "ps"), f256mem, sched.YMM,
          [], [], 0>, PS, VEX_4V, VEX_L, VEX_WIG;

    defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
          !strconcat(OpcodeStr, "pd"), f256mem, sched.YMM,
          [], [], 0>, PD, VEX_4V, VEX_L, VEX_WIG;

    defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
          !strconcat(OpcodeStr, "ps"), f128mem, sched.XMM,
          [], [], 0>, PS, VEX_4V, VEX_WIG;

    defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
          !strconcat(OpcodeStr, "pd"), f128mem, sched.XMM,
          [], [], 0>, PD, VEX_4V, VEX_WIG;
  }

  let Constraints = "$src1 = $dst" in {
    defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
          !strconcat(OpcodeStr, "ps"), f128mem, sched.XMM,
          [], []>, PS;

    defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
          !strconcat(OpcodeStr, "pd"), f128mem, sched.XMM,
          [], []>, PD;
  }
}

defm AND  : sse12_fp_packed_logical<0x54, "and", and, SchedWriteFLogic>;
defm OR   : sse12_fp_packed_logical<0x56, "or", or, SchedWriteFLogic>;
defm XOR  : sse12_fp_packed_logical<0x57, "xor", xor, SchedWriteFLogic>;
let isCommutable = 0 in
  defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp, SchedWriteFLogic>;

// 256-bit integer logic for the element types other than v4i64 (v4i64 is the
// OpVT256 passed to the PAND/POR/PXOR/PANDN instantiations above).
let Predicates = [HasAVX2, NoVLX] in {
  // Register/register forms.
  def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
            (VPANDYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
            (VPANDYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
            (VPANDYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
            (VPORYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
            (VPORYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
            (VPORYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
            (VPXORYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
            (VPXORYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
            (VPXORYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
            (VPANDNYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
            (VPANDNYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
            (VPANDNYrr VR256:$src1, VR256:$src2)>;

  // Register/memory forms.
  def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)),
            (VPANDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)),
            (VPANDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)),
            (VPANDYrm VR256:$src1, addr:$src2)>;

  def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)),
            (VPORYrm VR256:$src1, addr:$src2)>;
  def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)),
            (VPORYrm VR256:$src1, addr:$src2)>;
  def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)),
            (VPORYrm VR256:$src1, addr:$src2)>;

  def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)),
            (VPXORYrm VR256:$src1, addr:$src2)>;
  def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)),
            (VPXORYrm VR256:$src1, addr:$src2)>;
  def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)),
            (VPXORYrm VR256:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)),
            (VPANDNYrm VR256:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)),
            (VPANDNYrm VR256:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)),
            (VPANDNYrm VR256:$src1, addr:$src2)>;
}

// If only AVX1 is supported, we need to handle integer operations with
// floating point instructions since the integer versions aren't available.
let Predicates = [HasAVX1Only] in {
  // Register/register: every 256-bit integer type selects the PS-domain
  // FP logic instruction.
  def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
            (VANDPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
            (VANDPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
            (VANDPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v4i64 (and VR256:$src1, VR256:$src2)),
            (VANDPSYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
            (VORPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
            (VORPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
            (VORPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v4i64 (or VR256:$src1, VR256:$src2)),
            (VORPSYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
            (VXORPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
            (VXORPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
            (VXORPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v4i64 (xor VR256:$src1, VR256:$src2)),
            (VXORPSYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v4i64 (X86andnp VR256:$src1, VR256:$src2)),
            (VANDNPSYrr VR256:$src1, VR256:$src2)>;

  // Register/memory.
  def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)),
            (VANDPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)),
            (VANDPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)),
            (VANDPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(and VR256:$src1, (loadv4i64 addr:$src2)),
            (VANDPSYrm VR256:$src1, addr:$src2)>;

  def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)),
            (VORPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)),
            (VORPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)),
            (VORPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(or VR256:$src1, (loadv4i64 addr:$src2)),
            (VORPSYrm VR256:$src1, addr:$src2)>;

  def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)),
            (VXORPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)),
            (VXORPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)),
            (VXORPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(xor VR256:$src1, (loadv4i64 addr:$src2)),
            (VXORPSYrm VR256:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)),
            (VANDNPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)),
            (VANDNPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)),
            (VANDNPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256:$src1, (loadv4i64 addr:$src2)),
            (VANDNPSYrm VR256:$src1, addr:$src2)>;
}

// 128-bit VEX integer logic for v16i8 / v8i16 / v4i32.
let Predicates = [HasAVX, NoVLX] in {
  // Register/register.
  def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
            (VPANDrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
            (VPANDrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
            (VPANDrr VR128:$src1, VR128:$src2)>;

  def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
            (VPORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
            (VPORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
            (VPORrr VR128:$src1, VR128:$src2)>;

  def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
            (VPXORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
            (VPXORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
            (VPXORrr VR128:$src1, VR128:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
            (VPANDNrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
            (VPANDNrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
            (VPANDNrr VR128:$src1, VR128:$src2)>;

  // Register/memory.
  def : Pat<(and VR128:$src1, (loadv16i8 addr:$src2)),
            (VPANDrm VR128:$src1, addr:$src2)>;
  def : Pat<(and VR128:$src1, (loadv8i16 addr:$src2)),
            (VPANDrm VR128:$src1, addr:$src2)>;
  def : Pat<(and VR128:$src1, (loadv4i32 addr:$src2)),
            (VPANDrm VR128:$src1, addr:$src2)>;

  def : Pat<(or VR128:$src1, (loadv16i8 addr:$src2)),
            (VPORrm VR128:$src1, addr:$src2)>;
  def : Pat<(or VR128:$src1, (loadv8i16 addr:$src2)),
            (VPORrm VR128:$src1, addr:$src2)>;
  def : Pat<(or VR128:$src1, (loadv4i32 addr:$src2)),
            (VPORrm VR128:$src1, addr:$src2)>;

  def : Pat<(xor VR128:$src1, (loadv16i8 addr:$src2)),
            (VPXORrm VR128:$src1, addr:$src2)>;
  def : Pat<(xor VR128:$src1, (loadv8i16 addr:$src2)),
            (VPXORrm VR128:$src1, addr:$src2)>;
  def : Pat<(xor VR128:$src1, (loadv4i32 addr:$src2)),
            (VPXORrm VR128:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128:$src1, (loadv16i8 addr:$src2)),
            (VPANDNrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128:$src1, (loadv8i16 addr:$src2)),
            (VPANDNrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128:$src1, (loadv4i32 addr:$src2)),
            (VPANDNrm VR128:$src1, addr:$src2)>;
}

// Legacy-encoded 128-bit integer logic for v16i8 / v8i16 / v4i32; memory
// operands are matched through the memop fragments.
let Predicates = [UseSSE2] in {
  // Register/register.
  def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
            (PANDrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
            (PANDrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
            (PANDrr VR128:$src1, VR128:$src2)>;

  def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
            (PORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
            (PORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
            (PORrr VR128:$src1, VR128:$src2)>;

  def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
            (PXORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
            (PXORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
            (PXORrr VR128:$src1, VR128:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
            (PANDNrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
            (PANDNrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
            (PANDNrr VR128:$src1, VR128:$src2)>;

  // Register/memory.
  def : Pat<(and VR128:$src1, (memopv16i8 addr:$src2)),
            (PANDrm VR128:$src1, addr:$src2)>;
  def : Pat<(and VR128:$src1, (memopv8i16 addr:$src2)),
            (PANDrm VR128:$src1, addr:$src2)>;
  def : Pat<(and VR128:$src1, (memopv4i32 addr:$src2)),
            (PANDrm VR128:$src1, addr:$src2)>;

  def : Pat<(or VR128:$src1, (memopv16i8 addr:$src2)),
            (PORrm VR128:$src1, addr:$src2)>;
  def : Pat<(or VR128:$src1, (memopv8i16 addr:$src2)),
            (PORrm VR128:$src1, addr:$src2)>;
  def : Pat<(or VR128:$src1, (memopv4i32 addr:$src2)),
            (PORrm VR128:$src1, addr:$src2)>;

  def : Pat<(xor VR128:$src1, (memopv16i8 addr:$src2)),
            (PXORrm VR128:$src1, addr:$src2)>;
  def : Pat<(xor VR128:$src1, (memopv8i16 addr:$src2)),
            (PXORrm VR128:$src1, addr:$src2)>;
  def : Pat<(xor VR128:$src1, (memopv4i32 addr:$src2)),
            (PXORrm VR128:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128:$src1, (memopv16i8 addr:$src2)),
            (PANDNrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128:$src1, (memopv8i16 addr:$src2)),
            (PANDNrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128:$src1, (memopv4i32 addr:$src2)),
            (PANDNrm VR128:$src1, addr:$src2)>;
}

// Patterns for packed operations when we don't have integer type available.
// These are not wrapped in a Predicates scope.
def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
          (ANDPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)),
          (ORPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)),
          (XORPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)),
          (ANDNPSrr VR128:$src1, VR128:$src2)>;

def : Pat<(X86fand VR128:$src1, (memopv4f32 addr:$src2)),
          (ANDPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86for VR128:$src1, (memopv4f32 addr:$src2)),
          (ORPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86fxor VR128:$src1, (memopv4f32 addr:$src2)),
          (XORPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)),
          (ANDNPSrm VR128:$src1, addr:$src2)>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Arithmetic Instructions
//===----------------------------------------------------------------------===//

/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and
/// vector forms.
///
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a scalar)
/// and leaves the top elements unmodified (therefore these cannot be commuted).
///
/// These three forms can each be reg+reg or reg+mem.
///

/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those
/// classes below

/// basic_sse12_fp_binop_p - Packed single/double forms of a binary FP op.
///
///   opc       - opcode byte shared by every form defined here.
///   OpcodeStr - mnemonic stem; "ps" or "pd" is appended per form.
///   OpNode    - DAG node forwarded to sse12_fp_packed.
///   sched     - scheduling sizes; the PS/PD .XMM and .YMM members are used.
///
/// Under [HasAVX, NoVLX] this expands to V<NAME>PS / V<NAME>PD on VR128 and
/// V<NAME>PSY / V<NAME>PDY on VR256 (VEX_L), all VEX_4V and matching loadv*
/// fragments, with a trailing 0 passed to sse12_fp_packed (presumably its
/// Is2Addr flag, as in sse12_fp_scalar -- confirm against sse12_fp_packed).
/// It also expands to <NAME>PS / <NAME>PD with $src1 tied to $dst, matching
/// memopv* fragments.
multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86SchedWriteSizes sched> {
  let Predicates = [HasAVX, NoVLX] in {
  defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
                               VR128, v4f32, f128mem, loadv4f32,
                               SSEPackedSingle, sched.PS.XMM, 0>, PS, VEX_4V, VEX_WIG;
  defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
                               VR128, v2f64, f128mem, loadv2f64,
                               SSEPackedDouble, sched.PD.XMM, 0>, PD, VEX_4V, VEX_WIG;

  defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
                        OpNode, VR256, v8f32, f256mem, loadv8f32,
                        SSEPackedSingle, sched.PS.YMM, 0>, PS, VEX_4V, VEX_L, VEX_WIG;
  defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
                        OpNode, VR256, v4f64, f256mem, loadv4f64,
                        SSEPackedDouble, sched.PD.YMM, 0>, PD, VEX_4V, VEX_L, VEX_WIG;
  }

  let Constraints = "$src1 = $dst" in {
    defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
                              v4f32, f128mem, memopv4f32, SSEPackedSingle,
                              sched.PS.XMM>, PS;
    defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
                              v2f64, f128mem, memopv2f64, SSEPackedDouble,
                              sched.PD.XMM>, PD;
  }
}

/// basic_sse12_fp_binop_s - Plain scalar (FR32/FR64) forms of a binary FP op.
///
/// Parameters are as for basic_sse12_fp_binop_p; only sched.PS.Scl and
/// sched.PD.Scl are used. V<NAME>SS / V<NAME>SD are instantiated from
/// sse12_fp_scalar with Is2Addr = 0 (three-operand assembly string) and
/// VEX_4V; <NAME>SS / <NAME>SD keep the default Is2Addr = 1 and tie $src1 to
/// $dst.
multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteSizes sched> {
  defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
                         OpNode, FR32, f32mem, SSEPackedSingle, sched.PS.Scl, 0>,
                         XS, VEX_4V, VEX_LIG, VEX_WIG;
  defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
                         OpNode, FR64, f64mem, SSEPackedDouble, sched.PD.Scl, 0>,
                         XD, VEX_4V, VEX_LIG, VEX_WIG;

  let Constraints = "$src1 = $dst" in {
    defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
                              OpNode, FR32, f32mem, SSEPackedSingle,
                              sched.PS.Scl>, XS;
    defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
                              OpNode, FR64, f64mem, SSEPackedDouble,
                              sched.PD.Scl>, XD;
  }
}

/// basic_sse12_fp_binop_s_int - Whole-vector (VR128) scalar forms of a binary
/// FP op, built on sse12_fp_scalar_int (which defines rr_Int / rm_Int).
///
/// OpNode is an SDPatternOperator here, so callers may pass null_frag.
/// OpcodeStr is forwarded unchanged as the first string argument, and with
/// "ss"/"sd" appended as the assembly mnemonic. Memory operands use
/// ssmem/sdmem with the sse_load_f32/sse_load_f64 complex patterns.
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      X86SchedWriteSizes sched> {
  defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
                   !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
                   SSEPackedSingle, sched.PS.Scl, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG;
  defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
                   !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
                   SSEPackedDouble, sched.PD.Scl, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG;

  let Constraints = "$src1 = $dst" in {
    defm SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
                   !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
                   SSEPackedSingle, sched.PS.Scl>, XS;
    defm SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
                   !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
                   SSEPackedDouble, sched.PD.Scl>, XD;
  }
}

// Binary Arithmetic instructions
// Each defm combines the packed, scalar and whole-vector scalar multiclasses.
// ADD/MUL/SUB/DIV pass null_frag for the whole-vector scalar forms; MAX/MIN
// pass X86fmaxs/X86fmins instead.
defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SchedWriteFAddSizes>,
           basic_sse12_fp_binop_s<0x58, "add", fadd, SchedWriteFAddSizes>,
           basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAddSizes>;
defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SchedWriteFMulSizes>,
           basic_sse12_fp_binop_s<0x59, "mul", fmul, SchedWriteFMulSizes>,
           basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMulSizes>;
// SUB, DIV, MAX and MIN are instantiated with isCommutable = 0.
let isCommutable = 0 in {
  defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SchedWriteFAddSizes>,
             basic_sse12_fp_binop_s<0x5C, "sub", fsub, SchedWriteFAddSizes>,
             basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAddSizes>;
  defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SchedWriteFDivSizes>,
             basic_sse12_fp_binop_s<0x5E, "div", fdiv, SchedWriteFDivSizes>,
             basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDivSizes>;
  defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SchedWriteFCmpSizes>;
  defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SchedWriteFCmpSizes>;
}

// MAXC/MINC reuse the max/min opcodes and mnemonics for the X86fmaxc/X86fminc
// nodes. They are codegen-only and define no whole-vector scalar forms.
let isCodeGenOnly = 1 in {
  defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>;
  defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SchedWriteFCmpSizes>;
}

// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// Previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
//   }
//
// Previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0

// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
/// scalar_math_patterns - Select the whole-vector scalar (rr_Int / rm_Int)
/// form of an instruction for the DAG shape
///   Move($dst, scalar_to_vector(Op(extractelt($dst, 0), $src)))
/// i.e. a scalar op on element 0 of $dst whose result is inserted back into
/// $dst.
///
///   Op            - scalar arithmetic node being matched.
///   OpcPrefix     - instruction name stem; "rr_Int"/"rm_Int" is appended, and
///                   "V" is prepended for the AVX patterns.
///   Move          - node that performs the insert (X86Movss or X86Movsd in
///                   the instantiations below).
///   VT            - vector type of $dst and of the result.
///   EltTy         - scalar type produced by the extractelt.
///   RC            - register class of the scalar $src; the register pattern
///                   copies it to VR128 with COPY_TO_REGCLASS.
///   ld_frag       - load fragment matched by the memory pattern.
///   BasePredicate - predicate guarding the non-AVX patterns; the AVX
///                   patterns are always guarded by UseAVX.
multiclass scalar_math_patterns<SDNode Op, string OpcPrefix, SDNode Move,
                                ValueType VT, ValueType EltTy,
                                RegisterClass RC, PatFrag ld_frag,
                                Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    // extracted scalar math op with insert via movss/movsd
    def : Pat<(VT (Move (VT VR128:$dst),
                        (VT (scalar_to_vector
                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                                 RC:$src))))),
              (!cast<Instruction>(OpcPrefix#rr_Int) VT:$dst,
               (VT (COPY_TO_REGCLASS RC:$src, VR128)))>;
    // Same shape with the scalar operand coming from a load.
    def : Pat<(VT (Move (VT VR128:$dst),
                        (VT (scalar_to_vector
                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                                 (ld_frag addr:$src)))))),
              (!cast<Instruction>(OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
  }

  // Repeat for AVX versions of the instructions.
  let Predicates = [UseAVX] in {
    // extracted scalar math op with insert via movss/movsd
    def : Pat<(VT (Move (VT VR128:$dst),
                        (VT (scalar_to_vector
                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                                 RC:$src))))),
              (!cast<Instruction>("V"#OpcPrefix#rr_Int) VT:$dst,
               (VT (COPY_TO_REGCLASS RC:$src, VR128)))>;
    // Same shape with the scalar operand coming from a load.
    def : Pat<(VT (Move (VT VR128:$dst),
                        (VT (scalar_to_vector
                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                                 (ld_frag addr:$src)))))),
              (!cast<Instruction>("V"#OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
  }
}

// Single-precision instantiations (v4f32 / f32, insert via X86Movss).
defm : scalar_math_patterns<fadd, "ADDSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
defm : scalar_math_patterns<fsub, "SUBSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
defm : scalar_math_patterns<fmul, "MULSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
defm : scalar_math_patterns<fdiv, "DIVSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;

// Double-precision instantiations (v2f64 / f64, insert via X86Movsd).
defm : scalar_math_patterns<fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
defm : scalar_math_patterns<fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
defm : scalar_math_patterns<fmul, "MULSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
defm : scalar_math_patterns<fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;

/// Unop Arithmetic
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a
/// scalar) and leaves the top elements undefined.
///
/// And, we have a special variant form for a full-vector intrinsic form.

/// sse_fp_unop_s - SSE1 unops in scalar form
/// For the non-AVX defs, we need $src1 to be tied to $dst because
/// the HW instructions are 2 operand / destructive.
///
/// Defines four records:
///   r, m         - codegen-only forms on RC with selection patterns for
///                  OpNode; r requires [target], m requires
///                  [target, OptForSize].
///   r_Int, m_Int - VR128 forms with an extra tied $src1 operand and no
///                  pattern of their own ([]).
/// ScalarVT is accepted but not referenced in this multiclass.
multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                          ValueType ScalarVT, X86MemOperand x86memop,
                          Operand intmemop, SDNode OpNode, Domain d,
                          X86FoldableSchedWrite sched, Predicate target> {
  let isCodeGenOnly = 1, hasSideEffects = 0 in {
  def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1),
            !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
            [(set RC:$dst, (OpNode RC:$src1))], d>, Sched<[sched]>,
            Requires<[target]>;
  let mayLoad = 1 in
  def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1),
            !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
            [(set RC:$dst, (OpNode (load addr:$src1)))], d>,
            Sched<[sched.Folded]>,
            Requires<[target, OptForSize]>;
  }

  let hasSideEffects = 0, Constraints = "$src1 = $dst", ExeDomain = d in {
  def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>,
                Sched<[sched]>;
  let mayLoad = 1 in
  def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, intmemop:$src2),
                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

}

/// sse_fp_unop_s_intr - Selection patterns that map the intrinsic Intr onto
/// the NAME#r_Int / NAME#m_Int records of the enclosing defm.
/// RC and Suffix are accepted but not referenced in this multiclass.
multiclass sse_fp_unop_s_intr<RegisterClass RC, ValueType vt,
                              ComplexPattern int_cpat, Intrinsic Intr,
                              Predicate target, string Suffix> {
  let Predicates = [target] in {
  // These are unary operations, but they are modeled as having 2 source operands
  // because the high elements of the destination are unchanged in SSE.
  def : Pat<(Intr VR128:$src),
            (!cast<Instruction>(NAME#r_Int) VR128:$src, VR128:$src)>;
  }
  // We don't want to fold scalar loads into these instructions unless
  // optimizing for size. This is because the folded instruction will have a
  // partial register update, while the unfolded sequence will not, e.g.
  // movss mem, %xmm0
  // rcpss %xmm0, %xmm0
  // which has a clobber before the rcp, vs.
  // rcpss mem, %xmm0
  let Predicates = [target, OptForSize] in {
    def : Pat<(Intr int_cpat:$src2),
               (!cast<Instruction>(NAME#m_Int)
                      (vt (IMPLICIT_DEF)), addr:$src2)>;
  }
}

/// avx_fp_unop_s_intr - Same intrinsic patterns as sse_fp_unop_s_intr, without
/// the Suffix parameter. The register pattern passes $src for both source
/// operands; the memory pattern is only enabled under OptForSize and uses
/// IMPLICIT_DEF for the first source. RC is accepted but not referenced.
multiclass avx_fp_unop_s_intr<RegisterClass RC, ValueType vt, ComplexPattern int_cpat,
                              Intrinsic Intr, Predicate target> {
  let Predicates = [target] in {
   def : Pat<(Intr VR128:$src),
             (!cast<Instruction>(NAME#r_Int) VR128:$src,
                                 VR128:$src)>;
  }
  let Predicates = [target, OptForSize] in {
    def : Pat<(Intr int_cpat:$src2),
              (!cast<Instruction>(NAME#m_Int)
                    (vt (IMPLICIT_DEF)), addr:$src2)>;
  }
}

/// avx_fp_unop_s - Three-operand counterpart of sse_fp_unop_s.
///
/// r / m (on RC) and r_Int / m_Int (on VR128) all take two sources and carry
/// no inline pattern; no operand is tied to $dst. Selection of r / m for
/// OpNode is done by the separate Pat records at the end of the multiclass.
multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                          ValueType ScalarVT, X86MemOperand x86memop,
                          Operand intmemop, SDNode OpNode, Domain d,
                          X86FoldableSchedWrite sched, Predicate target> {
  let isCodeGenOnly = 1, hasSideEffects = 0 in {
  def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
            [], d>, Sched<[sched]>;
  let mayLoad = 1 in
  def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
            [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  let hasSideEffects = 0, ExeDomain = d in {
  def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
                (ins VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             []>, Sched<[sched]>;
  let mayLoad = 1 in
  def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
                (ins VR128:$src1, intmemop:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // We don't want to fold scalar loads into these instructions unless
  // optimizing for size. This is because the folded instruction will have a
  // partial register update, while the unfolded sequence will not, e.g.
  // vmovss mem, %xmm0
  // vrcpss %xmm0, %xmm0, %xmm0
  // which has a clobber before the rcp, vs.
  // vrcpss mem, %xmm0, %xmm0
  // TODO: In theory, we could fold the load, and avoid the stall caused by
  // the partial register store, either in BreakFalseDeps or with smarter RA.
  let Predicates = [target] in {
   def : Pat<(OpNode RC:$src),  (!cast<Instruction>(NAME#r)
                                (ScalarVT (IMPLICIT_DEF)), RC:$src)>;
  }
  let Predicates = [target, OptForSize] in {
    def : Pat<(ScalarVT (OpNode (load addr:$src))),
              (!cast<Instruction>(NAME#m) (ScalarVT (IMPLICIT_DEF)),
            addr:$src)>;
  }
}

/// sse1_fp_unop_p - SSE1 unops in packed form.
///
///   opc, OpcodeStr, OpNode - opcode byte, mnemonic stem and DAG node.
///   sched - scheduling widths; the .XMM and .YMM members are used.
///   prds  - predicate list applied to the VEX-encoded records only.
///
/// Defines V<NAME>PSr/PSm (VR128) and V<NAME>PSYr/PSYm (VR256, VEX_L) with a
/// "v"-prefixed mnemonic and loadv* fragments, plus <NAME>PSr/PSm, which use
/// memopv4f32 and sit outside the `let Predicates = prds` scope.
multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched, list<Predicate> prds> {
let Predicates = prds in {
  def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       !strconcat("v", OpcodeStr,
                                  "ps\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>,
                       VEX, Sched<[sched.XMM]>, VEX_WIG;
  def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       !strconcat("v", OpcodeStr,
                                  "ps\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))]>,
                       VEX, Sched<[sched.XMM.Folded]>, VEX_WIG;
  def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                        !strconcat("v", OpcodeStr,
                                   "ps\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>,
                        VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
  def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                        !strconcat("v", OpcodeStr,
                                   "ps\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (OpNode (loadv8f32 addr:$src)))]>,
                        VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG;
}

  def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>,
                Sched<[sched.XMM]>;
  def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>,
                Sched<[sched.XMM.Folded]>;
}

/// sse2_fp_unop_p - SSE2 unops in vector forms.
/// Packed-double counterpart of sse1_fp_unop_p. The VEX-encoded records are
/// always guarded by [HasAVX, NoVLX] (there is no predicate-list parameter)
/// and use loadv2f64/loadv4f64; <NAME>PDr/PDm use memopv2f64.
multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
                          SDNode OpNode, X86SchedWriteWidths sched> {
let Predicates = [HasAVX, NoVLX] in {
  def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       !strconcat("v", OpcodeStr,
                                  "pd\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>,
                       VEX, Sched<[sched.XMM]>, VEX_WIG;
  def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       !strconcat("v", OpcodeStr,
                                  "pd\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))]>,
                       VEX, Sched<[sched.XMM.Folded]>, VEX_WIG;
  def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                        !strconcat("v", OpcodeStr,
                                   "pd\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>,
                        VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
  def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                        !strconcat("v", OpcodeStr,
                                   "pd\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (OpNode (loadv4f64 addr:$src)))]>,
                        VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG;
}

  def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>,
                Sched<[sched.XMM]>;
  def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>,
                Sched<[sched.XMM.Folded]>;
}

/// sse1_fp_unop_s_intr - Intrinsic selection patterns for the single-precision
/// scalar unop, for both the SSE (UseSSE1) and AVX (AVXTarget) records.
/// The intrinsic is looked up by name as int_x86_sse_<OpcodeStr>_ss.
/// opc, OpNode and sched are accepted but not referenced in this multiclass.
multiclass sse1_fp_unop_s_intr<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched, Predicate AVXTarget> {
  defm SS        :  sse_fp_unop_s_intr<FR32, v4f32, sse_load_f32,
                      !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss),
                      UseSSE1, "SS">, XS;
  defm V#NAME#SS  : avx_fp_unop_s_intr<FR32, v4f32, sse_load_f32,
                      !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss),
                      AVXTarget>,
                      XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable;
}

/// sse1_fp_unop_s - Single-precision scalar unop: <NAME>SS via sse_fp_unop_s
/// under UseSSE1, and V<NAME>SS via avx_fp_unop_s under AVXTarget with a
/// "v"-prefixed mnemonic. Both use sched.Scl.
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched, Predicate AVXTarget> {
  defm SS        :  sse_fp_unop_s<opc, OpcodeStr##ss, FR32, f32, f32mem,
                      ssmem, OpNode, SSEPackedSingle, sched.Scl, UseSSE1>, XS;
  defm V#NAME#SS  : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, f32,
                      f32mem, ssmem, OpNode, SSEPackedSingle, sched.Scl, AVXTarget>,
                       XS, VEX_4V, VEX_LIG, VEX_WIG;
}

/// sse2_fp_unop_s - Double-precision counterpart of sse1_fp_unop_s: <NAME>SD
/// under UseSSE2 and V<NAME>SD under AVXTarget, on FR64 / f64mem / sdmem.
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched, Predicate AVXTarget> {
  defm SD         : sse_fp_unop_s<opc, OpcodeStr##sd, FR64, f64, f64mem,
                         sdmem, OpNode, SSEPackedDouble, sched.Scl, UseSSE2>, XD;
  defm V#NAME#SD  : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, f64,
                         f64mem, sdmem, OpNode, SSEPackedDouble, sched.Scl, AVXTarget>,
                         XD, VEX_4V, VEX_LIG, VEX_WIG;
}

// Square root.
// Scalar AVX forms are guarded by UseAVX; packed single AVX forms by
// [HasAVX, NoVLX]. The double-precision forms use SchedWriteFSqrt64.
defm SQRT  : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt, UseAVX>,
             sse1_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>,
             sse2_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt64, UseAVX>,
             sse2_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt64>;

// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
// Unlike SQRT, these also instantiate the intrinsic patterns and guard their
// AVX forms with HasAVX.
defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>,
             sse1_fp_unop_s_intr<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>,
             sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, [HasAVX]>;
defm RCP   : sse1_fp_unop_s<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>,
             sse1_fp_unop_s_intr<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>,
             sse1_fp_unop_p<0x53, "rcp", X86frcp, SchedWriteFRcp, [HasAVX]>;

// There is no f64 version of the reciprocal approximation instructions.

/// scalar_unary_math_patterns - Select the "<OpcPrefix>r_Int" instruction
/// (and its "V"-prefixed counterpart) for a scalar unary OpNode applied to
/// element 0 of $src and merged into $dst through Move.
///
///   OpNode        - scalar operation applied to (extractelt $src, 0).
///   OpcPrefix     - instruction name stem, e.g. "SQRTSS".
///   Move          - node that merges the scalar result into $dst.
///   VT            - vector type of $dst / $src.
///   BasePredicate - predicate for the unprefixed instruction; the "V" form
///                   is always guarded by UseAVX.
multiclass scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
                                      SDNode Move, ValueType VT,
                                      Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    def : Pat<(VT (Move VT:$dst,
                        (scalar_to_vector
                            (OpNode (extractelt VT:$src, 0))))),
              (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src)>;
  }

  // Repeat for AVX versions of the instructions.
  let Predicates = [UseAVX] in {
    def : Pat<(VT (Move VT:$dst,
                        (scalar_to_vector
                            (OpNode (extractelt VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
  }
}

defm : scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32, UseSSE1>;
defm : scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64, UseSSE2>;

/// scalar_unary_math_intr_patterns - Same idea as scalar_unary_math_patterns,
/// but the source is a whole-vector intrinsic call (Intr $src) merged into
/// $dst through Move. The "V" form is guarded by HasAVX here (not UseAVX).
multiclass scalar_unary_math_intr_patterns<Intrinsic Intr, string OpcPrefix,
                                           SDNode Move, ValueType VT,
                                           Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
              (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src)>;
  }

  // Repeat for AVX versions of the instructions.
  let Predicates = [HasAVX] in {
    def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
              (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
  }
}

defm : scalar_unary_math_intr_patterns<int_x86_sse_rcp_ss, "RCPSS",
                                       X86Movss, v4f32, UseSSE1>;
defm : scalar_unary_math_intr_patterns<int_x86_sse_rsqrt_ss, "RSQRTSS",
                                       X86Movss, v4f32, UseSSE1>;


//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Non-temporal stores
//===----------------------------------------------------------------------===//

let AddedComplexity = 400 in { // Prefer non-temporal versions

// "v"-prefixed forms (VPSI/VPDI + VEX), 128-bit and 256-bit.
let Predicates = [HasAVX, NoVLX] in {
  let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
    def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
                          (ins f128mem:$dst, VR128:$src),
                          "movntps\t{$src, $dst|$dst, $src}",
                          [(alignednontemporalstore (v4f32 VR128:$src),
                                                    addr:$dst)]>,
                     VEX, VEX_WIG;
    def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
                          (ins f128mem:$dst, VR128:$src),
                          "movntpd\t{$src, $dst|$dst, $src}",
                          [(alignednontemporalstore (v2f64 VR128:$src),
                                                    addr:$dst)]>,
                     VEX, VEX_WIG;
  } // SchedRW

  let SchedRW = [SchedWriteFMoveLSNT.YMM.MR] in {
    def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
                           (ins f256mem:$dst, VR256:$src),
                           "movntps\t{$src, $dst|$dst, $src}",
                           [(alignednontemporalstore (v8f32 VR256:$src),
                                                     addr:$dst)]>,
                      VEX, VEX_L, VEX_WIG;
    def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
                           (ins f256mem:$dst, VR256:$src),
                           "movntpd\t{$src, $dst|$dst, $src}",
                           [(alignednontemporalstore (v4f64 VR256:$src),
                                                     addr:$dst)]>,
                      VEX, VEX_L, VEX_WIG;
  } // SchedRW

  let ExeDomain = SSEPackedInt in {
    def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
                          (ins i128mem:$dst, VR128:$src),
                          "movntdq\t{$src, $dst|$dst, $src}",
                          [(alignednontemporalstore (v2i64 VR128:$src),
                                                    addr:$dst)]>,
                     VEX, VEX_WIG,
                     Sched<[SchedWriteVecMoveLSNT.XMM.MR]>;
    def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
                           (ins i256mem:$dst, VR256:$src),
                           "movntdq\t{$src, $dst|$dst, $src}",
                           [(alignednontemporalstore (v4i64 VR256:$src),
                                                     addr:$dst)]>,
                      VEX, VEX_L, VEX_WIG,
                      Sched<[SchedWriteVecMoveLSNT.YMM.MR]>;
  } // ExeDomain
} // Predicates

// Unprefixed 128-bit forms.
let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
  def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs),
                      (ins f128mem:$dst, VR128:$src),
                      "movntps\t{$src, $dst|$dst, $src}",
                      [(alignednontemporalstore (v4f32 VR128:$src),
                                                addr:$dst)]>;
  def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs),
                      (ins f128mem:$dst, VR128:$src),
                      "movntpd\t{$src, $dst|$dst, $src}",
                      [(alignednontemporalstore (v2f64 VR128:$src),
                                                addr:$dst)]>;
} // SchedRW

// Integer-domain store: use the integer memory operand class (i128mem), the
// same one VMOVNTDQmr above declares for the identical pattern.
// NOTE(review): this was f128mem; the change is expected to be
// encoding-neutral - confirm against the generated tables.
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLSNT.XMM.MR] in
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs),
                    (ins i128mem:$dst, VR128:$src),
                    "movntdq\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v2i64 VR128:$src),
                                              addr:$dst)]>;

let SchedRW = [WriteStoreNT] in {
  // There is no AVX form for instructions below this point
  def MOVNTImr : I<0xC3, MRMDestMem, (outs),
                   (ins i32mem:$dst, GR32:$src),
                   "movnti{l}\t{$src, $dst|$dst, $src}",
                   [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
                 PS, Requires<[HasSSE2]>;
  def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs),
                       (ins i64mem:$dst, GR64:$src),
                       "movnti{q}\t{$src, $dst|$dst, $src}",
                       [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
                    PS, Requires<[HasSSE2]>;
} // SchedRW = [WriteStoreNT]

// The instruction definitions above only carry v2i64/v4i64 patterns; map the
// remaining integer element types onto the same MOVNTDQ instructions.
let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(alignednontemporalstore (v8i32 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
}

} // AddedComplexity

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Prefetch and memory fence
//===----------------------------------------------------------------------===//

// Prefetch intrinsic.
// The four prefetch variants share opcode 0x18 and differ only in the ModRM
// form (MRM1m/MRM2m/MRM3m/MRM0m), the mnemonic, and the third operand of the
// matched prefetch node (3, 2, 1, 0).
let Predicates = [HasSSEPrefetch], SchedRW = [WriteLoad] in {
  def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src),
                     "prefetcht0\t$src",
                     [(prefetch addr:$src, imm, (i32 3), (i32 1))]>,
                   TB;
  def PREFETCHT1 : I<0x18, MRM2m, (outs), (ins i8mem:$src),
                     "prefetcht1\t$src",
                     [(prefetch addr:$src, imm, (i32 2), (i32 1))]>,
                   TB;
  def PREFETCHT2 : I<0x18, MRM3m, (outs), (ins i8mem:$src),
                     "prefetcht2\t$src",
                     [(prefetch addr:$src, imm, (i32 1), (i32 1))]>,
                   TB;
  def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src),
                      "prefetchnta\t$src",
                      [(prefetch addr:$src, imm, (i32 0), (i32 1))]>,
                    TB;
}

// FIXME: How should flush instruction be modeled?
let SchedRW = [WriteLoad] in {
  // Flush cache
  def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
                  "clflush\t$src",
                  [(int_x86_sse2_clflush addr:$src)]>,
                PS, Requires<[HasSSE2]>;
}

let SchedRW = [WriteNop] in {
  // Pause. This "instruction" is encoded as "rep; nop", so even though it
  // was introduced with SSE2, it's backward compatible.
  def PAUSE : I<0x90, RawFrm, (outs), (ins),
                "pause",
                [(int_x86_sse2_pause)]>,
              OBXS;
}

let SchedRW = [WriteFence] in {
  // Load, store, and memory fence
  // TODO: As with mfence, we may want to ease the availability of
  // sfence/lfence to include any 64-bit target.
  def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
                 "sfence", [(int_x86_sse_sfence)]>,
               PS, Requires<[HasSSE1]>;
  def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
                 "lfence", [(int_x86_sse2_lfence)]>,
               PS, Requires<[HasSSE2]>;
  def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
                 "mfence", [(int_x86_sse2_mfence)]>,
               PS, Requires<[HasMFence]>;
} // SchedRW

def : Pat<(X86MFence), (MFENCE)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Load/Store XCSR register
//===----------------------------------------------------------------------===//

// "v"-prefixed forms (VPSI + VEX).
let mayLoad = 1, hasSideEffects = 1 in
def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
                    "ldmxcsr\t$src",
                    [(int_x86_sse_ldmxcsr addr:$src)]>,
               VEX, Sched<[WriteLDMXCSR]>, VEX_WIG;
let mayStore = 1, hasSideEffects = 1 in
def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                    "stmxcsr\t$dst",
                    [(int_x86_sse_stmxcsr addr:$dst)]>,
               VEX, Sched<[WriteSTMXCSR]>, VEX_WIG;

// Unprefixed forms.
let mayLoad = 1, hasSideEffects = 1 in
def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src),
                "ldmxcsr\t$src",
                [(int_x86_sse_ldmxcsr addr:$src)]>,
              TB, Sched<[WriteLDMXCSR]>;
let mayStore = 1, hasSideEffects = 1 in
def STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                "stmxcsr\t$dst",
                [(int_x86_sse_stmxcsr addr:$dst)]>,
              TB, Sched<[WriteSTMXCSR]>;

//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in { // SSE integer instructions

// "v"-prefixed register-to-register moves (opcode 0x6F); no patterns.
let hasSideEffects = 0 in {
  def VMOVDQArr : VPDI<0x6F, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src),
                       "movdqa\t{$src, $dst|$dst, $src}", []>,
                  Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, VEX_WIG;
  def VMOVDQUrr : VSSI<0x6F, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src),
                       "movdqu\t{$src, $dst|$dst, $src}", []>,
                  Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, VEX_WIG;
  def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg,
                        (outs VR256:$dst), (ins VR256:$src),
                        "movdqa\t{$src, $dst|$dst, $src}", []>,
                   Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, VEX_WIG;
  def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg,
                        (outs VR256:$dst), (ins VR256:$src),
                        "movdqu\t{$src, $dst|$dst, $src}", []>,
                   Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, VEX_WIG;
}

// For Disassembler
// Same moves using the store-direction opcode (0x7F, MRMDestReg); each one
// names its 0x6F counterpart through FoldGenData.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
  def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg,
                           (outs VR128:$dst), (ins VR128:$src),
                           "movdqa\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.XMM.RR]>,
                      VEX, VEX_WIG, FoldGenData<"VMOVDQArr">;
  def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg,
                            (outs VR256:$dst), (ins VR256:$src),
                            "movdqa\t{$src, $dst|$dst, $src}", []>,
                       Sched<[SchedWriteVecMoveLS.YMM.RR]>,
                       VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQAYrr">;
  def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg,
                           (outs VR128:$dst), (ins VR128:$src),
                           "movdqu\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.XMM.RR]>,
                      VEX, VEX_WIG, FoldGenData<"VMOVDQUrr">;
  def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg,
                            (outs VR256:$dst), (ins VR256:$src),
                            "movdqu\t{$src, $dst|$dst, $src}", []>,
                       Sched<[SchedWriteVecMoveLS.YMM.RR]>,
                       VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQUYrr">;
}

// "v"-prefixed loads. Only the 128-bit forms carry a selection pattern.
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
    hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in {
  def VMOVDQArm : VPDI<0x6F, MRMSrcMem,
                       (outs VR128:$dst), (ins i128mem:$src),
                       "movdqa\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (alignedloadv2i64 addr:$src))]>,
                  Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
  def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem,
                        (outs VR256:$dst), (ins i256mem:$src),
                        "movdqa\t{$src, $dst|$dst, $src}", []>,
                   Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                   VEX, VEX_L, VEX_WIG;
  def VMOVDQUrm : I<0x6F, MRMSrcMem,
                    (outs VR128:$dst), (ins i128mem:$src),
                    "vmovdqu\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (loadv2i64 addr:$src))]>,
                  Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                  XS, VEX, VEX_WIG;
  def VMOVDQUYrm : I<0x6F, MRMSrcMem,
                     (outs VR256:$dst), (ins i256mem:$src),
                     "vmovdqu\t{$src, $dst|$dst, $src}", []>,
                   Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                   XS, VEX, VEX_L, VEX_WIG;
}

// "v"-prefixed stores. Only the 128-bit forms carry a selection pattern.
let mayStore = 1, hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in {
  def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
                       (ins i128mem:$dst, VR128:$src),
                       "movdqa\t{$src, $dst|$dst, $src}",
                       [(alignedstore (v2i64 VR128:$src), addr:$dst)]>,
                  Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_WIG;
  def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
                        (ins i256mem:$dst, VR256:$src),
                        "movdqa\t{$src, $dst|$dst, $src}", []>,
                   Sched<[SchedWriteVecMoveLS.YMM.MR]>, VEX, VEX_L, VEX_WIG;
  def VMOVDQUmr : I<0x7F, MRMDestMem, (outs),
                    (ins i128mem:$dst, VR128:$src),
                    "vmovdqu\t{$src, $dst|$dst, $src}",
                    [(store (v2i64 VR128:$src), addr:$dst)]>,
                  Sched<[SchedWriteVecMoveLS.XMM.MR]>, XS, VEX, VEX_WIG;
  def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs),
                     (ins i256mem:$dst, VR256:$src),
                     "vmovdqu\t{$src, $dst|$dst, $src}", []>,
                   Sched<[SchedWriteVecMoveLS.YMM.MR]>,
                   XS, VEX, VEX_L, VEX_WIG;
}

// Unprefixed register-to-register moves and their reversed-encoding twins.
let SchedRW = [SchedWriteVecMoveLS.XMM.RR] in {
  let hasSideEffects = 0 in {
    def MOVDQArr : PDI<0x6F, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src),
                       "movdqa\t{$src, $dst|$dst, $src}", []>;

    def MOVDQUrr : I<0x6F, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src),
                     "movdqu\t{$src, $dst|$dst, $src}", []>,
                   XS, Requires<[UseSSE2]>;
  }

  // For Disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
    def MOVDQArr_REV : PDI<0x7F, MRMDestReg,
                           (outs VR128:$dst), (ins VR128:$src),
                           "movdqa\t{$src, $dst|$dst, $src}", []>,
                       FoldGenData<"MOVDQArr">;

    def MOVDQUrr_REV : I<0x7F, MRMDestReg,
                         (outs VR128:$dst), (ins VR128:$src),
                         "movdqu\t{$src, $dst|$dst, $src}", []>,
                       XS, Requires<[UseSSE2]>, FoldGenData<"MOVDQUrr">;
  }
} // SchedRW

// Unprefixed loads. The selection patterns are deliberately left commented
// out, so these records have empty pattern lists.
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
    hasSideEffects = 0, SchedRW = [SchedWriteVecMoveLS.XMM.RM] in {
  def MOVDQArm : PDI<0x6F, MRMSrcMem,
                     (outs VR128:$dst), (ins i128mem:$src),
                     "movdqa\t{$src, $dst|$dst, $src}",
                     [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
  def MOVDQUrm : I<0x6F, MRMSrcMem,
                   (outs VR128:$dst), (ins i128mem:$src),
                   "movdqu\t{$src, $dst|$dst, $src}",
                   [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
                 XS, Requires<[UseSSE2]>;
}

// Unprefixed stores; patterns likewise commented out.
let mayStore = 1, hasSideEffects = 0,
    SchedRW = [SchedWriteVecMoveLS.XMM.MR] in {
  def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs),
                     (ins i128mem:$dst, VR128:$src),
                     "movdqa\t{$src, $dst|$dst, $src}",
                     [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
  def MOVDQUmr : I<0x7F, MRMDestMem, (outs),
                   (ins i128mem:$dst, VR128:$src),
                   "movdqu\t{$src, $dst|$dst, $src}",
                   [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
                 XS, Requires<[UseSSE2]>;
}

} // ExeDomain = SSEPackedInt

// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"vmovdqa.s\t{$src, $dst|$dst, $src}",
                (VMOVDQArr_REV VR128:$dst, VR128:$src),
                0>;
def : InstAlias<"vmovdqa.s\t{$src, $dst|$dst, $src}",
                (VMOVDQAYrr_REV VR256:$dst, VR256:$src),
                0>;
def : InstAlias<"vmovdqu.s\t{$src, $dst|$dst, $src}",
                (VMOVDQUrr_REV VR128:$dst, VR128:$src),
                0>;
def : InstAlias<"vmovdqu.s\t{$src, $dst|$dst, $src}",
                (VMOVDQUYrr_REV VR256:$dst, VR256:$src),
                0>;

// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"movdqa.s\t{$src, $dst|$dst, $src}",
                (MOVDQArr_REV VR128:$dst, VR128:$src),
                0>;
def : InstAlias<"movdqu.s\t{$src, $dst|$dst, $src}",
                (MOVDQUrr_REV VR128:$dst, VR128:$src),
                0>;

let Predicates = [HasAVX, NoVLX] in {
  // Additional patterns for other integer sizes.

  // Aligned loads.
  def : Pat<(alignedloadv4i32 addr:$src), (VMOVDQArm addr:$src)>;
  def : Pat<(alignedloadv8i16 addr:$src), (VMOVDQArm addr:$src)>;
  def : Pat<(alignedloadv16i8 addr:$src), (VMOVDQArm addr:$src)>;

  // Plain loads.
  def : Pat<(loadv4i32 addr:$src), (VMOVDQUrm addr:$src)>;
  def : Pat<(loadv8i16 addr:$src), (VMOVDQUrm addr:$src)>;
  def : Pat<(loadv16i8 addr:$src), (VMOVDQUrm addr:$src)>;

  // Aligned stores.
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (VMOVDQAmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (VMOVDQAmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (VMOVDQAmr addr:$dst, VR128:$src)>;

  // Plain stores.
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (VMOVDQUmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (VMOVDQUmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (VMOVDQUmr addr:$dst, VR128:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in { // SSE integer instructions

/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types
///
/// Defines a register-register form (rr, marked commutable) and a
/// register-memory form (rm). Both sources are viewed as SrcVT and the result
/// as DstVT. Is2Addr picks the two-operand assembly string when set and the
/// three-operand string otherwise.
multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         ValueType DstVT, ValueType SrcVT, RegisterClass RC,
                         PatFrag memop_frag, X86MemOperand x86memop,
                         X86FoldableSchedWrite sched, bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr : PDI<opc, MRMSrcReg,
               (outs RC:$dst),
               (ins RC:$src1, RC:$src2),
               !if(Is2Addr,
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
               [(set RC:$dst,
                     (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
           Sched<[sched]>;

  def rm : PDI<opc, MRMSrcMem,
               (outs RC:$dst),
               (ins RC:$src1, x86memop:$src2),
               !if(Is2Addr,
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
               [(set RC:$dst,
                     (DstVT (OpNode (SrcVT RC:$src1),
                                    (memop_frag addr:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // ExeDomain = SSEPackedInt

// Integer binary operations instantiated through PDI_binop_all (defined
// elsewhere in this file). NOTE(review): the 0/1 argument appears to be a
// commutativity flag (1 for add/mul/min/max/avg, 0 for the subtractions) -
// confirm against the PDI_binop_all definition.

// Additions (wrapping, signed-saturating, unsigned-saturating).
defm PADDB : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8,
                           SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16,
                           SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
                           SchedWriteVecALU, 1, NoVLX>;
defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
                           SchedWriteVecALU, 1, NoVLX>;
defm PADDSB : PDI_binop_all<0xEC, "paddsb", saddsat, v16i8, v32i8,
                            SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDSW : PDI_binop_all<0xED, "paddsw", saddsat, v8i16, v16i16,
                            SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDUSB : PDI_binop_all<0xDC, "paddusb", uaddsat, v16i8, v32i8,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDUSW : PDI_binop_all<0xDD, "paddusw", uaddsat, v8i16, v16i16,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;

// Multiplications.
defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
                            SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
defm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16,
                             SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
defm PMULHW : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16,
                            SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;

// Subtractions (wrapping, signed-saturating, unsigned-saturating).
defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
                           SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
                           SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
                           SchedWriteVecALU, 0, NoVLX>;
defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
                           SchedWriteVecALU, 0, NoVLX>;
defm PSUBSB : PDI_binop_all<0xE8, "psubsb", ssubsat, v16i8, v32i8,
                            SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBSW : PDI_binop_all<0xE9, "psubsw", ssubsat, v8i16, v16i16,
                            SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", usubsat, v16i8, v32i8,
                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", usubsat, v8i16, v16i16,
                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;

// Min / max / average.
defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8,
                            SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMINSW : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16,
                            SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8,
                            SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16,
                            SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PAVGB : PDI_binop_all<0xE0, "pavgb", X86avg, v16i8, v32i8,
                           SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PAVGW : PDI_binop_all<0xE3, "pavgw", X86avg, v8i16, v16i16,
                           SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;

// Widening unsigned multiply.
defm PMULUDQ : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64,
                             SchedWriteVecIMul, 1, NoVLX>;

// pmaddwd: three-operand "v" forms (Is2Addr = 0), then the two-operand form
// with $src1 tied to $dst.
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd,
                              v4i32, v8i16, VR128,
                              load, i128mem, SchedWriteVecIMul.XMM, 0>,
                VEX_4V, VEX_WIG;

let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm VPMADDWDY : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd,
                               v8i32, v16i16, VR256,
                               load, i256mem, SchedWriteVecIMul.YMM, 0>,
                 VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd,
                             v4i32, v8i16, VR128,
                             memop, i128mem, SchedWriteVecIMul.XMM>;

// psadbw: same layout as pmaddwd above.
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw,
                             v2i64, v16i8, VR128,
                             load, i128mem, SchedWritePSADBW.XMM, 0>,
               VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw,
                              v4i64, v32i8, VR256,
                              load, i256mem, SchedWritePSADBW.YMM, 0>,
                VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw,
                            v2i64, v16i8, VR128,
                            memop, i128mem, SchedWritePSADBW.XMM>;

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//

/// PDI_binop_rmi - Binary operator whose second source is a 128-bit register
/// (rr), a 128-bit memory operand (rm), or an 8-bit immediate (ri).
///
///   opc / opc2     - opcode for the rr/rm forms / for the ri form.
///   ImmForm        - instruction format of the ri form.
///   OpNode         - node matched by rr and rm.
///   OpNode2        - node matched by ri.
///   RC             - register class of $dst and $src1.
///   sched/schedImm - scheduling info for rr/rm and for ri respectively.
///   DstVT / SrcVT  - result type / type of the 128-bit second source.
///   ld_frag        - load fragment used by rm.
///   Is2Addr        - selects the two-operand assembly string when set.
multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
                         string OpcodeStr, SDNode OpNode,
                         SDNode OpNode2, RegisterClass RC,
                         X86FoldableSchedWrite sched,
                         X86FoldableSchedWrite schedImm,
                         ValueType DstVT, ValueType SrcVT,
                         PatFrag ld_frag, bit Is2Addr = 1> {
  // src2 is always 128-bit
  def rr : PDI<opc, MRMSrcReg,
               (outs RC:$dst),
               (ins RC:$src1, VR128:$src2),
               !if(Is2Addr,
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
               [(set RC:$dst,
                     (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))]>,
           Sched<[sched]>;

  def rm : PDI<opc, MRMSrcMem,
               (outs RC:$dst),
               (ins RC:$src1, i128mem:$src2),
               !if(Is2Addr,
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
               [(set RC:$dst,
                     (DstVT (OpNode RC:$src1,
                                    (SrcVT (ld_frag addr:$src2)))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;

  def ri : PDIi8<opc2, ImmForm,
                 (outs RC:$dst),
                 (ins RC:$src1, u8imm:$src2),
                 !if(Is2Addr,
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                 [(set RC:$dst,
                       (DstVT (OpNode2 RC:$src1, (i8 timm:$src2))))]>,
           Sched<[schedImm]>;
}
35030b57cec5SDimitry Andric 35040b57cec5SDimitry Andricmulticlass PDI_binop_rmi_all<bits<8> opc, bits<8> opc2, Format ImmForm, 35050b57cec5SDimitry Andric string OpcodeStr, SDNode OpNode, 35060b57cec5SDimitry Andric SDNode OpNode2, ValueType DstVT128, 35070b57cec5SDimitry Andric ValueType DstVT256, ValueType SrcVT, 35080b57cec5SDimitry Andric X86SchedWriteWidths sched, 35090b57cec5SDimitry Andric X86SchedWriteWidths schedImm, Predicate prd> { 35100b57cec5SDimitry Andriclet Predicates = [HasAVX, prd] in 35110b57cec5SDimitry Andric defm V#NAME : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr), 35120b57cec5SDimitry Andric OpNode, OpNode2, VR128, sched.XMM, schedImm.XMM, 35130b57cec5SDimitry Andric DstVT128, SrcVT, load, 0>, VEX_4V, VEX_WIG; 35140b57cec5SDimitry Andriclet Predicates = [HasAVX2, prd] in 35150b57cec5SDimitry Andric defm V#NAME#Y : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr), 35160b57cec5SDimitry Andric OpNode, OpNode2, VR256, sched.YMM, schedImm.YMM, 35170b57cec5SDimitry Andric DstVT256, SrcVT, load, 0>, VEX_4V, VEX_L, 35180b57cec5SDimitry Andric VEX_WIG; 35190b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in 35200b57cec5SDimitry Andric defm NAME : PDI_binop_rmi<opc, opc2, ImmForm, OpcodeStr, OpNode, OpNode2, 35210b57cec5SDimitry Andric VR128, sched.XMM, schedImm.XMM, DstVT128, SrcVT, 35220b57cec5SDimitry Andric memop>; 35230b57cec5SDimitry Andric} 35240b57cec5SDimitry Andric 35250b57cec5SDimitry Andricmulticlass PDI_binop_ri<bits<8> opc, Format ImmForm, string OpcodeStr, 35260b57cec5SDimitry Andric SDNode OpNode, RegisterClass RC, ValueType VT, 35270b57cec5SDimitry Andric X86FoldableSchedWrite sched, bit Is2Addr = 1> { 35280b57cec5SDimitry Andric def ri : PDIi8<opc, ImmForm, (outs RC:$dst), (ins RC:$src1, u8imm:$src2), 35290b57cec5SDimitry Andric !if(Is2Addr, 35300b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 35310b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, 
$dst|$dst, $src1, $src2}")), 3532*8bcb0991SDimitry Andric [(set RC:$dst, (VT (OpNode RC:$src1, (i8 timm:$src2))))]>, 35330b57cec5SDimitry Andric Sched<[sched]>; 35340b57cec5SDimitry Andric} 35350b57cec5SDimitry Andric 35360b57cec5SDimitry Andricmulticlass PDI_binop_ri_all<bits<8> opc, Format ImmForm, string OpcodeStr, 35370b57cec5SDimitry Andric SDNode OpNode, X86SchedWriteWidths sched> { 35380b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in 35390b57cec5SDimitry Andric defm V#NAME : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode, 35400b57cec5SDimitry Andric VR128, v16i8, sched.XMM, 0>, VEX_4V, VEX_WIG; 35410b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in 35420b57cec5SDimitry Andric defm V#NAME#Y : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode, 35430b57cec5SDimitry Andric VR256, v32i8, sched.YMM, 0>, 35440b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 35450b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in 35460b57cec5SDimitry Andric defm NAME : PDI_binop_ri<opc, ImmForm, OpcodeStr, OpNode, VR128, v16i8, 35470b57cec5SDimitry Andric sched.XMM>; 35480b57cec5SDimitry Andric} 35490b57cec5SDimitry Andric 35500b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in { 35510b57cec5SDimitry Andric defm PSLLW : PDI_binop_rmi_all<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli, 35520b57cec5SDimitry Andric v8i16, v16i16, v8i16, SchedWriteVecShift, 35530b57cec5SDimitry Andric SchedWriteVecShiftImm, NoVLX_Or_NoBWI>; 35540b57cec5SDimitry Andric defm PSLLD : PDI_binop_rmi_all<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli, 35550b57cec5SDimitry Andric v4i32, v8i32, v4i32, SchedWriteVecShift, 35560b57cec5SDimitry Andric SchedWriteVecShiftImm, NoVLX>; 35570b57cec5SDimitry Andric defm PSLLQ : PDI_binop_rmi_all<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli, 35580b57cec5SDimitry Andric v2i64, v4i64, v2i64, SchedWriteVecShift, 35590b57cec5SDimitry Andric SchedWriteVecShiftImm, NoVLX>; 35600b57cec5SDimitry Andric 
35610b57cec5SDimitry Andric defm PSRLW : PDI_binop_rmi_all<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli, 35620b57cec5SDimitry Andric v8i16, v16i16, v8i16, SchedWriteVecShift, 35630b57cec5SDimitry Andric SchedWriteVecShiftImm, NoVLX_Or_NoBWI>; 35640b57cec5SDimitry Andric defm PSRLD : PDI_binop_rmi_all<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli, 35650b57cec5SDimitry Andric v4i32, v8i32, v4i32, SchedWriteVecShift, 35660b57cec5SDimitry Andric SchedWriteVecShiftImm, NoVLX>; 35670b57cec5SDimitry Andric defm PSRLQ : PDI_binop_rmi_all<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli, 35680b57cec5SDimitry Andric v2i64, v4i64, v2i64, SchedWriteVecShift, 35690b57cec5SDimitry Andric SchedWriteVecShiftImm, NoVLX>; 35700b57cec5SDimitry Andric 35710b57cec5SDimitry Andric defm PSRAW : PDI_binop_rmi_all<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai, 35720b57cec5SDimitry Andric v8i16, v16i16, v8i16, SchedWriteVecShift, 35730b57cec5SDimitry Andric SchedWriteVecShiftImm, NoVLX_Or_NoBWI>; 35740b57cec5SDimitry Andric defm PSRAD : PDI_binop_rmi_all<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai, 35750b57cec5SDimitry Andric v4i32, v8i32, v4i32, SchedWriteVecShift, 35760b57cec5SDimitry Andric SchedWriteVecShiftImm, NoVLX>; 35770b57cec5SDimitry Andric 35780b57cec5SDimitry Andric defm PSLLDQ : PDI_binop_ri_all<0x73, MRM7r, "pslldq", X86vshldq, 35790b57cec5SDimitry Andric SchedWriteShuffle>; 35800b57cec5SDimitry Andric defm PSRLDQ : PDI_binop_ri_all<0x73, MRM3r, "psrldq", X86vshrdq, 35810b57cec5SDimitry Andric SchedWriteShuffle>; 35820b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt 35830b57cec5SDimitry Andric 35840b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 35850b57cec5SDimitry Andric// SSE2 - Packed Integer Comparison Instructions 35860b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 35870b57cec5SDimitry Andric 35880b57cec5SDimitry Andricdefm PCMPEQB : PDI_binop_all<0x74, 
"pcmpeqb", X86pcmpeq, v16i8, v32i8, 35890b57cec5SDimitry Andric SchedWriteVecALU, 1, TruePredicate>; 35900b57cec5SDimitry Andricdefm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16, 35910b57cec5SDimitry Andric SchedWriteVecALU, 1, TruePredicate>; 35920b57cec5SDimitry Andricdefm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32, 35930b57cec5SDimitry Andric SchedWriteVecALU, 1, TruePredicate>; 35940b57cec5SDimitry Andricdefm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8, 35950b57cec5SDimitry Andric SchedWriteVecALU, 0, TruePredicate>; 35960b57cec5SDimitry Andricdefm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16, 35970b57cec5SDimitry Andric SchedWriteVecALU, 0, TruePredicate>; 35980b57cec5SDimitry Andricdefm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32, 35990b57cec5SDimitry Andric SchedWriteVecALU, 0, TruePredicate>; 36000b57cec5SDimitry Andric 36010b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 36020b57cec5SDimitry Andric// SSE2 - Packed Integer Shuffle Instructions 36030b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 36040b57cec5SDimitry Andric 36050b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in { 36060b57cec5SDimitry Andricmulticlass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256, 36070b57cec5SDimitry Andric SDNode OpNode, X86SchedWriteWidths sched, 36080b57cec5SDimitry Andric Predicate prd> { 36090b57cec5SDimitry Andriclet Predicates = [HasAVX, prd] in { 36100b57cec5SDimitry Andric def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst), 36110b57cec5SDimitry Andric (ins VR128:$src1, u8imm:$src2), 36120b57cec5SDimitry Andric !strconcat("v", OpcodeStr, 36130b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 36140b57cec5SDimitry Andric [(set VR128:$dst, 3615*8bcb0991SDimitry Andric (vt128 (OpNode VR128:$src1, (i8 
timm:$src2))))]>, 36160b57cec5SDimitry Andric VEX, Sched<[sched.XMM]>, VEX_WIG; 36170b57cec5SDimitry Andric def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), 36180b57cec5SDimitry Andric (ins i128mem:$src1, u8imm:$src2), 36190b57cec5SDimitry Andric !strconcat("v", OpcodeStr, 36200b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 36210b57cec5SDimitry Andric [(set VR128:$dst, 36220b57cec5SDimitry Andric (vt128 (OpNode (load addr:$src1), 3623*8bcb0991SDimitry Andric (i8 timm:$src2))))]>, VEX, 36240b57cec5SDimitry Andric Sched<[sched.XMM.Folded]>, VEX_WIG; 36250b57cec5SDimitry Andric} 36260b57cec5SDimitry Andric 36270b57cec5SDimitry Andriclet Predicates = [HasAVX2, prd] in { 36280b57cec5SDimitry Andric def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst), 36290b57cec5SDimitry Andric (ins VR256:$src1, u8imm:$src2), 36300b57cec5SDimitry Andric !strconcat("v", OpcodeStr, 36310b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 36320b57cec5SDimitry Andric [(set VR256:$dst, 3633*8bcb0991SDimitry Andric (vt256 (OpNode VR256:$src1, (i8 timm:$src2))))]>, 36340b57cec5SDimitry Andric VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG; 36350b57cec5SDimitry Andric def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), 36360b57cec5SDimitry Andric (ins i256mem:$src1, u8imm:$src2), 36370b57cec5SDimitry Andric !strconcat("v", OpcodeStr, 36380b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 36390b57cec5SDimitry Andric [(set VR256:$dst, 36400b57cec5SDimitry Andric (vt256 (OpNode (load addr:$src1), 3641*8bcb0991SDimitry Andric (i8 timm:$src2))))]>, VEX, VEX_L, 36420b57cec5SDimitry Andric Sched<[sched.YMM.Folded]>, VEX_WIG; 36430b57cec5SDimitry Andric} 36440b57cec5SDimitry Andric 36450b57cec5SDimitry Andriclet Predicates = [UseSSE2] in { 36460b57cec5SDimitry Andric def ri : Ii8<0x70, MRMSrcReg, 36470b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2), 36480b57cec5SDimitry Andric !strconcat(OpcodeStr, 
36490b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 36500b57cec5SDimitry Andric [(set VR128:$dst, 3651*8bcb0991SDimitry Andric (vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>, 36520b57cec5SDimitry Andric Sched<[sched.XMM]>; 36530b57cec5SDimitry Andric def mi : Ii8<0x70, MRMSrcMem, 36540b57cec5SDimitry Andric (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), 36550b57cec5SDimitry Andric !strconcat(OpcodeStr, 36560b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 36570b57cec5SDimitry Andric [(set VR128:$dst, 36580b57cec5SDimitry Andric (vt128 (OpNode (memop addr:$src1), 3659*8bcb0991SDimitry Andric (i8 timm:$src2))))]>, 36600b57cec5SDimitry Andric Sched<[sched.XMM.Folded]>; 36610b57cec5SDimitry Andric} 36620b57cec5SDimitry Andric} 36630b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt 36640b57cec5SDimitry Andric 36650b57cec5SDimitry Andricdefm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd, 36660b57cec5SDimitry Andric SchedWriteShuffle, NoVLX>, PD; 36670b57cec5SDimitry Andricdefm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw, 36680b57cec5SDimitry Andric SchedWriteShuffle, NoVLX_Or_NoBWI>, XS; 36690b57cec5SDimitry Andricdefm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw, 36700b57cec5SDimitry Andric SchedWriteShuffle, NoVLX_Or_NoBWI>, XD; 36710b57cec5SDimitry Andric 36720b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 36730b57cec5SDimitry Andric// Packed Integer Pack Instructions (SSE & AVX) 36740b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 36750b57cec5SDimitry Andric 36760b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in { 36770b57cec5SDimitry Andricmulticlass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT, 36780b57cec5SDimitry Andric ValueType ArgVT, SDNode OpNode, RegisterClass RC, 36790b57cec5SDimitry Andric X86MemOperand x86memop, X86FoldableSchedWrite 
sched, 36800b57cec5SDimitry Andric PatFrag ld_frag, bit Is2Addr = 1> { 36810b57cec5SDimitry Andric def rr : PDI<opc, MRMSrcReg, 36820b57cec5SDimitry Andric (outs RC:$dst), (ins RC:$src1, RC:$src2), 36830b57cec5SDimitry Andric !if(Is2Addr, 36840b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 36850b57cec5SDimitry Andric !strconcat(OpcodeStr, 36860b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 36870b57cec5SDimitry Andric [(set RC:$dst, 36880b57cec5SDimitry Andric (OutVT (OpNode (ArgVT RC:$src1), RC:$src2)))]>, 36890b57cec5SDimitry Andric Sched<[sched]>; 36900b57cec5SDimitry Andric def rm : PDI<opc, MRMSrcMem, 36910b57cec5SDimitry Andric (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 36920b57cec5SDimitry Andric !if(Is2Addr, 36930b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 36940b57cec5SDimitry Andric !strconcat(OpcodeStr, 36950b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 36960b57cec5SDimitry Andric [(set RC:$dst, 36970b57cec5SDimitry Andric (OutVT (OpNode (ArgVT RC:$src1), 36980b57cec5SDimitry Andric (ld_frag addr:$src2))))]>, 36990b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 37000b57cec5SDimitry Andric} 37010b57cec5SDimitry Andric 37020b57cec5SDimitry Andricmulticlass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT, 37030b57cec5SDimitry Andric ValueType ArgVT, SDNode OpNode, RegisterClass RC, 37040b57cec5SDimitry Andric X86MemOperand x86memop, X86FoldableSchedWrite sched, 37050b57cec5SDimitry Andric PatFrag ld_frag, bit Is2Addr = 1> { 37060b57cec5SDimitry Andric def rr : SS48I<opc, MRMSrcReg, 37070b57cec5SDimitry Andric (outs RC:$dst), (ins RC:$src1, RC:$src2), 37080b57cec5SDimitry Andric !if(Is2Addr, 37090b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 37100b57cec5SDimitry Andric !strconcat(OpcodeStr, 37110b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 37120b57cec5SDimitry 
Andric [(set RC:$dst, 37130b57cec5SDimitry Andric (OutVT (OpNode (ArgVT RC:$src1), RC:$src2)))]>, 37140b57cec5SDimitry Andric Sched<[sched]>; 37150b57cec5SDimitry Andric def rm : SS48I<opc, MRMSrcMem, 37160b57cec5SDimitry Andric (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 37170b57cec5SDimitry Andric !if(Is2Addr, 37180b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 37190b57cec5SDimitry Andric !strconcat(OpcodeStr, 37200b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 37210b57cec5SDimitry Andric [(set RC:$dst, 37220b57cec5SDimitry Andric (OutVT (OpNode (ArgVT RC:$src1), 37230b57cec5SDimitry Andric (ld_frag addr:$src2))))]>, 37240b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 37250b57cec5SDimitry Andric} 37260b57cec5SDimitry Andric 37270b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in { 37280b57cec5SDimitry Andric defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, VR128, 37290b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, load, 0>, 37300b57cec5SDimitry Andric VEX_4V, VEX_WIG; 37310b57cec5SDimitry Andric defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, VR128, 37320b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, load, 0>, 37330b57cec5SDimitry Andric VEX_4V, VEX_WIG; 37340b57cec5SDimitry Andric 37350b57cec5SDimitry Andric defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus, VR128, 37360b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, load, 0>, 37370b57cec5SDimitry Andric VEX_4V, VEX_WIG; 37380b57cec5SDimitry Andric defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128, 37390b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, load, 0>, 37400b57cec5SDimitry Andric VEX_4V; 37410b57cec5SDimitry Andric} 37420b57cec5SDimitry Andric 37430b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { 37440b57cec5SDimitry Andric defm VPACKSSWBY : sse2_pack<0x63, "vpacksswb", 
v32i8, v16i16, X86Packss, VR256, 37450b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 37460b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 37470b57cec5SDimitry Andric defm VPACKSSDWY : sse2_pack<0x6B, "vpackssdw", v16i16, v8i32, X86Packss, VR256, 37480b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 37490b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 37500b57cec5SDimitry Andric 37510b57cec5SDimitry Andric defm VPACKUSWBY : sse2_pack<0x67, "vpackuswb", v32i8, v16i16, X86Packus, VR256, 37520b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 37530b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 37540b57cec5SDimitry Andric defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, VR256, 37550b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 37560b57cec5SDimitry Andric VEX_4V, VEX_L; 37570b57cec5SDimitry Andric} 37580b57cec5SDimitry Andric 37590b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { 37600b57cec5SDimitry Andric defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss, VR128, 37610b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 37620b57cec5SDimitry Andric defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss, VR128, 37630b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 37640b57cec5SDimitry Andric 37650b57cec5SDimitry Andric defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus, VR128, 37660b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 37670b57cec5SDimitry Andric 37680b57cec5SDimitry Andric defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus, VR128, 37690b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 37700b57cec5SDimitry Andric} 37710b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt 37720b57cec5SDimitry Andric 37730b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 37740b57cec5SDimitry Andric// SSE2 - Packed 
Integer Unpack Instructions 37750b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 37760b57cec5SDimitry Andric 37770b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in { 37780b57cec5SDimitry Andricmulticlass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, 37790b57cec5SDimitry Andric SDNode OpNode, RegisterClass RC, X86MemOperand x86memop, 37800b57cec5SDimitry Andric X86FoldableSchedWrite sched, PatFrag ld_frag, 37810b57cec5SDimitry Andric bit Is2Addr = 1> { 37820b57cec5SDimitry Andric def rr : PDI<opc, MRMSrcReg, 37830b57cec5SDimitry Andric (outs RC:$dst), (ins RC:$src1, RC:$src2), 37840b57cec5SDimitry Andric !if(Is2Addr, 37850b57cec5SDimitry Andric !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), 37860b57cec5SDimitry Andric !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 37870b57cec5SDimitry Andric [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>, 37880b57cec5SDimitry Andric Sched<[sched]>; 37890b57cec5SDimitry Andric def rm : PDI<opc, MRMSrcMem, 37900b57cec5SDimitry Andric (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 37910b57cec5SDimitry Andric !if(Is2Addr, 37920b57cec5SDimitry Andric !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), 37930b57cec5SDimitry Andric !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 37940b57cec5SDimitry Andric [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>, 37950b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 37960b57cec5SDimitry Andric} 37970b57cec5SDimitry Andric 37980b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in { 37990b57cec5SDimitry Andric defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, VR128, 38000b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, load, 0>, 38010b57cec5SDimitry Andric VEX_4V, VEX_WIG; 38020b57cec5SDimitry Andric defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, VR128, 38030b57cec5SDimitry Andric i128mem, 
SchedWriteShuffle.XMM, load, 0>, 38040b57cec5SDimitry Andric VEX_4V, VEX_WIG; 38050b57cec5SDimitry Andric defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, VR128, 38060b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, load, 0>, 38070b57cec5SDimitry Andric VEX_4V, VEX_WIG; 38080b57cec5SDimitry Andric defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, VR128, 38090b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, load, 0>, 38100b57cec5SDimitry Andric VEX_4V, VEX_WIG; 38110b57cec5SDimitry Andric} 38120b57cec5SDimitry Andric 38130b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 38140b57cec5SDimitry Andric defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, VR128, 38150b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, load, 0>, 38160b57cec5SDimitry Andric VEX_4V, VEX_WIG; 38170b57cec5SDimitry Andric defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, VR128, 38180b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, load, 0>, 38190b57cec5SDimitry Andric VEX_4V, VEX_WIG; 38200b57cec5SDimitry Andric defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, VR128, 38210b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, load, 0>, 38220b57cec5SDimitry Andric VEX_4V, VEX_WIG; 38230b57cec5SDimitry Andric defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, VR128, 38240b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, load, 0>, 38250b57cec5SDimitry Andric VEX_4V, VEX_WIG; 38260b57cec5SDimitry Andric} 38270b57cec5SDimitry Andric 38280b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { 38290b57cec5SDimitry Andric defm VPUNPCKLBWY : sse2_unpack<0x60, "vpunpcklbw", v32i8, X86Unpckl, VR256, 38300b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 38310b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 38320b57cec5SDimitry Andric defm VPUNPCKLWDY : sse2_unpack<0x61, "vpunpcklwd", v16i16, X86Unpckl, VR256, 38330b57cec5SDimitry 
Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 38340b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 38350b57cec5SDimitry Andric defm VPUNPCKHBWY : sse2_unpack<0x68, "vpunpckhbw", v32i8, X86Unpckh, VR256, 38360b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 38370b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 38380b57cec5SDimitry Andric defm VPUNPCKHWDY : sse2_unpack<0x69, "vpunpckhwd", v16i16, X86Unpckh, VR256, 38390b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 38400b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 38410b57cec5SDimitry Andric} 38420b57cec5SDimitry Andric 38430b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in { 38440b57cec5SDimitry Andric defm VPUNPCKLDQY : sse2_unpack<0x62, "vpunpckldq", v8i32, X86Unpckl, VR256, 38450b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 38460b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 38470b57cec5SDimitry Andric defm VPUNPCKLQDQY : sse2_unpack<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, VR256, 38480b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 38490b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 38500b57cec5SDimitry Andric defm VPUNPCKHDQY : sse2_unpack<0x6A, "vpunpckhdq", v8i32, X86Unpckh, VR256, 38510b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 38520b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 38530b57cec5SDimitry Andric defm VPUNPCKHQDQY : sse2_unpack<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, VR256, 38540b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 38550b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 38560b57cec5SDimitry Andric} 38570b57cec5SDimitry Andric 38580b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { 38590b57cec5SDimitry Andric defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, VR128, 38600b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 38610b57cec5SDimitry Andric defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, VR128, 38620b57cec5SDimitry 
Andric i128mem, SchedWriteShuffle.XMM, memop>; 38630b57cec5SDimitry Andric defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, VR128, 38640b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 38650b57cec5SDimitry Andric defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, VR128, 38660b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 38670b57cec5SDimitry Andric 38680b57cec5SDimitry Andric defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, VR128, 38690b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 38700b57cec5SDimitry Andric defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, VR128, 38710b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 38720b57cec5SDimitry Andric defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, VR128, 38730b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 38740b57cec5SDimitry Andric defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, VR128, 38750b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 38760b57cec5SDimitry Andric} 38770b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt 38780b57cec5SDimitry Andric 38790b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 38800b57cec5SDimitry Andric// SSE2 - Packed Integer Extract and Insert 38810b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 38820b57cec5SDimitry Andric 38830b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in { 38840b57cec5SDimitry Andricmulticlass sse2_pinsrw<bit Is2Addr = 1> { 38850b57cec5SDimitry Andric def rr : Ii8<0xC4, MRMSrcReg, 38860b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, 38870b57cec5SDimitry Andric GR32orGR64:$src2, u8imm:$src3), 38880b57cec5SDimitry Andric !if(Is2Addr, 38890b57cec5SDimitry Andric "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", 38900b57cec5SDimitry Andric "vpinsrw\t{$src3, 
$src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 38910b57cec5SDimitry Andric [(set VR128:$dst, 38920b57cec5SDimitry Andric (X86pinsrw VR128:$src1, GR32orGR64:$src2, imm:$src3))]>, 38930b57cec5SDimitry Andric Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>; 38940b57cec5SDimitry Andric def rm : Ii8<0xC4, MRMSrcMem, 38950b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, 38960b57cec5SDimitry Andric i16mem:$src2, u8imm:$src3), 38970b57cec5SDimitry Andric !if(Is2Addr, 38980b57cec5SDimitry Andric "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", 38990b57cec5SDimitry Andric "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 39000b57cec5SDimitry Andric [(set VR128:$dst, 39010b57cec5SDimitry Andric (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), 39020b57cec5SDimitry Andric imm:$src3))]>, 39030b57cec5SDimitry Andric Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; 39040b57cec5SDimitry Andric} 39050b57cec5SDimitry Andric 39060b57cec5SDimitry Andric// Extract 39070b57cec5SDimitry Andriclet Predicates = [HasAVX, NoBWI] in 39080b57cec5SDimitry Andricdef VPEXTRWrr : Ii8<0xC5, MRMSrcReg, 39090b57cec5SDimitry Andric (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2), 39100b57cec5SDimitry Andric "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", 39110b57cec5SDimitry Andric [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), 39120b57cec5SDimitry Andric imm:$src2))]>, 39130b57cec5SDimitry Andric PD, VEX, VEX_WIG, Sched<[WriteVecExtract]>; 39140b57cec5SDimitry Andricdef PEXTRWrr : PDIi8<0xC5, MRMSrcReg, 39150b57cec5SDimitry Andric (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2), 39160b57cec5SDimitry Andric "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", 39170b57cec5SDimitry Andric [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), 39180b57cec5SDimitry Andric imm:$src2))]>, 39190b57cec5SDimitry Andric Sched<[WriteVecExtract]>; 39200b57cec5SDimitry Andric 39210b57cec5SDimitry Andric// Insert 39220b57cec5SDimitry Andriclet 
Predicates = [HasAVX, NoBWI] in // completes the `let` that ends the preceding line
defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V, VEX_WIG;

// Legacy encoding: $dst is tied to $src1 and the multiclass default
// (no explicit Is2Addr argument) is used.
let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in
defm PINSRW : sse2_pinsrw, PD;

} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// SSE2 - Packed Mask Creation
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in {

// X86movmsk of a v16i8 / v32i8 source into a GR32orGR64 destination.
def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
                       (ins VR128:$src),
                       "pmovmskb\t{$src, $dst|$dst, $src}",
                       [(set GR32orGR64:$dst, (X86movmsk (v16i8 VR128:$src)))]>,
                       Sched<[WriteVecMOVMSK]>, VEX, VEX_WIG;

let Predicates = [HasAVX2] in {
def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
                        (ins VR256:$src),
                        "pmovmskb\t{$src, $dst|$dst, $src}",
                        [(set GR32orGR64:$dst, (X86movmsk (v32i8 VR256:$src)))]>,
                        Sched<[WriteVecMOVMSKY]>, VEX, VEX_L, VEX_WIG;
}

def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src),
                     "pmovmskb\t{$src, $dst|$dst, $src}",
                     [(set GR32orGR64:$dst, (X86movmsk (v16i8 VR128:$src)))]>,
                     Sched<[WriteVecMOVMSK]>;

} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// SSE2 - Conditional Store
//===---------------------------------------------------------------------===//

// Each variant lists its implicit address register in `Uses` and passes the
// same register to the intrinsic: EDI outside 64-bit mode, RDI in 64-bit mode.
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in {
let Uses = [EDI], Predicates = [HasAVX,Not64BitMode] in
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
                       (ins VR128:$src, VR128:$mask),
                       "maskmovdqu\t{$mask, $src|$src, $mask}",
                       [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
                       VEX, VEX_WIG;
let Uses = [RDI], Predicates = [HasAVX,In64BitMode] in
def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
                         (ins VR128:$src, VR128:$mask),
                         "maskmovdqu\t{$mask, $src|$src, $mask}",
                         [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>,
                         VEX, VEX_WIG;

let Uses = [EDI], Predicates = [UseSSE2,Not64BitMode] in
def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
                     "maskmovdqu\t{$mask, $src|$src, $mask}",
                     [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
let Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in
def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
                       "maskmovdqu\t{$mask, $src|$src, $mask}",
                       [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;

} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// SSE2 - Move Doubleword/Quadword
//===---------------------------------------------------------------------===//

//===---------------------------------------------------------------------===//
// Move Int Doubleword/Quadword (GR32, GR64 or memory) into VR128 / FR64
//
let ExeDomain = SSEPackedInt in {
def VMOVDI2PDIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v4i32 (scalar_to_vector GR32:$src)))]>,
                        VEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVDI2PDIrm : VS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                        VEX, Sched<[WriteVecLoad]>;
def VMOV64toPQIrr : VRS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                          "movq\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst,
                            (v2i64 (scalar_to_vector GR64:$src)))]>,
                          VEX, Sched<[WriteVecMoveFromGpr]>;
// Pattern-less memory form; mayLoad is set by hand because there is no
// pattern to infer it from.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIrm : VRS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                          "movq\t{$src, $dst|$dst, $src}", []>,
                          VEX, Sched<[WriteVecLoad]>;
let isCodeGenOnly = 1 in
def VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                         [(set FR64:$dst, (bitconvert GR64:$src))]>,
                         VEX, Sched<[WriteVecMoveFromGpr]>;

def MOVDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))]>,
                      Sched<[WriteVecMoveFromGpr]>;
def MOVDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                      Sched<[WriteVecLoad]>;
def MOV64toPQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v2i64 (scalar_to_vector GR64:$src)))]>,
                        Sched<[WriteVecMoveFromGpr]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def MOV64toPQIrm : RS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                        "movq\t{$src, $dst|$dst, $src}", []>,
                        Sched<[WriteVecLoad]>;
let isCodeGenOnly = 1 in
def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
                       "movq\t{$src, $dst|$dst, $src}",
                       [(set FR64:$dst, (bitconvert GR64:$src))]>,
                       Sched<[WriteVecMoveFromGpr]>;
} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// Bitcast GR32 -> FR32 (Move Int Doubleword to Single Scalar)
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
  def VMOVDI2SSrr : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set FR32:$dst, (bitconvert GR32:$src))]>,
                         VEX, Sched<[WriteVecMoveFromGpr]>;

  def MOVDI2SSrr : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert GR32:$src))]>,
                       Sched<[WriteVecMoveFromGpr]>;

} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

//===---------------------------------------------------------------------===//
// Move element 0 of a packed doubleword vector to GR32 or to memory
//
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set GR32:$dst, (extractelt (v4i32 VR128:$src),
                                          (iPTR 0)))]>, VEX,
                        Sched<[WriteVecMoveToGpr]>;
def VMOVPDI2DImr : VS2I<0x7E, MRMDestMem, (outs),
                        (ins i32mem:$dst, VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(store (i32 (extractelt (v4i32 VR128:$src),
                                      (iPTR 0))), addr:$dst)]>,
                        VEX, Sched<[WriteVecStore]>;
def MOVPDI2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (extractelt (v4i32 VR128:$src),
                                        (iPTR 0)))]>,
                      Sched<[WriteVecMoveToGpr]>;
def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(store (i32 (extractelt (v4i32 VR128:$src),
                                    (iPTR 0))), addr:$dst)]>,
                      Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// Move element 0 of a packed quadword vector to GR64 or to memory
//
let ExeDomain = SSEPackedInt in {
let SchedRW = [WriteVecMoveToGpr] in {
def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
                          "movq\t{$src, $dst|$dst, $src}",
                          [(set GR64:$dst, (extractelt (v2i64 VR128:$src),
                                            (iPTR 0)))]>,
                          VEX;

def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set GR64:$dst, (extractelt (v2i64 VR128:$src),
                                          (iPTR 0)))]>;
} //SchedRW

// Pattern-less store forms; mayStore is set by hand.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64mr : VRS2I<0x7E, MRMDestMem, (outs),
                          (ins i64mem:$dst, VR128:$src),
                          "movq\t{$src, $dst|$dst, $src}", []>,
                          VEX, Sched<[WriteVecStore]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def MOVPQIto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}", []>,
                        Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// Bitcast FR64 <-> GR64
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
  def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
                           "movq\t{$src, $dst|$dst, $src}",
                           [(set GR64:$dst, (bitconvert FR64:$src))]>,
                           VEX, Sched<[WriteVecMoveToGpr]>;

  def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (bitconvert FR64:$src))]>,
                         Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

//===---------------------------------------------------------------------===//
// Bitcast FR32 -> GR32 (Move Scalar Single to Doubleword Int)
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
  def VMOVSS2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst, (bitconvert FR32:$src))]>,
                         VEX, Sched<[WriteVecMoveToGpr]>;
  def MOVSS2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (bitconvert FR32:$src))]>,
                       Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

// Zero-extending scalar -> vector moves select straight to movd/movq.
let Predicates = [UseAVX] in {
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIrr GR64:$src)>;

  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  // These instructions also write zeros in the high part of a 256-bit register.
  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
            (VMOVDI2PDIrm addr:$src)>;
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v4i32 (VMOVDI2PDIrm addr:$src)), sub_xmm)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (MOVDI2PDIrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (MOV64toPQIrr GR64:$src)>;
  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
            (MOVDI2PDIrm addr:$src)>;
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (MOVDI2PDIrm addr:$src)>;
}

// Before the MC layer of LLVM existed, clang emitted "movd" assembly instead of
// "movq" because of a MacOS parsing limitation. These aliases let old assembly
// that uses "movd" for the 64-bit forms still be parsed.
def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
                (MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
                (MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
// Allow "vmovd" but print "vmovq" since we don't need compatibility for AVX.
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIrr VR128:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64rr GR64:$dst, VR128:$src), 0>;

//===---------------------------------------------------------------------===//
// SSE2 - Move Quadword
//===---------------------------------------------------------------------===//

//===---------------------------------------------------------------------===//
// Move Quadword Int to Packed Quadword Int
//

// 64-bit load into element 0 of a v2i64 (scalar_to_vector of loadi64).
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLoad] in {
def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                     "vmovq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
                     VEX, Requires<[UseAVX]>, VEX_WIG;
def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                    "movq\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst,
                      (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                    XS, Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
} // ExeDomain, SchedRW

//===---------------------------------------------------------------------===//
// Move Packed Quadword Int to Quadword Int
//

// Store element 0 of a v2i64 to a 64-bit memory location.
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecStore] in {
def VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(store (i64 (extractelt (v2i64 VR128:$src),
                                      (iPTR 0))), addr:$dst)]>,
                        VEX, VEX_WIG;
def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                      "movq\t{$src, $dst|$dst, $src}",
                      [(store (i64 (extractelt (v2i64 VR128:$src),
                                    (iPTR 0))), addr:$dst)]>;
} // ExeDomain, SchedRW

// For disassembler only
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVPQI2QIrr : VS2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_WIG;
def MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                      "movq\t{$src, $dst|$dst, $src}", []>;
}

// ".s"-suffixed spellings mapped onto the 0xD6 (MRMDestReg) register forms.
def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIrr VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
                (MOVPQI2QIrr VR128:$dst, VR128:$src), 0>;

let Predicates = [UseAVX] in {
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIrm addr:$src)), sub_xmm)>;

  def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
            (VMOVPQI2QImr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(v2i64 (X86vzload64 addr:$src)), (MOVQI2PQIrm addr:$src)>;

  def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
            (MOVPQI2QImr addr:$dst, VR128:$src)>;
}

//===---------------------------------------------------------------------===//
// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
// IA32 document. movq xmm1, xmm2 does clear the high bits.
//
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
                         XS, VEX, Requires<[UseAVX]>, VEX_WIG;
def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
                        XS, Requires<[UseSSE2]>;
} // ExeDomain, SchedRW

// The v2f64 flavour of X86vzmovl reuses the integer movq forms.
let Predicates = [UseAVX] in {
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
            (VMOVZPQILo2PQIrr VR128:$src)>;
}
let Predicates = [UseSSE2] in {
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
            (MOVZPQILo2PQIrr VR128:$src)>;
}

// 256-bit X86vzmovl: run the 128-bit instruction on the sub_xmm part and
// wrap the result with SUBREG_TO_REG.
let Predicates = [UseAVX] in {
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIrr
                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIrr
                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)))),
             sub_xmm)>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

/// sse3_replicate_sfp - Emits a register form (rr) and a load-folding form
/// (rm) of a single-source S3SI instruction.
///   op        - opcode byte handed to S3SI.
///   OpNode    - SelectionDAG node matched by both patterns.
///   OpcodeStr - mnemonic; "\t{$src, $dst|$dst, $src}" is appended to it.
///   vt        - value type wrapped around the rr pattern result.
///   RC        - register class of $dst (and of $src in rr).
///   mem_frag  - load fragment applied to addr:$src in the rm pattern.
///   x86memop  - memory operand type of $src in rm.
///   sched     - scheduling class; rm uses sched.Folded.
multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
                              ValueType vt, RegisterClass RC, PatFrag mem_frag,
                              X86MemOperand x86memop, X86FoldableSchedWrite sched> {
def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set RC:$dst, (vt (OpNode RC:$src)))]>,
              Sched<[sched]>;
def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>,
              Sched<[sched.Folded]>;
}

let Predicates = [HasAVX, NoVLX] in {
  defm VMOVSHDUP  : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
                                       v4f32, VR128, loadv4f32, f128mem,
                                       SchedWriteFShuffle.XMM>, VEX, VEX_WIG;
  defm VMOVSLDUP  : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
                                       v4f32, VR128, loadv4f32, f128mem,
                                       SchedWriteFShuffle.XMM>, VEX, VEX_WIG;
  defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
                                       v8f32, VR256, loadv8f32, f256mem,
                                       SchedWriteFShuffle.YMM>, VEX, VEX_L, VEX_WIG;
  defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
                                       v8f32, VR256, loadv8f32, f256mem,
                                       SchedWriteFShuffle.YMM>, VEX, VEX_L, VEX_WIG;
}
defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
                                   memopv4f32, f128mem, SchedWriteFShuffle.XMM>;
defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
                                   memopv4f32, f128mem, SchedWriteFShuffle.XMM>;

// Integer-typed (v4i32 / v8i32) uses of the same nodes select the FP
// instructions defined above.
let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v4i32 (X86Movshdup VR128:$src)),
            (VMOVSHDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movshdup (load addr:$src))),
            (VMOVSHDUPrm addr:$src)>;
  def : Pat<(v4i32 (X86Movsldup VR128:$src)),
            (VMOVSLDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movsldup (load addr:$src))),
            (VMOVSLDUPrm addr:$src)>;
  def : Pat<(v8i32 (X86Movshdup VR256:$src)),
            (VMOVSHDUPYrr VR256:$src)>;
  def : Pat<(v8i32 (X86Movshdup (load addr:$src))),
            (VMOVSHDUPYrm addr:$src)>;
  def : Pat<(v8i32 (X86Movsldup VR256:$src)),
            (VMOVSLDUPYrr VR256:$src)>;
  def : Pat<(v8i32 (X86Movsldup (load addr:$src))),
            (VMOVSLDUPYrm addr:$src)>;
}

let Predicates = [UseSSE3] in {
  def : Pat<(v4i32 (X86Movshdup VR128:$src)),
            (MOVSHDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movshdup (memop addr:$src))),
            (MOVSHDUPrm addr:$src)>;
  def : Pat<(v4i32 (X86Movsldup VR128:$src)),
            (MOVSLDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movsldup (memop addr:$src))),
            (MOVSLDUPrm addr:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Replicate Double FP - MOVDDUP
//===---------------------------------------------------------------------===//

/// sse3_replicate_dfp - 128-bit X86Movddup: a VR128 register form (rr) and a
/// form (rm) whose source is a single f64 load (f64mem operand).
///   OpcodeStr - mnemonic; "\t{$src, $dst|$dst, $src}" is appended to it.
///   sched     - scheduling widths; rr uses sched.XMM, rm sched.XMM.Folded.
multiclass sse3_replicate_dfp<string OpcodeStr, X86SchedWriteWidths sched> {
def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set VR128:$dst, (v2f64 (X86Movddup VR128:$src)))]>,
              Sched<[sched.XMM]>;
def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set VR128:$dst,
                (v2f64 (X86Movddup
                        (scalar_to_vector (loadf64 addr:$src)))))]>,
              Sched<[sched.XMM.Folded]>;
}

// FIXME: Merge with above classes when there are patterns for the ymm version
/// sse3_replicate_dfp_y - 256-bit X86Movddup: VR256 register form (rr) and a
/// full-width loadv4f64 form (rm); uses sched.YMM / sched.YMM.Folded.
multiclass sse3_replicate_dfp_y<string OpcodeStr, X86SchedWriteWidths sched> {
def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>,
              Sched<[sched.YMM]>;
def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set VR256:$dst,
                (v4f64 (X86Movddup (loadv4f64 addr:$src))))]>,
              Sched<[sched.YMM.Folded]>;
}

let Predicates = [HasAVX, NoVLX] in {
  defm VMOVDDUP  : sse3_replicate_dfp<"vmovddup", SchedWriteFShuffle>,
                   VEX, VEX_WIG;
  defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup", SchedWriteFShuffle>,
                   VEX, VEX_L, VEX_WIG;
}

defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>;


let Predicates = [HasAVX, NoVLX] in {
  // NOTE(review): the trailing Requires<[HasAVX]> repeats a predicate the
  // enclosing `let` already lists - confirm whether it can be dropped.
  def : Pat<(X86Movddup (v2f64 (simple_load addr:$src))),
            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
  def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
}

let Predicates = [UseSSE3] in {
  // No need for aligned memory as this only loads 64-bits.
  def : Pat<(X86Movddup (v2f64 (simple_load addr:$src))),
            (MOVDDUPrm addr:$src)>;
  def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
            (MOVDDUPrm addr:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Move Unaligned Integer
//===---------------------------------------------------------------------===//

let Predicates = [HasAVX] in {
  def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                      "vlddqu\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
                      Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
  def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                       "vlddqu\t{$src, $dst|$dst, $src}",
                       [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>,
                       Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, VEX_L, VEX_WIG;
} // Predicates

def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "lddqu\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
                   Sched<[SchedWriteVecMoveLS.XMM.RM]>;

//===---------------------------------------------------------------------===//
// SSE3 - Arithmetic
//===---------------------------------------------------------------------===//

/// sse3_addsub - Opcode 0xD0 add/sub pair.  Produces a register-register
/// form (rr) and a register-memory form (rm); both select X86Addsub on type
/// `vt`.  The rm form reads its second operand through `ld_frag`.
/// Is2Addr = 1 uses the two-operand assembly string, Is2Addr = 0 uses the
/// three-operand one.
multiclass sse3_addsub<string OpcodeStr, ValueType vt, RegisterClass RC,
                       X86MemOperand x86memop, X86FoldableSchedWrite sched,
                       PatFrag ld_frag, bit Is2Addr = 1> {
  def rr : I<0xD0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
             !if(Is2Addr,
                 OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                 OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, (vt (X86Addsub RC:$src1, RC:$src2)))]>,
           Sched<[sched]>;
  def rm : I<0xD0, MRMSrcMem, (outs RC:$dst),
             (ins RC:$src1, x86memop:$src2),
             !if(Is2Addr,
                 OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                 OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst,
                   (vt (X86Addsub RC:$src1, (ld_frag addr:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// VEX-encoded forms: three-operand syntax (Is2Addr = 0).
let Predicates = [HasAVX], ExeDomain = SSEPackedSingle in {
  defm VADDSUBPS  : sse3_addsub<"vaddsubps", v4f32, VR128, f128mem,
                                SchedWriteFAddSizes.PS.XMM, loadv4f32, 0>,
                    XD, VEX_4V, VEX_WIG;
  defm VADDSUBPSY : sse3_addsub<"vaddsubps", v8f32, VR256, f256mem,
                                SchedWriteFAddSizes.PS.YMM, loadv8f32, 0>,
                    XD, VEX_4V, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX], ExeDomain = SSEPackedDouble in {
  defm VADDSUBPD  : sse3_addsub<"vaddsubpd", v2f64, VR128, f128mem,
                                SchedWriteFAddSizes.PD.XMM, loadv2f64, 0>,
                    PD, VEX_4V, VEX_WIG;
  defm VADDSUBPDY : sse3_addsub<"vaddsubpd", v4f64, VR256, f256mem,
                                SchedWriteFAddSizes.PD.YMM, loadv4f64, 0>,
                    PD, VEX_4V, VEX_L, VEX_WIG;
}

// Legacy SSE3 forms: destination tied to $src1, default two-operand syntax.
let Constraints = "$src1 = $dst", Predicates = [UseSSE3],
    ExeDomain = SSEPackedSingle in
defm ADDSUBPS : sse3_addsub<"addsubps", v4f32, VR128, f128mem,
                            SchedWriteFAddSizes.PS.XMM, memopv4f32>, XD;
let Constraints = "$src1 = $dst", Predicates = [UseSSE3],
    ExeDomain = SSEPackedDouble in
defm ADDSUBPD : sse3_addsub<"addsubpd", v2f64, VR128, f128mem,
                            SchedWriteFAddSizes.PD.XMM, memopv2f64>, PD;

//===---------------------------------------------------------------------===//
// SSE3 Instructions
//===---------------------------------------------------------------------===//

// Horizontal ops

/// S3D_Int - Binary op built on the S3DI instruction class.  Emits rr and rm
/// forms selecting `OpNode` on type `vt`; Is2Addr chooses between the
/// two-operand and three-operand assembly strings.
multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
                   X86MemOperand x86memop, SDNode OpNode,
                   X86FoldableSchedWrite sched, PatFrag ld_frag,
                   bit Is2Addr = 1> {
  def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
                !if(Is2Addr,
                    OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                    OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
           Sched<[sched]>;

  def rm : S3DI<o, MRMSrcMem, (outs RC:$dst),
                (ins RC:$src1, x86memop:$src2),
                !if(Is2Addr,
                    OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                    OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set RC:$dst,
                      (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

/// S3_Int - Same shape as S3D_Int, but built on the S3I instruction class.
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
                  X86MemOperand x86memop, SDNode OpNode,
                  X86FoldableSchedWrite sched, PatFrag ld_frag,
                  bit Is2Addr = 1> {
  def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
               !if(Is2Addr,
                   OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                   OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
           Sched<[sched]>;

  def rm : S3I<o, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86memop:$src2),
               !if(Is2Addr,
                   OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                   OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set RC:$dst,
                     (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// VEX-encoded horizontal add/sub.
let Predicates = [HasAVX], ExeDomain = SSEPackedSingle in {
  defm VHADDPS  : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
                          X86fhadd, WriteFHAdd, loadv4f32, 0>,
                  VEX_4V, VEX_WIG;
  defm VHSUBPS  : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
                          X86fhsub, WriteFHAdd, loadv4f32, 0>,
                  VEX_4V, VEX_WIG;
  defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
                          X86fhadd, WriteFHAddY, loadv8f32, 0>,
                  VEX_4V, VEX_L, VEX_WIG;
  defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
                          X86fhsub, WriteFHAddY, loadv8f32, 0>,
                  VEX_4V, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX], ExeDomain = SSEPackedDouble in {
  defm VHADDPD  : S3_Int<0x7C, "vhaddpd", v2f64, VR128, f128mem,
                         X86fhadd, WriteFHAdd, loadv2f64, 0>,
                  VEX_4V, VEX_WIG;
  defm VHSUBPD  : S3_Int<0x7D, "vhsubpd", v2f64, VR128, f128mem,
                         X86fhsub, WriteFHAdd, loadv2f64, 0>,
                  VEX_4V, VEX_WIG;
  defm VHADDPDY : S3_Int<0x7C, "vhaddpd", v4f64, VR256, f256mem,
                         X86fhadd, WriteFHAddY, loadv4f64, 0>,
                  VEX_4V, VEX_L, VEX_WIG;
  defm VHSUBPDY : S3_Int<0x7D, "vhsubpd", v4f64, VR256, f256mem,
                         X86fhsub, WriteFHAddY, loadv4f64, 0>,
                  VEX_4V, VEX_L, VEX_WIG;
}

// Legacy horizontal add/sub: destination tied to $src1.
let Constraints = "$src1 = $dst", ExeDomain = SSEPackedSingle in {
  defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd,
                        WriteFHAdd, memopv4f32>;
  defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub,
                        WriteFHAdd, memopv4f32>;
}
let Constraints = "$src1 = $dst", ExeDomain = SSEPackedDouble in {
  defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd,
                       WriteFHAdd, memopv2f64>;
  defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub,
                       WriteFHAdd, memopv2f64>;
}

//===---------------------------------------------------------------------===//
// SSSE3 - Packed Absolute Instructions
//===---------------------------------------------------------------------===//

/// SS3I_unop_rm - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm<bits<8> opc, string OpcodeStr, ValueType vt,
                        SDNode OpNode, X86SchedWriteWidths sched,
                        PatFrag ld_frag> {
  // Register source: VR128 in, VR128 out.
  def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                 OpcodeStr # "\t{$src, $dst|$dst, $src}",
                 [(set VR128:$dst, (vt (OpNode VR128:$src)))]>,
           Sched<[sched.XMM]>;

  // Memory source: the operand is read through ld_frag.
  def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                 OpcodeStr # "\t{$src, $dst|$dst, $src}",
                 [(set VR128:$dst, (vt (OpNode (ld_frag addr:$src))))]>,
           Sched<[sched.XMM.Folded]>;
}

/// SS3I_unop_rm_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
/// 256-bit counterpart of the 128-bit unary multiclass: emits Yrr/Yrm on
/// VR256 and always reads the memory operand with `load`.
multiclass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt,
                          SDNode OpNode, X86SchedWriteWidths sched> {
  def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
                  (ins VR256:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR256:$dst, (vt (OpNode VR256:$src)))]>,
                  Sched<[sched.YMM]>;

  def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
                  (ins i256mem:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR256:$dst,
                    (vt (OpNode (load addr:$src))))]>,
                  Sched<[sched.YMM.Folded]>;
}

let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  defm VPABSB  : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, SchedWriteVecALU,
                              load>, VEX, VEX_WIG;
  defm VPABSW  : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, SchedWriteVecALU,
                              load>, VEX, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX] in {
  defm VPABSD  : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, SchedWriteVecALU,
                              load>, VEX, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  defm VPABSB  : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs, SchedWriteVecALU>,
                                VEX, VEX_L, VEX_WIG;
  defm VPABSW  : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs, SchedWriteVecALU>,
                                VEX, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
  defm VPABSD  : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs, SchedWriteVecALU>,
                                VEX, VEX_L, VEX_WIG;
}

defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, SchedWriteVecALU,
                          memop>;
defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, SchedWriteVecALU,
                          memop>;
defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, SchedWriteVecALU,
                          memop>;

//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
//===---------------------------------------------------------------------===//

/// SS3I_binop_rm - Simple SSSE3 bin op.  Emits rr and rm forms selecting
/// `OpNode`; the first operand is typed OpVT and the result DstVT, so the two
/// may differ (e.g. the PMADDUBSW users below pass v16i8 in / v8i16 out).
/// rr is marked commutable here; users that are not commutable wrap their
/// defm in `let isCommutable = 0`.
multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         ValueType DstVT, ValueType OpVT, RegisterClass RC,
                         PatFrag memop_frag, X86MemOperand x86memop,
                         X86FoldableSchedWrite sched, bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (DstVT (OpNode (OpVT RC:$src1), RC:$src2)))]>,
       Sched<[sched]>;
  def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst,
         (DstVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))]>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}

/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
/// 128-bit only; selects the intrinsic IntId128 instead of an SDNode.
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
                             Intrinsic IntId128, X86FoldableSchedWrite sched,
                             PatFrag ld_frag, bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1, VR128:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
       Sched<[sched]>;
  def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
       (ins VR128:$src1, i128mem:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst,
         (IntId128 VR128:$src1, (ld_frag addr:$src2)))]>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}

/// SS3I_binop_rm_int_y - 256-bit intrinsic bin op.  Always uses the
/// three-operand assembly string and reads memory with `load`.
multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
                               Intrinsic IntId256,
                               X86FoldableSchedWrite sched> {
  let isCommutable = 1 in
  def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
       (ins VR256:$src1, VR256:$src2),
       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
       Sched<[sched]>;
  def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
       (ins VR256:$src1, i256mem:$src2),
       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set VR256:$dst,
         (IntId256 VR256:$src1, (load addr:$src2)))]>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}

let ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
  defm VPSHUFB    : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8,
                                  VR128, load, i128mem,
                                  SchedWriteVarShuffle.XMM, 0>, VEX_4V, VEX_WIG;
  defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16,
                                  v16i8, VR128, load, i128mem,
                                  SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG;
}
defm VPMULHRSW    : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, v8i16,
                                  VR128, load, i128mem,
                                  SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG;
}

let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
  defm VPHADDW    : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128,
                                  load, i128mem,
                                  SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
  defm VPHADDD    : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128,
                                  load, i128mem,
                                  SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
  defm VPHSUBW    : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128,
                                  load, i128mem,
                                  SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
  // VEX_WIG was missing here although every sibling in this group carries it.
  defm VPHSUBD    : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
                                  load, i128mem,
                                  SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
  defm VPSIGNB    : SS3I_binop_rm_int<0x08, "vpsignb",
                                      int_x86_ssse3_psign_b_128,
                                      SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
  defm VPSIGNW    : SS3I_binop_rm_int<0x09, "vpsignw",
                                      int_x86_ssse3_psign_w_128,
                                      SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
  defm VPSIGND    : SS3I_binop_rm_int<0x0A, "vpsignd",
                                      int_x86_ssse3_psign_d_128,
                                      SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
  defm VPHADDSW   : SS3I_binop_rm_int<0x03, "vphaddsw",
                                      int_x86_ssse3_phadd_sw_128,
                                      SchedWritePHAdd.XMM, load, 0>, VEX_4V, VEX_WIG;
  defm VPHSUBSW   : SS3I_binop_rm_int<0x07, "vphsubsw",
                                      int_x86_ssse3_phsub_sw_128,
                                      SchedWritePHAdd.XMM, load, 0>, VEX_4V, VEX_WIG;
}
}

let ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
  defm VPSHUFBY   : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8,
                                  VR256, load, i256mem,
                                  SchedWriteVarShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
  defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16,
                                   v32i8, VR256, load, i256mem,
                                   SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
}
defm VPMULHRSWY   : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16,
                                  VR256, load, i256mem,
                                  SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
}

let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
  defm VPHADDWY   : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16,
                                  VR256, load, i256mem,
                                  SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
  defm VPHADDDY   : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256,
                                  load, i256mem,
                                  SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
  defm VPHSUBWY   : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16,
                                  VR256, load, i256mem,
                                  SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
  // VEX_WIG was missing here although every sibling in this group carries it.
  defm VPHSUBDY   : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
                                  load, i256mem,
                                  SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
  defm VPSIGNB   : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
                                       SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
  defm VPSIGNW   : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w,
                                       SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
  defm VPSIGND   : SS3I_binop_rm_int_y<0x0A, "vpsignd", int_x86_avx2_psign_d,
                                       SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
  defm VPHADDSW  : SS3I_binop_rm_int_y<0x03, "vphaddsw",
                                       int_x86_avx2_phadd_sw,
                                       SchedWritePHAdd.YMM>, VEX_4V, VEX_L, VEX_WIG;
  defm VPHSUBSW  : SS3I_binop_rm_int_y<0x07, "vphsubsw",
                                       int_x86_avx2_phsub_sw,
                                       SchedWritePHAdd.YMM>, VEX_4V, VEX_L, VEX_WIG;
}
}

// None of these have i8 immediate fields.
// Non-commutable legacy SSSE3 binops; destination tied to $src1.
let ImmT = NoImm, Constraints = "$src1 = $dst", isCommutable = 0 in {
  defm PHADDW    : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, v8i16, VR128,
                                 memop, i128mem, SchedWritePHAdd.XMM>;
  defm PHADDD    : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, v4i32, VR128,
                                 memop, i128mem, SchedWritePHAdd.XMM>;
  defm PHSUBW    : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, v8i16, VR128,
                                 memop, i128mem, SchedWritePHAdd.XMM>;
  defm PHSUBD    : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, v4i32, VR128,
                                 memop, i128mem, SchedWritePHAdd.XMM>;
  defm PSIGNB    : SS3I_binop_rm_int<0x08, "psignb",
                                     int_x86_ssse3_psign_b_128,
                                     SchedWriteVecALU.XMM, memop>;
  defm PSIGNW    : SS3I_binop_rm_int<0x09, "psignw",
                                     int_x86_ssse3_psign_w_128,
                                     SchedWriteVecALU.XMM, memop>;
  defm PSIGND    : SS3I_binop_rm_int<0x0A, "psignd",
                                     int_x86_ssse3_psign_d_128,
                                     SchedWriteVecALU.XMM, memop>;
  defm PSHUFB    : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, v16i8,
                                 VR128, memop, i128mem,
                                 SchedWriteVarShuffle.XMM>;
  defm PHADDSW   : SS3I_binop_rm_int<0x03, "phaddsw",
                                     int_x86_ssse3_phadd_sw_128,
                                     SchedWritePHAdd.XMM, memop>;
  defm PHSUBSW   : SS3I_binop_rm_int<0x07, "phsubsw",
                                     int_x86_ssse3_phsub_sw_128,
                                     SchedWritePHAdd.XMM, memop>;
  defm PMADDUBSW : SS3I_binop_rm<0x04, "pmaddubsw", X86vpmaddubsw, v8i16,
                                 v16i8, VR128, memop, i128mem,
                                 SchedWriteVecIMul.XMM>;
}
// PMULHRSW keeps the multiclass's own commutability setting.
let ImmT = NoImm, Constraints = "$src1 = $dst" in
defm PMULHRSW : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, v8i16,
                              VR128, memop, i128mem, SchedWriteVecIMul.XMM>;

//===---------------------------------------------------------------------===//
// SSSE3 - Packed Align Instruction Patterns
//===---------------------------------------------------------------------===//

/// ssse3_palignr - Opcode 0x0F align-right with a u8 immediate.  Emits rri
/// (register) and rmi (memory) forms selecting X86PAlignr; Is2Addr chooses
/// between the two-operand and three-operand assembly strings.
multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
                         PatFrag memop_frag, X86MemOperand x86memop,
                         X86FoldableSchedWrite sched, bit Is2Addr = 1> {
  let hasSideEffects = 0 in
  def rri : SS3AI<0x0F, MRMSrcReg, (outs RC:$dst),
                  (ins RC:$src1, RC:$src2, u8imm:$src3),
                  !if(Is2Addr,
                      asm # "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                      asm # "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                  [(set RC:$dst, (VT (X86PAlignr RC:$src1, RC:$src2,
                                                 (i8 timm:$src3))))]>,
            Sched<[sched]>;

  let hasSideEffects = 0, mayLoad = 1 in
  def rmi : SS3AI<0x0F, MRMSrcMem, (outs RC:$dst),
                  (ins RC:$src1, x86memop:$src2, u8imm:$src3),
                  !if(Is2Addr,
                      asm # "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                      asm # "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                  [(set RC:$dst, (VT (X86PAlignr RC:$src1,
                                                 (memop_frag addr:$src2),
                                                 (i8 timm:$src3))))]>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm VPALIGNR  : ssse3_palignr<"vpalignr", v16i8, VR128, load, i128mem,
                               SchedWriteShuffle.XMM, 0>,
                 VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, load, i256mem,
                               SchedWriteShuffle.YMM, 0>,
                 VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
defm PALIGNR   : ssse3_palignr<"palignr", v16i8, VR128, memop, i128mem,
                               SchedWriteShuffle.XMM>;

//===---------------------------------------------------------------------===//
// SSE3 - Thread synchronization
//===---------------------------------------------------------------------===//

// MONITOR has separate 32-bit and 64-bit-mode definitions that differ only in
// the first implicit register use (EAX vs. RAX) and the mode predicate.
let SchedRW = [WriteSystem], Uses = [EAX, ECX, EDX] in
def MONITOR32rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
                   TB, Requires<[HasSSE3, Not64BitMode]>;
let SchedRW = [WriteSystem], Uses = [RAX, ECX, EDX] in
def MONITOR64rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
                   TB, Requires<[HasSSE3, In64BitMode]>;

let SchedRW = [WriteSystem], Uses = [ECX, EAX] in
def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait",
                [(int_x86_sse3_mwait ECX, EAX)]>,
              TB, Requires<[HasSSE3]>;

// Aliases that accept the implicit register operands spelled out explicitly.
def : InstAlias<"mwait\t{%eax, %ecx|ecx, eax}", (MWAITrr)>,
      Requires<[Not64BitMode]>;
def : InstAlias<"mwait\t{%rax, %rcx|rcx, rax}", (MWAITrr)>,
      Requires<[In64BitMode]>;

def : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITOR32rrr)>,
      Requires<[Not64BitMode]>;
def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITOR64rrr)>,
      Requires<[In64BitMode]>;

//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Move with Sign/Zero Extend
// NOTE: Any Extend is promoted to Zero Extend in X86ISelDAGToDAG.cpp
//===----------------------------------------------------------------------===//

/// SS41I_pmovx_rrrm - One pattern-less rr/rm pair for a packed extend.
/// OutRC and InRC are separate so the result register class can be wider
/// than the source.
multiclass SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
                            RegisterClass OutRC, RegisterClass InRC,
                            X86FoldableSchedWrite sched> {
  def rr : SS48I<opc, MRMSrcReg, (outs OutRC:$dst), (ins InRC:$src),
                 OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
           Sched<[sched]>;

  def rm : SS48I<opc, MRMSrcMem, (outs OutRC:$dst), (ins MemOp:$src),
                 OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
           Sched<[sched.Folded]>;
}

/// SS41I_pmovx_rm_all - Expands one extend into its legacy form (NAME), its
/// VEX 128-bit form (V#NAME) and its VEX 256-bit form (V#NAME#Y).  `prd` is
/// added to the predicate list of both VEX forms.
multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,
                              X86MemOperand MemOp, X86MemOperand MemYOp,
                              Predicate prd> {
  defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128,
                               SchedWriteShuffle.XMM>;
  let Predicates = [HasAVX, prd] in
  defm V#NAME   : SS41I_pmovx_rrrm<opc, "v" # OpcodeStr, MemOp,
                                   VR128, VR128, SchedWriteShuffle.XMM>,
                  VEX, VEX_WIG;
  let Predicates = [HasAVX2, prd] in
  defm V#NAME#Y : SS41I_pmovx_rrrm<opc, "v" # OpcodeStr, MemYOp,
                                   VR256, VR128, WriteShuffle256>,
                  VEX, VEX_L, VEX_WIG;
}

/// SS41I_pmovx_rm - Instantiates the sign-extend (PMOVSX) and zero-extend
/// (PMOVZX) variants for one element-size pair.  The zero-extend opcode is
/// the sign-extend opcode plus 0x10.
multiclass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
                          X86MemOperand MemYOp, Predicate prd> {
  defm PMOVSX#NAME : SS41I_pmovx_rm_all<opc, "pmovsx" # OpcodeStr,
                                        MemOp, MemYOp, prd>;
  defm PMOVZX#NAME : SS41I_pmovx_rm_all<!add(opc, 0x10),
                                        "pmovzx" # OpcodeStr,
                                        MemOp, MemYOp, prd>;
}

defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem, NoVLX_Or_NoBWI>;
defm WD : SS41I_pmovx_rm<0x23, "wd", i64mem, i128mem, NoVLX>;
defm DQ : SS41I_pmovx_rm<0x25, "dq", i64mem, i128mem, NoVLX>;

defm BD : SS41I_pmovx_rm<0x21, "bd", i32mem, i64mem, NoVLX>;
defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem, NoVLX>;

defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem, NoVLX>;

// AVX2 Patterns
//
// Selection patterns for the 256-bit (…Y) packed-extend instructions.
//   OpcPrefix - instruction name prefix; the records are looked up by name
//               with !cast<I>(OpcPrefix#<suffix>).
//   ExtTy     - "s" or "z"; selects the <ExtTy>extloadvi{8,16,32} fragments.
//   ExtOp     - extend node used where source and result have the same
//               number of elements (e.g. v16i8 -> v16i16).
//   InVecOp   - extend node used where the source has more elements than
//               the result (e.g. v16i8 -> v8i32).
multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
                                     SDNode ExtOp, SDNode InVecOp> {
  // Register-Register patterns
  let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
  }
  let Predicates = [HasAVX2, NoVLX] in {
  def : Pat<(v8i32 (InVecOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BDYrr) VR128:$src)>;
  def : Pat<(v4i64 (InVecOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BQYrr) VR128:$src)>;

  def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
            (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
  def : Pat<(v4i64 (InVecOp (v8i16 VR128:$src))),
            (!cast<I>(OpcPrefix#WQYrr) VR128:$src)>;

  def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
            (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
  }

  // Simple Register-Memory patterns
  // (extending-load fragments, plus a full v16i8 load feeding ExtOp).
  let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;

  def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
  }

  let Predicates = [HasAVX2, NoVLX] in {
  def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
  def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;

  def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
  def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;

  def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
  }

  // AVX2 Register-Memory patterns
  // (vector loads, and scalar / zero-extending loads that were placed into
  // a vector before being extended).
  let Predicates = [HasAVX2, NoVLX] in {
  def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;

  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (v16i8 (X86vzload64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (v16i8 (X86vzload64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (v8i16 (X86vzload64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
  }
}

defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", sext, sext_invec>;
defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", zext, zext_invec>;

// SSE4.1/AVX patterns.
//
// Selection patterns for the 128-bit packed-extend instructions.
//   OpcPrefix - instruction name prefix used with !cast<I>(OpcPrefix#<suffix>).
//   ExtTy     - "s" or "z"; selects the <ExtTy>extloadvi{8,16,32} fragments.
//   ExtOp     - extend node matched by every pattern in this multiclass.
// The pattern groups are, in order: register sources, extending-load
// fragments, then loads that were wrapped in scalar_to_vector / X86vzload /
// a full vector load before being extended.
multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
                                SDNode ExtOp> {
  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  def : Pat<(v8i16 (ExtOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BWrr) VR128:$src)>;
  }
  let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v4i32 (ExtOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BDrr) VR128:$src)>;
  def : Pat<(v2i64 (ExtOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BQrr) VR128:$src)>;

  def : Pat<(v4i32 (ExtOp (v8i16 VR128:$src))),
            (!cast<I>(OpcPrefix#WDrr) VR128:$src)>;
  def : Pat<(v2i64 (ExtOp (v8i16 VR128:$src))),
            (!cast<I>(OpcPrefix#WQrr) VR128:$src)>;

  def : Pat<(v2i64 (ExtOp (v4i32 VR128:$src))),
            (!cast<I>(OpcPrefix#DQrr) VR128:$src)>;
  }
  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  def : Pat<(v8i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  }
  let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
  def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;

  def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
  def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;

  def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  }
  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  def : Pat<(v8i16 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  }
  let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;

  def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;

  def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;

  def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;

  def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (loadv4i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  }
}

// VEX-encoded instantiations; the predicates come from the multiclass body.
defm : SS41I_pmovx_patterns<"VPMOVSX", "s", sext_invec>;
defm : SS41I_pmovx_patterns<"VPMOVZX", "z", zext_invec>;

// Legacy-encoded instantiations.
// NOTE(review): the multiclass body sets its own Predicates; presumably this
// enclosing let replaces them with [UseSSE41] for these records - confirm
// against TableGen's let precedence rules.
let Predicates = [UseSSE41] in {
  defm : SS41I_pmovx_patterns<"PMOVSX", "s", sext_invec>;
  defm : SS41I_pmovx_patterns<"PMOVZX", "z", zext_invec>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Extract Instructions
//===----------------------------------------------------------------------===//

/// SS41I_binop_ext8 - SSE
4.1 extract 8 bits to 32 bit reg or 8 bit mem
// NOTE: the multiclass defined below is named SS41I_extract8; the
// "SS41I_binop_ext8" name in the header comment above is stale.
//   rr - stores the X86pextrb result into a GR32orGR64 destination.
//   mr - truncates the X86pextrb result to i8 and stores it to memory.
multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                 (ins VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1),
                                         imm:$src2))]>,
                  Sched<[WriteVecExtract]>;
  let hasSideEffects = 0, mayStore = 1 in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i8mem:$dst, VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))),
                          addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}

let Predicates = [HasAVX, NoBWI] in
  defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX, VEX_WIG;

defm PEXTRB : SS41I_extract8<0x14, "pextrb">;


/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
/// Also defines rr_REV, a register-destination record with an empty pattern
/// list that is marked isCodeGenOnly and ForceDisassemble and is linked to
/// NAME#rr through FoldGenData.
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rr_REV : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
                   Sched<[WriteVecExtract]>, FoldGenData<NAME#rr>;

  let hasSideEffects = 0, mayStore = 1 in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i16mem:$dst, VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), imm:$src2))),
                          addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}

let Predicates = [HasAVX, NoBWI] in
  defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX, VEX_WIG;

defm PEXTRW : SS41I_extract16<0x15, "pextrw">;


/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
                 (ins VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR32:$dst,
                  (extractelt (v4i32 VR128:$src1), imm:$src2))]>,
                  Sched<[WriteVecExtract]>;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i32mem:$dst, VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
                          addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}

let Predicates = [HasAVX, NoDQI] in
  defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;

defm PEXTRD : SS41I_extract32<0x16, "pextrd">;

/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
                 (ins VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR64:$dst,
                  (extractelt (v2i64 VR128:$src1), imm:$src2))]>,
                  Sched<[WriteVecExtract]>;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i64mem:$dst, VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
                          addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}

// Same opcode byte (0x16) as the 32-bit extract; the 64-bit forms add
// VEX_W / REX_W.
let Predicates = [HasAVX, NoDQI] in
  defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;

defm PEXTRQ : SS41I_extract64<0x16, "pextrq">, REX_W;

/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
/// destination
/// The patterns match an integer element extract from the v4f32 source
/// reinterpreted as v4i32 (bc_v4i32).
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set GR32orGR64:$dst,
                      (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
                   Sched<[WriteVecExtract]>;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins f32mem:$dst, VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
                            addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}

let ExeDomain = SSEPackedSingle in {
  let Predicates = [UseAVX] in
    defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX, VEX_WIG;
  defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Insert Instructions
//===----------------------------------------------------------------------===//

/// SS41I_insert8 - Insert a byte taken from a GR32orGR64 register (rr) or
/// from an extending i8 load (rm) into a VR128 value via X86pinsrb.
/// Is2Addr selects the asm string: 1 prints "$src3, $src2, $dst" (the
/// instantiation ties $src1 to $dst); 0 prints all of $src1..$src3.
multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, GR32orGR64:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86pinsrb VR128:$src1, GR32orGR64:$src2, imm:$src3))]>,
      Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i8mem:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), imm:$src3))]>,
      Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

let Predicates = [HasAVX, NoBWI] in
  defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V, VEX_WIG;
let Constraints = "$src1 = $dst" in
  defm PINSRB : SS41I_insert8<0x20, "pinsrb">;

/// SS41I_insert32 - Insert a 32-bit value from a GR32 register (rr) or an
/// i32 load (rm) into a v4i32 value via insertelt. Is2Addr selects the
/// two-operand or three-operand asm string as in SS41I_insert8.
multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, GR32:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
      Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i32mem:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), imm:$src3)))]>,
      Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

let Predicates = [HasAVX, NoDQI] in
  defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
  defm PINSRD : SS41I_insert32<0x22, "pinsrd">;

/// SS41I_insert64 - Insert a 64-bit value from a GR64 register (rr) or an
/// i64 load (rm) into a v2i64 value via insertelt. Is2Addr selects the
/// two-operand or three-operand asm string as in SS41I_insert8.
multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, GR64:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
      Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i64mem:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), imm:$src3)))]>,
      Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

// Same opcode byte (0x22) as the 32-bit insert; the 64-bit forms add
// VEX_W / REX_W.
let Predicates = [HasAVX, NoDQI] in
  defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
let Constraints = "$src1 = $dst" in
  defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;

// insertps has a few different modes, there's the first two here below which
// are optimized inserts that won't zero arbitrary elements in the destination
// vector. The next one matches the intrinsic and could zero arbitrary elements
// in the target vector.
/// SS41I_insertf32 - Register (rr) and memory (rm) forms of insertps, both
/// selected from the X86insertps node with a target-constant immediate.
///   opc     - opcode byte.
///   asm     - mnemonic used in the asm string.
///   Is2Addr - 1 prints the two-operand form "$src3, $src2, $dst" (the
///             instantiation ties $src1 to $dst); 0 prints all of
///             $src1..$src3.
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
  // NOTE(review): the rr form is marked commutable even though the immediate
  // is part of the operation; presumably target code adjusts the immediate
  // when the sources are swapped - confirm.
  let isCommutable = 1 in
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, VR128:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86insertps VR128:$src1, VR128:$src2, timm:$src3))]>,
      Sched<[SchedWriteFShuffle.XMM]>;
  // The memory form matches a scalar f32 load that was placed into a v4f32
  // with scalar_to_vector.
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, f32mem:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86insertps VR128:$src1,
                   (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                    timm:$src3))]>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

// VEX form is three-operand (Is2Addr = 0); the legacy form ties $src1 to
// $dst and uses the two-operand asm string.
let ExeDomain = SSEPackedSingle in {
  let Predicates = [UseAVX] in
    defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>,
                     VEX_4V, VEX_WIG;
  let Constraints = "$src1 = $dst" in
    defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Round Instructions
//===----------------------------------------------------------------------===//

/// sse41_fp_unop_p - Packed unary FP operation taking an 8-bit immediate.
///   opc       - opcode byte.
///   OpcodeStr - mnemonic used in the asm string.
///   x86memop  - memory operand type of the m form.
///   RC        - register class of source and destination.
///   VT        - result value type of the pattern.
///   mem_frag  - load fragment matched by the m form.
///   OpNode    - DAG node matched; it receives the source and the immediate.
///   sched     - scheduling class; the m form uses sched.Folded.
multiclass sse41_fp_unop_p<bits<8> opc, string OpcodeStr,
                           X86MemOperand x86memop, RegisterClass RC,
                           ValueType VT, PatFrag mem_frag, SDNode OpNode,
                           X86FoldableSchedWrite sched> {
  // Vector intrinsic operation, reg
  def r : SS4AIi8<opc, MRMSrcReg,
                  (outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
                  !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set RC:$dst, (VT (OpNode RC:$src1, timm:$src2)))]>,
                  Sched<[sched]>;

  // Vector intrinsic operation, mem
  def m : SS4AIi8<opc, MRMSrcMem,
                  (outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
                  !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set RC:$dst,
                        (VT (OpNode (mem_frag addr:$src1), timm:$src2)))]>,
                  Sched<[sched.Folded]>;
}

/// avx_fp_unop_rm - Scalar single (SS) and double (SD) forms with two
/// register-class sources plus an immediate, printed in three-operand
/// syntax. All four records are isCodeGenOnly, operate on FR32/FR64, and
/// carry an empty pattern list.
///   opcss / opcsd - opcode bytes of the "ss" and "sd" forms.
///   OpcodeStr     - mnemonic stem; "ss" / "sd" is appended.
///   sched         - scheduling class; memory forms use Folded/ReadAfterFold.
multiclass avx_fp_unop_rm<bits<8> opcss, bits<8> opcsd,
                          string OpcodeStr, X86FoldableSchedWrite sched> {
let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in {
  def SSr : SS4AIi8<opcss, MRMSrcReg,
        (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32u8imm:$src3),
        !strconcat(OpcodeStr,
            "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
      []>, Sched<[sched]>;

  let mayLoad = 1 in
  def SSm : SS4AIi8<opcss, MRMSrcMem,
        (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, i32u8imm:$src3),
        !strconcat(OpcodeStr,
             "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1

let ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in {
  def SDr : SS4AIi8<opcsd, MRMSrcReg,
        (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32u8imm:$src3),
        !strconcat(OpcodeStr,
              "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        []>, Sched<[sched]>;

  let mayLoad = 1 in
  def SDm : SS4AIi8<opcsd, MRMSrcMem,
        (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, i32u8imm:$src3),
        !strconcat(OpcodeStr,
             "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1
}

/// sse41_fp_unop_s - Scalar single (SS) and double (SD) forms with one
/// source plus an immediate, printed in two-operand syntax. All four
/// records are isCodeGenOnly, operate on FR32/FR64, and carry an empty
/// pattern list.
///   opcss / opcsd - opcode bytes of the "ss" and "sd" forms.
///   OpcodeStr     - mnemonic stem; "ss" / "sd" is appended.
///   sched         - scheduling class; memory forms use Folded/ReadAfterFold.
multiclass sse41_fp_unop_s<bits<8> opcss, bits<8> opcsd,
                           string OpcodeStr, X86FoldableSchedWrite sched> {
let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in {
  def SSr : SS4AIi8<opcss, MRMSrcReg,
                    (outs FR32:$dst), (ins FR32:$src1, i32u8imm:$src2),
                    !strconcat(OpcodeStr,
                               "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, Sched<[sched]>;

  let mayLoad = 1 in
  def SSm : SS4AIi8<opcss, MRMSrcMem,
                    (outs FR32:$dst), (ins f32mem:$src1, i32u8imm:$src2),
                    !strconcat(OpcodeStr,
                               "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1

let ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in {
  def SDr : SS4AIi8<opcsd, MRMSrcReg,
                    (outs FR64:$dst), (ins FR64:$src1, i32u8imm:$src2),
                    !strconcat(OpcodeStr,
                               "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, Sched<[sched]>;

  let mayLoad = 1 in
  def SDm : SS4AIi8<opcsd, MRMSrcMem,
                    (outs FR64:$dst), (ins f64mem:$src1, i32u8imm:$src2),
                    !strconcat(OpcodeStr,
                               "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1
}
/// sse41_fp_binop_s - SSE 4.1 scalar operation taking two VR128 sources and an
/// 8-bit immediate. Emits four records: SSr_Int/SSm_Int (opcode opcss, "ss"
/// mnemonic suffix) and SDr_Int/SDm_Int (opcode opcsd, "sd" suffix).
///   VT32 / VT64 - result types used by the register-form patterns.
///   OpNode      - node matched as (OpNode $src1, $src2, timm:$src3).
///   Is2Addr     - when set, the asm string omits $src1 (two-operand syntax);
///                 otherwise all three sources are printed.
multiclass sse41_fp_binop_s<bits<8> opcss, bits<8> opcsd,
                            string OpcodeStr, X86FoldableSchedWrite sched,
                            ValueType VT32, ValueType VT64,
                            SDNode OpNode, bit Is2Addr = 1> {
let ExeDomain = SSEPackedSingle in {
  def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set VR128:$dst, (VT32 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>,
        Sched<[sched]>;

  // Memory form: the second source is matched through sse_load_f32.
  def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
        (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set VR128:$dst,
             (OpNode VR128:$src1, sse_load_f32:$src2, timm:$src3))]>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle

let ExeDomain = SSEPackedDouble in {
  def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set VR128:$dst, (VT64 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>,
        Sched<[sched]>;

  // Memory form: the second source is matched through sse_load_f64.
  def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
        (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set VR128:$dst,
             (OpNode VR128:$src1, sse_load_f64:$src2, timm:$src3))]>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble
}

// FP round - roundss, roundps, roundsd, roundpd
// VEX-encoded packed forms (128- and 256-bit), selected from X86VRndScale.
let Predicates = [HasAVX, NoVLX] in {
  let ExeDomain = SSEPackedSingle in {
    // Intrinsic form
    defm VROUNDPS  : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
                                     loadv4f32, X86VRndScale, SchedWriteFRnd.XMM>,
                                   VEX, VEX_WIG;
    defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32,
                                     loadv8f32, X86VRndScale, SchedWriteFRnd.YMM>,
                                   VEX, VEX_L, VEX_WIG;
  }

  let ExeDomain = SSEPackedDouble in {
    defm VROUNDPD  : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
                                     loadv2f64, X86VRndScale, SchedWriteFRnd.XMM>,
                                   VEX, VEX_WIG;
    defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64,
                                     loadv4f64, X86VRndScale, SchedWriteFRnd.YMM>,
                                   VEX, VEX_L, VEX_WIG;
  }
}
// VEX-encoded scalar forms. The VROUND prefix is instantiated from two
// multiclasses; Is2Addr = 0 selects the three-operand asm string.
let Predicates = [UseAVX] in {
  defm VROUND  : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl,
                                  v4f32, v2f64, X86RndScales, 0>,
                                  VEX_4V, VEX_LIG, VEX_WIG;
  defm VROUND  : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>,
                                VEX_4V, VEX_LIG, VEX_WIG;
}

// Scalar X86VRndScale on FR32/FR64 selects the register forms; the first
// source operand is left as IMPLICIT_DEF.
let Predicates = [UseAVX] in {
  def : Pat<(X86VRndScale FR32:$src1, timm:$src2),
            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>;
  def : Pat<(X86VRndScale FR64:$src1, timm:$src2),
            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, timm:$src2)>;
}

// The load-folding forms are only selected under OptForSize.
let Predicates = [UseAVX, OptForSize] in {
  def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2),
            (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
  def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2),
            (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
}

// Non-VEX packed forms; memory operands are matched with the memop fragments.
let ExeDomain = SSEPackedSingle in
defm ROUNDPS  : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32,
                                memopv4f32, X86VRndScale, SchedWriteFRnd.XMM>;
let ExeDomain = SSEPackedDouble in
defm ROUNDPD  : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64,
                                memopv2f64, X86VRndScale, SchedWriteFRnd.XMM>;

defm ROUND  : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl>;

// Two-address *_Int forms: the destination is tied to $src1.
let Constraints = "$src1 = $dst" in
defm ROUND  : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl,
                               v4f32, v2f64, X86RndScales>;

let Predicates = [UseSSE41] in {
  def : Pat<(X86VRndScale FR32:$src1, timm:$src2),
            (ROUNDSSr FR32:$src1, timm:$src2)>;
  def : Pat<(X86VRndScale FR64:$src1, timm:$src2),
            (ROUNDSDr FR64:$src1, timm:$src2)>;
}

// The load-folding forms are only selected under OptForSize.
let Predicates = [UseSSE41, OptForSize] in {
  def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2),
            (ROUNDSSm addr:$src1, timm:$src2)>;
  def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2),
            (ROUNDSDm addr:$src1, timm:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Bit Test
Andric//===----------------------------------------------------------------------===// 55580b57cec5SDimitry Andric 55590b57cec5SDimitry Andric// ptest instruction we'll lower to this in X86ISelLowering primarily from 55600b57cec5SDimitry Andric// the intel intrinsic that corresponds to this. 55610b57cec5SDimitry Andriclet Defs = [EFLAGS], Predicates = [HasAVX] in { 55620b57cec5SDimitry Andricdef VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), 55630b57cec5SDimitry Andric "vptest\t{$src2, $src1|$src1, $src2}", 55640b57cec5SDimitry Andric [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>, 55650b57cec5SDimitry Andric Sched<[SchedWriteVecTest.XMM]>, VEX, VEX_WIG; 55660b57cec5SDimitry Andricdef VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), 55670b57cec5SDimitry Andric "vptest\t{$src2, $src1|$src1, $src2}", 55680b57cec5SDimitry Andric [(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>, 55690b57cec5SDimitry Andric Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>, 55700b57cec5SDimitry Andric VEX, VEX_WIG; 55710b57cec5SDimitry Andric 55720b57cec5SDimitry Andricdef VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2), 55730b57cec5SDimitry Andric "vptest\t{$src2, $src1|$src1, $src2}", 55740b57cec5SDimitry Andric [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>, 55750b57cec5SDimitry Andric Sched<[SchedWriteVecTest.YMM]>, VEX, VEX_L, VEX_WIG; 55760b57cec5SDimitry Andricdef VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2), 55770b57cec5SDimitry Andric "vptest\t{$src2, $src1|$src1, $src2}", 55780b57cec5SDimitry Andric [(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>, 55790b57cec5SDimitry Andric Sched<[SchedWriteVecTest.YMM.Folded, SchedWriteVecTest.YMM.ReadAfterFold]>, 55800b57cec5SDimitry Andric VEX, VEX_L, VEX_WIG; 55810b57cec5SDimitry Andric} 55820b57cec5SDimitry Andric 55830b57cec5SDimitry Andriclet 
Defs = [EFLAGS] in { 55840b57cec5SDimitry Andricdef PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), 55850b57cec5SDimitry Andric "ptest\t{$src2, $src1|$src1, $src2}", 55860b57cec5SDimitry Andric [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>, 55870b57cec5SDimitry Andric Sched<[SchedWriteVecTest.XMM]>; 55880b57cec5SDimitry Andricdef PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), 55890b57cec5SDimitry Andric "ptest\t{$src2, $src1|$src1, $src2}", 55900b57cec5SDimitry Andric [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>, 55910b57cec5SDimitry Andric Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>; 55920b57cec5SDimitry Andric} 55930b57cec5SDimitry Andric 55940b57cec5SDimitry Andric// The bit test instructions below are AVX only 55950b57cec5SDimitry Andricmulticlass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC, 55960b57cec5SDimitry Andric X86MemOperand x86memop, PatFrag mem_frag, ValueType vt, 55970b57cec5SDimitry Andric X86FoldableSchedWrite sched> { 55980b57cec5SDimitry Andric def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), 55990b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 56000b57cec5SDimitry Andric [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, 56010b57cec5SDimitry Andric Sched<[sched]>, VEX; 56020b57cec5SDimitry Andric def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), 56030b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 56040b57cec5SDimitry Andric [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>, 56050b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>, VEX; 56060b57cec5SDimitry Andric} 56070b57cec5SDimitry Andric 56080b57cec5SDimitry Andriclet Defs = [EFLAGS], Predicates = [HasAVX] in { 56090b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in { 56100b57cec5SDimitry Andricdefm VTESTPS : avx_bittest<0x0E, 
"vtestps", VR128, f128mem, loadv4f32, v4f32, 56110b57cec5SDimitry Andric SchedWriteFTest.XMM>; 56120b57cec5SDimitry Andricdefm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32, v8f32, 56130b57cec5SDimitry Andric SchedWriteFTest.YMM>, VEX_L; 56140b57cec5SDimitry Andric} 56150b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in { 56160b57cec5SDimitry Andricdefm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64, v2f64, 56170b57cec5SDimitry Andric SchedWriteFTest.XMM>; 56180b57cec5SDimitry Andricdefm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64, 56190b57cec5SDimitry Andric SchedWriteFTest.YMM>, VEX_L; 56200b57cec5SDimitry Andric} 56210b57cec5SDimitry Andric} 56220b57cec5SDimitry Andric 56230b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 56240b57cec5SDimitry Andric// SSE4.1 - Misc Instructions 56250b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 56260b57cec5SDimitry Andric 56270b57cec5SDimitry Andriclet Defs = [EFLAGS], Predicates = [HasPOPCNT] in { 56280b57cec5SDimitry Andric def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), 56290b57cec5SDimitry Andric "popcnt{w}\t{$src, $dst|$dst, $src}", 56300b57cec5SDimitry Andric [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>, 56310b57cec5SDimitry Andric Sched<[WritePOPCNT]>, OpSize16, XS; 56320b57cec5SDimitry Andric def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), 56330b57cec5SDimitry Andric "popcnt{w}\t{$src, $dst|$dst, $src}", 56340b57cec5SDimitry Andric [(set GR16:$dst, (ctpop (loadi16 addr:$src))), 56350b57cec5SDimitry Andric (implicit EFLAGS)]>, 56360b57cec5SDimitry Andric Sched<[WritePOPCNT.Folded]>, OpSize16, XS; 56370b57cec5SDimitry Andric 56380b57cec5SDimitry Andric def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), 56390b57cec5SDimitry Andric "popcnt{l}\t{$src, $dst|$dst, 
$src}", 56400b57cec5SDimitry Andric [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>, 56410b57cec5SDimitry Andric Sched<[WritePOPCNT]>, OpSize32, XS; 56420b57cec5SDimitry Andric 56430b57cec5SDimitry Andric def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), 56440b57cec5SDimitry Andric "popcnt{l}\t{$src, $dst|$dst, $src}", 56450b57cec5SDimitry Andric [(set GR32:$dst, (ctpop (loadi32 addr:$src))), 56460b57cec5SDimitry Andric (implicit EFLAGS)]>, 56470b57cec5SDimitry Andric Sched<[WritePOPCNT.Folded]>, OpSize32, XS; 56480b57cec5SDimitry Andric 56490b57cec5SDimitry Andric def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), 56500b57cec5SDimitry Andric "popcnt{q}\t{$src, $dst|$dst, $src}", 56510b57cec5SDimitry Andric [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>, 56520b57cec5SDimitry Andric Sched<[WritePOPCNT]>, XS; 56530b57cec5SDimitry Andric def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), 56540b57cec5SDimitry Andric "popcnt{q}\t{$src, $dst|$dst, $src}", 56550b57cec5SDimitry Andric [(set GR64:$dst, (ctpop (loadi64 addr:$src))), 56560b57cec5SDimitry Andric (implicit EFLAGS)]>, 56570b57cec5SDimitry Andric Sched<[WritePOPCNT.Folded]>, XS; 56580b57cec5SDimitry Andric} 56590b57cec5SDimitry Andric 56600b57cec5SDimitry Andric// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16. 
// Emits a register form (rr) and a memory form (rm); both produce a v8i16
// result in VR128. ld_frag is the load fragment used by the rm pattern.
// Note: the template argument is itself named Sched, so Sched<[Sched]> below
// wraps that argument in the Sched class.
multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
                                 SDNode OpNode, PatFrag ld_frag,
                                 X86FoldableSchedWrite Sched> {
  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst, (v8i16 (OpNode (v8i16 VR128:$src))))]>,
                 Sched<[Sched]>;
  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
                  (ins i128mem:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR128:$dst,
                    (v8i16 (OpNode (ld_frag addr:$src))))]>,
                 Sched<[Sched.Folded]>;
}

// PHMIN has the same profile as PSAD, thus we use the same scheduling
// model, although the naming is misleading.
// NOTE(review): both defms below pass WritePHMINPOS, not a PSAD scheduling
// class, so the remark above looks stale - confirm and drop it if so.
let Predicates = [HasAVX] in
defm VPHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "vphminposuw",
                                         X86phminpos, load,
                                         WritePHMINPOS>, VEX, VEX_WIG;
defm PHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "phminposuw",
                                         X86phminpos, memop,
                                         WritePHMINPOS>;

/// SS48I_binop_rm - Simple SSE41 binary operator.
/// Emits a commutable register form (rr) and a memory form (rm) selecting
/// (OpVT (OpNode $src1, $src2)).
///   memop_frag - load fragment used for the rm pattern's second operand.
///   Is2Addr    - when set, the asm string omits $src1 (two-operand syntax).
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                          X86MemOperand x86memop, X86FoldableSchedWrite sched,
                          bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
       Sched<[sched]>;
  def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst,
         (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// VEX-encoded 128-bit forms (three-operand asm, Is2Addr = 0).
let Predicates = [HasAVX, NoVLX] in {
  defm VPMINSD   : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128,
                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
                                  VEX_4V, VEX_WIG;
  defm VPMINUD   : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128,
                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
                                  VEX_4V, VEX_WIG;
  defm VPMAXSD   : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128,
                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
                                  VEX_4V, VEX_WIG;
  defm VPMAXUD   : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128,
                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
                                  VEX_4V, VEX_WIG;
  defm VPMULDQ   : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v2i64, VR128,
                                  load, i128mem, SchedWriteVecIMul.XMM, 0>,
                                  VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  defm VPMINSB   : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128,
                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
                                  VEX_4V, VEX_WIG;
  defm VPMINUW   : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128,
                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
                                  VEX_4V, VEX_WIG;
  defm VPMAXSB   : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128,
                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
                                  VEX_4V, VEX_WIG;
  defm VPMAXUW   : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128,
                                  load, i128mem, SchedWriteVecALU.XMM, 0>,
                                  VEX_4V, VEX_WIG;
}

// VEX-encoded 256-bit forms.
let Predicates = [HasAVX2, NoVLX] in {
  defm VPMINSDY  : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;
  defm VPMINUDY  : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;
  defm VPMAXSDY  : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;
  defm VPMAXUDY  : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;
  defm VPMULDQY  : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v4i64, VR256,
                                  load, i256mem, SchedWriteVecIMul.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  defm VPMINSBY  : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;
  defm VPMINUWY  : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;
  defm VPMAXSBY  : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;
  defm VPMAXUWY  : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;
}

// Non-VEX two-address forms: destination tied to $src1, memory operands
// matched with memop.
let Constraints = "$src1 = $dst" in {
  defm PMINSB   : SS48I_binop_rm<0x38, "pminsb", smin, v16i8, VR128,
                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMINSD   : SS48I_binop_rm<0x39, "pminsd", smin, v4i32, VR128,
                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMINUD   : SS48I_binop_rm<0x3B, "pminud", umin, v4i32, VR128,
                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMINUW   : SS48I_binop_rm<0x3A, "pminuw", umin, v8i16, VR128,
                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMAXSB   : SS48I_binop_rm<0x3C, "pmaxsb", smax, v16i8, VR128,
                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMAXSD   : SS48I_binop_rm<0x3D, "pmaxsd", smax, v4i32, VR128,
                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMAXUD   : SS48I_binop_rm<0x3F, "pmaxud", umax, v4i32, VR128,
                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMAXUW   : SS48I_binop_rm<0x3E, "pmaxuw", umax, v8i16, VR128,
                                 memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMULDQ   : SS48I_binop_rm<0x28, "pmuldq", X86pmuldq, v2i64, VR128,
                                 memop, i128mem, SchedWriteVecIMul.XMM, 1>;
}

let Predicates = [HasAVX, NoVLX] in
  defm VPMULLD  : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
                                 load, i128mem, SchedWritePMULLD.XMM, 0>,
                                 VEX_4V, VEX_WIG;
let Predicates = [HasAVX] in
  defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
                                 load, i128mem, SchedWriteVecALU.XMM, 0>,
                                 VEX_4V, VEX_WIG;

let Predicates = [HasAVX2, NoVLX] in
  defm VPMULLDY  : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
                                  load, i256mem, SchedWritePMULLD.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;
let Predicates = [HasAVX2] in
  defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;

let Constraints = "$src1 = $dst" in {
  defm PMULLD  : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
                                memop, i128mem, SchedWritePMULLD.XMM, 1>;
  defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
                                memop, i128mem, SchedWriteVecALU.XMM, 1>;
}

/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
/// Intrinsic-based variant: the patterns match IntId directly with the
/// immediate as timm. rri is marked commutable by default.
multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
                 Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
                 X86MemOperand x86memop, bit Is2Addr,
                 X86FoldableSchedWrite sched> {
  let isCommutable = 1 in
  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst, (IntId RC:$src1, RC:$src2, timm:$src3))]>,
        Sched<[sched]>;
  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst,
          (IntId RC:$src1, (memop_frag addr:$src2), timm:$src3))]>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
}

/// SS41I_binop_rmi - SSE 4.1 binary operator with 8-bit immediate
/// SDNode-based variant: the patterns match (OpVT (OpNode ...)) with the
/// immediate as timm. rri is marked commutable by default.
multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                           X86MemOperand x86memop, bit Is2Addr,
                           X86FoldableSchedWrite sched> {
  let isCommutable = 1 in
  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
        Sched<[sched]>;
  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst,
          (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Keep the low 2 bits of the immediate and invert them.
def BlendCommuteImm2 : SDNodeXForm<timm, [{
  uint8_t Imm = N->getZExtValue() & 0x03;
  return getI8Imm(Imm ^ 0x03, SDLoc(N));
}]>;

// Keep the low 4 bits of the immediate and invert them.
def BlendCommuteImm4 : SDNodeXForm<timm, [{
  uint8_t Imm = N->getZExtValue() & 0x0f;
  return getI8Imm(Imm ^ 0x0f, SDLoc(N));
}]>;

// Invert all 8 bits of the immediate.
def BlendCommuteImm8 : SDNodeXForm<timm, [{
  uint8_t Imm = N->getZExtValue() & 0xff;
  return getI8Imm(Imm ^ 0xff, SDLoc(N));
}]>;

// Turn a 4-bit blendi immediate to 8-bit for use with pblendw.
// Each set input bit i becomes the two output bits 2*i and 2*i+1.
def BlendScaleImm4 : SDNodeXForm<timm, [{
  uint8_t Imm = N->getZExtValue();
  uint8_t NewImm = 0;
  for (unsigned i = 0; i != 4; ++i) {
    if (Imm & (1 << i))
      NewImm |= 0x3 << (i * 2);
  }
  return getI8Imm(NewImm, SDLoc(N));
}]>;

// Turn a 2-bit blendi immediate to 8-bit for use with pblendw.
// Each set input bit (0 or 1) is widened to a nibble of the 8-bit result.
def BlendScaleImm2 : SDNodeXForm<timm, [{
  uint8_t Mask = N->getZExtValue();
  uint8_t Scaled = 0;
  for (unsigned Bit = 0; Bit != 2; ++Bit)
    if ((Mask >> Bit) & 1)
      Scaled |= 0xf << (Bit * 4);
  return getI8Imm(Scaled, SDLoc(N));
}]>;

// Turn a 2-bit blendi immediate to 4-bit for use with pblendd.
// Each set input bit (0 or 1) is widened to a bit pair of the 4-bit result.
def BlendScaleImm2to4 : SDNodeXForm<timm, [{
  uint8_t Mask = N->getZExtValue();
  uint8_t Scaled = 0;
  for (unsigned Bit = 0; Bit != 2; ++Bit)
    if ((Mask >> Bit) & 1)
      Scaled |= 0x3 << (Bit * 2);
  return getI8Imm(Scaled, SDLoc(N));
}]>;

// Turn a 4-bit blendi immediate to 8-bit for use with pblendw and invert it.
// Each set input bit (0..3) is widened to a bit pair, then all 8 result bits
// are flipped.
def BlendScaleCommuteImm4 : SDNodeXForm<timm, [{
  uint8_t Mask = N->getZExtValue();
  uint8_t Scaled = 0;
  for (unsigned Bit = 0; Bit != 4; ++Bit)
    if ((Mask >> Bit) & 1)
      Scaled |= 0x3 << (Bit * 2);
  return getI8Imm(Scaled ^ 0xff, SDLoc(N));
}]>;

// Turn a 2-bit blendi immediate to 8-bit for use with pblendw and invert it.
def BlendScaleCommuteImm2 : SDNodeXForm<timm, [{
  uint8_t Imm = N->getZExtValue();
  uint8_t NewImm = 0;
  // Every set input bit i becomes the 4-bit group at bits [4i+3:4i].
  for (unsigned i = 0; i != 2; ++i) {
    if (Imm & (1 << i))
      NewImm |= 0xf << (i * 4);
  }
  // Flip the whole 8-bit mask so the blend sources can be swapped.
  return getI8Imm(NewImm ^ 0xff, SDLoc(N));
}]>;

// Turn a 2-bit blendi immediate to 4-bit for use with pblendd and invert it.
def BlendScaleCommuteImm2to4 : SDNodeXForm<timm, [{
  uint8_t Imm = N->getZExtValue();
  uint8_t NewImm = 0;
  // Every set input bit i becomes the 2-bit group at bits [2i+1:2i].
  for (unsigned i = 0; i != 2; ++i) {
    if (Imm & (1 << i))
      NewImm |= 0x3 << (i * 2);
  }
  // Flip the 4-bit mask so the blend sources can be swapped.
  return getI8Imm(NewImm ^ 0xf, SDLoc(N));
}]>;

let Predicates = [HasAVX] in {
  let isCommutable = 0 in {
    defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
                                        VR128, load, i128mem, 0,
                                        SchedWriteMPSAD.XMM>, VEX_4V, VEX_WIG;
  }

  let ExeDomain = SSEPackedSingle in
  defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
                                   VR128, load, f128mem, 0,
                                   SchedWriteDPPS.XMM>, VEX_4V, VEX_WIG;
  let ExeDomain = SSEPackedDouble in
  defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
                                   VR128, load, f128mem, 0,
                                   SchedWriteDPPD.XMM>, VEX_4V, VEX_WIG;
  let ExeDomain = SSEPackedSingle in
  defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
                                    VR256, load, i256mem, 0,
                                    SchedWriteDPPS.YMM>, VEX_4V, VEX_L, VEX_WIG;
}

let Predicates = [HasAVX2] in {
  let isCommutable = 0 in {
    defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
                                         VR256, load, i256mem, 0,
                                         SchedWriteMPSAD.YMM>, VEX_4V, VEX_L, VEX_WIG;
  }
}

let Constraints = "$src1 = $dst" in {
  let isCommutable = 0 in {
    defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
                                       VR128, memop, i128mem, 1,
                                       SchedWriteMPSAD.XMM>;
  }

  let ExeDomain = SSEPackedSingle in
  defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
                                  VR128, memop, f128mem, 1,
                                  SchedWriteDPPS.XMM>;
  let ExeDomain = SSEPackedDouble in
  defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
                                  VR128, memop, f128mem, 1,
                                  SchedWriteDPPD.XMM>;
}

/// SS41I_blend_rmi - SSE 4.1 blend with 8-bit immediate
///
/// Defines the register-register (rri) and register-memory (rmi) forms of a
/// blend, plus a pattern that folds a load appearing as the *first* blend
/// source by swapping the operands and rewriting the immediate through
/// \p commuteXForm.
multiclass SS41I_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                           X86MemOperand x86memop, bit Is2Addr, Domain d,
                           X86FoldableSchedWrite sched, SDNodeXForm commuteXForm> {
let ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in {
  let isCommutable = 1 in
  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
        Sched<[sched]>;
  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst,
          (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
}

  // Pattern to commute if load is in first source.
  def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, timm:$src3)),
            (!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
                                            (commuteXForm timm:$src3))>;
}

let Predicates = [HasAVX] in {
  defm VBLENDPS : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v4f32,
                                  VR128, load, f128mem, 0, SSEPackedSingle,
                                  SchedWriteFBlend.XMM, BlendCommuteImm4>,
                                  VEX_4V, VEX_WIG;
  defm VBLENDPSY : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v8f32,
                                   VR256, load, f256mem, 0, SSEPackedSingle,
                                   SchedWriteFBlend.YMM, BlendCommuteImm8>,
                                   VEX_4V, VEX_L, VEX_WIG;
  defm VBLENDPD : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v2f64,
                                  VR128, load, f128mem, 0, SSEPackedDouble,
                                  SchedWriteFBlend.XMM, BlendCommuteImm2>,
                                  VEX_4V, VEX_WIG;
  defm VBLENDPDY : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v4f64,
                                   VR256, load, f256mem, 0, SSEPackedDouble,
                                   SchedWriteFBlend.YMM, BlendCommuteImm4>,
                                   VEX_4V, VEX_L, VEX_WIG;
  defm VPBLENDW : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v8i16,
                                  VR128, load, i128mem, 0, SSEPackedInt,
                                  SchedWriteBlend.XMM, BlendCommuteImm8>,
                                  VEX_4V, VEX_WIG;
}

let Predicates = [HasAVX2] in {
  defm VPBLENDWY : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v16i16,
                                   VR256, load, i256mem, 0, SSEPackedInt,
                                   SchedWriteBlend.YMM, BlendCommuteImm8>,
                                   VEX_4V, VEX_L, VEX_WIG;
}

// Emulate vXi32/vXi64 blends with vXf32/vXf64 or pblendw.
// ExecutionDomainFixPass will cleanup domains later on.
let Predicates = [HasAVX1Only] in {
def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3),
          (VBLENDPDYrri VR256:$src1, VR256:$src2, timm:$src3)>;
def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3),
          (VBLENDPDYrmi VR256:$src1, addr:$src2, timm:$src3)>;
def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3),
          (VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 timm:$src3))>;

// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
// it from becoming movsd via commuting under optsize.
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
          (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 timm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3),
          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 timm:$src3))>;
def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3),
          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 timm:$src3))>;

def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), timm:$src3),
          (VBLENDPSYrri VR256:$src1, VR256:$src2, timm:$src3)>;
def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), timm:$src3),
          (VBLENDPSYrmi VR256:$src1, addr:$src2, timm:$src3)>;
def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, timm:$src3),
          (VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 timm:$src3))>;

// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
// it from becoming movss via commuting under optsize.
def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3),
          (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 timm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), timm:$src3),
          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>;
def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, timm:$src3),
          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>;
}

defm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32,
                               VR128, memop, f128mem, 1, SSEPackedSingle,
                               SchedWriteFBlend.XMM, BlendCommuteImm4>;
defm BLENDPD : SS41I_blend_rmi<0x0D, "blendpd", X86Blendi, v2f64,
                               VR128, memop, f128mem, 1, SSEPackedDouble,
                               SchedWriteFBlend.XMM, BlendCommuteImm2>;
defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16,
                               VR128, memop, i128mem, 1, SSEPackedInt,
                               SchedWriteBlend.XMM, BlendCommuteImm8>;

let Predicates = [UseSSE41] in {
// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
// it from becoming movsd/movss via commuting under optsize.
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
          (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 timm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), timm:$src3),
          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 timm:$src3))>;
def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, timm:$src3),
          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 timm:$src3))>;

def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3),
          (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 timm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), timm:$src3),
          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>;
def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, timm:$src3),
          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>;
}

// For insertion into the zero index (low half) of a 256-bit vector, it is
// more efficient to generate a blend with immediate instead of an insert*128.
let Predicates = [HasAVX] in {
def : Pat<(insert_subvector (v4f64 VR256:$src1), (v2f64 VR128:$src2), (iPTR 0)),
          (VBLENDPDYrri VR256:$src1,
                        (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0x3)>;
def : Pat<(insert_subvector (v8f32 VR256:$src1), (v4f32 VR128:$src2), (iPTR 0)),
          (VBLENDPSYrri VR256:$src1,
                        (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0xf)>;

// Same insertion when the 256-bit vector is loaded: the widened 128-bit
// register becomes the first source and the load is folded as the second, so
// the immediate selects the upper elements from memory instead.
def : Pat<(insert_subvector (loadv4f64 addr:$src2), (v2f64 VR128:$src1), (iPTR 0)),
          (VBLENDPDYrmi (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm), addr:$src2, 0xc)>;
def : Pat<(insert_subvector (loadv8f32 addr:$src2), (v4f32 VR128:$src1), (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
}

/// SS41I_quaternary_avx - AVX SSE 4.1 with 4 operands
///
/// Note the operand order: the selection pattern is
/// (OpNode $src3, $src2, $src1), i.e. the last instruction operand is the
/// first SDNode operand.
multiclass SS41I_quaternary_avx<bits<8> opc, string OpcodeStr, RegisterClass RC,
                                X86MemOperand x86memop, ValueType VT,
                                PatFrag mem_frag, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  def rr : Ii8Reg<opc, MRMSrcReg, (outs RC:$dst),
                  (ins RC:$src1, RC:$src2, RC:$src3),
                  !strconcat(OpcodeStr,
                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                  [(set RC:$dst, (VT (OpNode RC:$src3, RC:$src2, RC:$src1)))],
                  SSEPackedInt>, TAPD, VEX_4V,
                Sched<[sched]>;

  def rm : Ii8Reg<opc, MRMSrcMem, (outs RC:$dst),
                  (ins RC:$src1, x86memop:$src2, RC:$src3),
                  !strconcat(OpcodeStr,
                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                  [(set RC:$dst,
                        (OpNode RC:$src3, (mem_frag addr:$src2),
                                RC:$src1))], SSEPackedInt>, TAPD, VEX_4V,
                Sched<[sched.Folded, sched.ReadAfterFold,
                       // x86memop:$src2
                       ReadDefault, ReadDefault, ReadDefault, ReadDefault,
                       ReadDefault,
                       // RC:$src3
                       sched.ReadAfterFold]>;
}

let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD  : SS41I_quaternary_avx<0x4B, "vblendvpd", VR128, f128mem,
                                       v2f64, loadv2f64, X86Blendv,
                                       SchedWriteFVarBlend.XMM>;
defm VBLENDVPDY : SS41I_quaternary_avx<0x4B, "vblendvpd", VR256, f256mem,
                                       v4f64, loadv4f64, X86Blendv,
                                       SchedWriteFVarBlend.YMM>, VEX_L;
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
defm VBLENDVPS  : SS41I_quaternary_avx<0x4A, "vblendvps", VR128, f128mem,
                                       v4f32, loadv4f32, X86Blendv,
                                       SchedWriteFVarBlend.XMM>;
defm VBLENDVPSY : SS41I_quaternary_avx<0x4A, "vblendvps", VR256, f256mem,
                                       v8f32, loadv8f32, X86Blendv,
                                       SchedWriteFVarBlend.YMM>, VEX_L;
} // ExeDomain = SSEPackedSingle
defm VPBLENDVB  : SS41I_quaternary_avx<0x4C, "vpblendvb", VR128, i128mem,
                                       v16i8, loadv16i8, X86Blendv,
                                       SchedWriteVarBlend.XMM>;
}

let Predicates = [HasAVX2] in {
defm VPBLENDVBY : SS41I_quaternary_avx<0x4C, "vpblendvb", VR256, i256mem,
                                       v32i8, loadv32i8, X86Blendv,
                                       SchedWriteVarBlend.YMM>, VEX_L;
}

// Select 32/64-bit integer element variable blends with the FP blendv
// instructions.
let Predicates = [HasAVX] in {
  def : Pat<(v4i32 (X86Blendv (v4i32 VR128:$mask), (v4i32 VR128:$src1),
                              (v4i32 VR128:$src2))),
            (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v2i64 (X86Blendv (v2i64 VR128:$mask), (v2i64 VR128:$src1),
                              (v2i64 VR128:$src2))),
            (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v8i32 (X86Blendv (v8i32 VR256:$mask), (v8i32 VR256:$src1),
                              (v8i32 VR256:$src2))),
            (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v4i64 (X86Blendv (v4i64 VR256:$mask), (v4i64 VR256:$src1),
                              (v4i64 VR256:$src2))),
            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
}

// Prefer a movss or movsd over a blendps when optimizing for size. These were
// changed to use blends because blends have better throughput on sandybridge
// and haswell, but movs[s/d] are 1-2 bytes shorter instructions.
let Predicates = [HasAVX, OptForSpeed] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (VBLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (VPBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;

  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
            (VBLENDPSrri VR128:$src1, VR128:$src2, (i8 1))>;
  def : Pat<(v4f32 (X86Movss VR128:$src1, (loadv4f32 addr:$src2))),
            (VBLENDPSrmi VR128:$src1, addr:$src2, (i8 1))>;
  def : Pat<(v4f32 (X86Movss (loadv4f32 addr:$src2), VR128:$src1)),
            (VBLENDPSrmi VR128:$src1, addr:$src2, (i8 0xe))>;

  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
            (VBLENDPDrri VR128:$src1, VR128:$src2, (i8 1))>;
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (loadv2f64 addr:$src2))),
            (VBLENDPDrmi VR128:$src1, addr:$src2, (i8 1))>;
  def : Pat<(v2f64 (X86Movsd (loadv2f64 addr:$src2), VR128:$src1)),
            (VBLENDPDrmi VR128:$src1, addr:$src2, (i8 2))>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                          (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)),
                          (i8 1))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                          (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)),
                          (i8 3))), sub_xmm)>;
}

// Prefer a movss or movsd over a blendps when optimizing for size. These were
// changed to use blends because blends have better throughput on sandybridge
// and haswell, but movs[s/d] are 1-2 bytes shorter instructions.
let Predicates = [UseSSE41, OptForSpeed] in {
  // With SSE41 we can use blends for these patterns.
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (BLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (PBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;

  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
            (BLENDPSrri VR128:$src1, VR128:$src2, (i8 1))>;
  def : Pat<(v4f32 (X86Movss VR128:$src1, (memopv4f32 addr:$src2))),
            (BLENDPSrmi VR128:$src1, addr:$src2, (i8 1))>;
  def : Pat<(v4f32 (X86Movss (memopv4f32 addr:$src2), VR128:$src1)),
            (BLENDPSrmi VR128:$src1, addr:$src2, (i8 0xe))>;

  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
            (BLENDPDrri VR128:$src1, VR128:$src2, (i8 1))>;
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (memopv2f64 addr:$src2))),
            (BLENDPDrmi VR128:$src1, addr:$src2, (i8 1))>;
  def : Pat<(v2f64 (X86Movsd (memopv2f64 addr:$src2), VR128:$src1)),
            (BLENDPDrmi VR128:$src1, addr:$src2, (i8 2))>;
}


/// SS41I_ternary - SSE 4.1 ternary operator
///
/// The third operand is the implicit XMM0 register (see Uses below); the
/// destination is tied to $src1.
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
  multiclass SS41I_ternary<bits<8> opc, string OpcodeStr, ValueType VT,
                           PatFrag mem_frag, X86MemOperand x86memop,
                           SDNode OpNode, X86FoldableSchedWrite sched> {
    def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2),
                    !strconcat(OpcodeStr,
                     "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
                    [(set VR128:$dst,
                      (VT (OpNode XMM0, VR128:$src2, VR128:$src1)))]>,
                    Sched<[sched]>;

    def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins VR128:$src1, x86memop:$src2),
                    !strconcat(OpcodeStr,
                     "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
                    [(set VR128:$dst,
                      (OpNode XMM0, (mem_frag addr:$src2), VR128:$src1))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

let ExeDomain = SSEPackedDouble in
defm BLENDVPD : SS41I_ternary<0x15, "blendvpd", v2f64, memopv2f64, f128mem,
                              X86Blendv, SchedWriteFVarBlend.XMM>;
let ExeDomain = SSEPackedSingle in
defm BLENDVPS : SS41I_ternary<0x14, "blendvps", v4f32, memopv4f32, f128mem,
                              X86Blendv, SchedWriteFVarBlend.XMM>;
defm PBLENDVB : SS41I_ternary<0x10, "pblendvb", v16i8, memopv16i8, i128mem,
                              X86Blendv, SchedWriteVarBlend.XMM>;

// Aliases with the implicit xmm0 argument
def : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}",
                (BLENDVPDrr0 VR128:$dst, VR128:$src2), 0>;
def : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}",
                (BLENDVPDrm0 VR128:$dst, f128mem:$src2), 0>;
def : InstAlias<"blendvps\t{$src2, $dst|$dst, $src2}",
                (BLENDVPSrr0 VR128:$dst, VR128:$src2), 0>;
def : InstAlias<"blendvps\t{$src2, $dst|$dst, $src2}",
                (BLENDVPSrm0 VR128:$dst, f128mem:$src2), 0>;
def : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}",
                (PBLENDVBrr0 VR128:$dst, VR128:$src2), 0>;
def : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}",
                (PBLENDVBrm0 VR128:$dst, i128mem:$src2), 0>;

// Select 32/64-bit integer element variable blends with the FP blendv
// instructions; the mask must already be in XMM0.
let Predicates = [UseSSE41] in {
  def : Pat<(v4i32 (X86Blendv (v4i32 XMM0), (v4i32 VR128:$src1),
                              (v4i32 VR128:$src2))),
            (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v2i64 (X86Blendv (v2i64 XMM0), (v2i64 VR128:$src1),
                              (v2i64 VR128:$src2))),
            (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
}

let AddedComplexity = 400 in { // Prefer non-temporal versions

let Predicates = [HasAVX, NoVLX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                        "vmovntdqa\t{$src, $dst|$dst, $src}", []>,
                        Sched<[SchedWriteVecMoveLSNT.XMM.RM]>, VEX, VEX_WIG;
let Predicates = [HasAVX2, NoVLX] in
def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                         "vmovntdqa\t{$src, $dst|$dst, $src}", []>,
                         Sched<[SchedWriteVecMoveLSNT.YMM.RM]>, VEX, VEX_L, VEX_WIG;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                       "movntdqa\t{$src, $dst|$dst, $src}", []>,
                       Sched<[SchedWriteVecMoveLSNT.XMM.RM]>;

let Predicates = [HasAVX2, NoVLX] in {
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
}

let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
}

let Predicates = [UseSSE41] in {
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
}

} // AddedComplexity

//===----------------------------------------------------------------------===//
// SSE4.2 - Compare Instructions
//===----------------------------------------------------------------------===//

/// SS42I_binop_rm - Simple SSE 4.2 binary operator
multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                          X86MemOperand x86memop, X86FoldableSchedWrite sched,
                          bit Is2Addr = 1> {
  def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
       Sched<[sched]>;
  def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst,
         (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}

let Predicates = [HasAVX] in
  defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
                                 load, i128mem, SchedWriteVecALU.XMM, 0>,
                                 VEX_4V, VEX_WIG;

let Predicates = [HasAVX2] in
  defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;

let Constraints = "$src1 = $dst" in
  defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
                                memop, i128mem, SchedWriteVecALU.XMM>;

//===----------------------------------------------------------------------===//
// SSE4.2 - String/text Processing Instructions
//===----------------------------------------------------------------------===//

// The four multiclasses below share one shape: an rr form and a load-folding
// rm form, both with no explicit outputs and an empty selection pattern.  All
// results are produced through the implicit Defs listed on the enclosing
// `let` of each defm group.

/// pcmpistrm_SS42AI - opcode 0x62, rr and rm forms.
multiclass pcmpistrm_SS42AI<string asm> {
  def rr : SS42AI<0x62, MRMSrcReg, (outs),
                  (ins VR128:$src1, VR128:$src2, u8imm:$src3),
                  asm # "\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                  []>,
           Sched<[WritePCmpIStrM]>;
  let mayLoad = 1 in
  def rm : SS42AI<0x62, MRMSrcMem, (outs),
                  (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
                  asm # "\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                  []>,
           Sched<[WritePCmpIStrM.Folded, WritePCmpIStrM.ReadAfterFold]>;
}

// Implicitly defines XMM0 and EFLAGS.
let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in {
  let Predicates = [HasAVX] in
  defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX;
  defm PCMPISTRM  : pcmpistrm_SS42AI<"pcmpistrm">;
}

/// SS42AI_pcmpestrm - opcode 0x60, rr and rm forms.  Explicit operands are
/// named $src1, $src3 and $src5.
multiclass SS42AI_pcmpestrm<string asm> {
  def rr : SS42AI<0x60, MRMSrcReg, (outs),
                  (ins VR128:$src1, VR128:$src3, u8imm:$src5),
                  asm # "\t{$src5, $src3, $src1|$src1, $src3, $src5}",
                  []>,
           Sched<[WritePCmpEStrM]>;
  let mayLoad = 1 in
  def rm : SS42AI<0x60, MRMSrcMem, (outs),
                  (ins VR128:$src1, i128mem:$src3, u8imm:$src5),
                  asm # "\t{$src5, $src3, $src1|$src1, $src3, $src5}",
                  []>,
           Sched<[WritePCmpEStrM.Folded, WritePCmpEStrM.ReadAfterFold]>;
}

// Implicitly defines XMM0 and EFLAGS; implicitly reads EAX and EDX.
let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
  let Predicates = [HasAVX] in
  defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX;
  defm PCMPESTRM  : SS42AI_pcmpestrm<"pcmpestrm">;
}

/// SS42AI_pcmpistri - opcode 0x63, rr and rm forms.
multiclass SS42AI_pcmpistri<string asm> {
  def rr : SS42AI<0x63, MRMSrcReg, (outs),
                  (ins VR128:$src1, VR128:$src2, u8imm:$src3),
                  asm # "\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                  []>,
           Sched<[WritePCmpIStrI]>;
  let mayLoad = 1 in
  def rm : SS42AI<0x63, MRMSrcMem, (outs),
                  (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
                  asm # "\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                  []>,
           Sched<[WritePCmpIStrI.Folded, WritePCmpIStrI.ReadAfterFold]>;
}

// Implicitly defines ECX and EFLAGS.
let Defs = [ECX, EFLAGS], hasSideEffects = 0 in {
  let Predicates = [HasAVX] in
  defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
  defm PCMPISTRI  : SS42AI_pcmpistri<"pcmpistri">;
}

/// SS42AI_pcmpestri - opcode 0x61, rr and rm forms.  Explicit operands are
/// named $src1, $src3 and $src5.
multiclass SS42AI_pcmpestri<string asm> {
  def rr : SS42AI<0x61, MRMSrcReg, (outs),
                  (ins VR128:$src1, VR128:$src3, u8imm:$src5),
                  asm # "\t{$src5, $src3, $src1|$src1, $src3, $src5}",
                  []>,
           Sched<[WritePCmpEStrI]>;
  let mayLoad = 1 in
  def rm : SS42AI<0x61, MRMSrcMem, (outs),
                  (ins VR128:$src1, i128mem:$src3, u8imm:$src5),
                  asm # "\t{$src5, $src3, $src1|$src1, $src3, $src5}",
                  []>,
           Sched<[WritePCmpEStrI.Folded, WritePCmpEStrI.ReadAfterFold]>;
}

// Implicitly defines ECX and EFLAGS; implicitly reads EAX and EDX.
let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
  let Predicates = [HasAVX] in
  defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
  defm PCMPESTRI  : SS42AI_pcmpestri<"pcmpestri">;
}

//===----------------------------------------------------------------------===//
// SSE4.2 - CRC Instructions
//===----------------------------------------------------------------------===//

// No CRC instructions have AVX equivalents

// crc intrinsic instruction
// This set of instructions comes only in register (r) and memory (m) source
// forms; the only difference between them is the size of r and m.
/// SS42I_crc32r - CRC32 with a register source.  RCOut is the register class
/// of the accumulator ($src1, and the result $dst); RCIn is the class of the
/// data operand $src2.  Int is the operator matched by the pattern.
class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut,
                   RegisterClass RCIn, SDPatternOperator Int>
  : SS42FI<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
           asm # "\t{$src2, $src1|$src1, $src2}",
           [(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))]>,
    Sched<[WriteCRC32]>;

/// SS42I_crc32m - CRC32 with a memory source.  Same as SS42I_crc32r except
/// that $src2 is an x86memop operand and the pattern folds a load of it.
class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
                   X86MemOperand x86memop, SDPatternOperator Int>
  : SS42FI<opc, MRMSrcMem, (outs RCOut:$dst),
           (ins RCOut:$src1, x86memop:$src2),
           asm # "\t{$src2, $src1|$src1, $src2}",
           [(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))]>,
    Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>;

// All CRC32 forms accumulate in place: $dst is tied to $src1.
let Constraints = "$src1 = $dst" in {
  // 32-bit accumulator, 8-bit data.
  def CRC32r32m8  : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem,
                                 int_x86_sse42_crc32_32_8>;
  def CRC32r32r8  : SS42I_crc32r<0xF0, "crc32{b}", GR32, GR8,
                                 int_x86_sse42_crc32_32_8>;

  // 32-bit accumulator, 16-bit data.
  def CRC32r32m16 : SS42I_crc32m<0xF1, "crc32{w}", GR32, i16mem,
                                 int_x86_sse42_crc32_32_16>, OpSize16;
  def CRC32r32r16 : SS42I_crc32r<0xF1, "crc32{w}", GR32, GR16,
                                 int_x86_sse42_crc32_32_16>, OpSize16;

  // 32-bit accumulator, 32-bit data.
  def CRC32r32m32 : SS42I_crc32m<0xF1, "crc32{l}", GR32, i32mem,
                                 int_x86_sse42_crc32_32_32>, OpSize32;
  def CRC32r32r32 : SS42I_crc32r<0xF1, "crc32{l}", GR32, GR32,
                                 int_x86_sse42_crc32_32_32>, OpSize32;

  // 64-bit accumulator, 64-bit data.
  def CRC32r64m64 : SS42I_crc32m<0xF1, "crc32{q}", GR64, i64mem,
                                 int_x86_sse42_crc32_64_64>, REX_W;
  def CRC32r64r64 : SS42I_crc32r<0xF1, "crc32{q}", GR64, GR64,
                                 int_x86_sse42_crc32_64_64>, REX_W;

  // 64-bit accumulator, 8-bit data.  These use null_frag as the operator, so
  // the side-effect and load flags are spelled out explicitly here.
  let hasSideEffects = 0 in {
    let mayLoad = 1 in
    def CRC32r64m8 : SS42I_crc32m<0xF0, "crc32{b}", GR64, i8mem,
                                  null_frag>, REX_W;
    def CRC32r64r8 : SS42I_crc32r<0xF0, "crc32{b}", GR64, GR8,
                                  null_frag>, REX_W;
  }
}

//===----------------------------------------------------------------------===//
// SHA-NI Instructions
//===----------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for SHA than WriteVecIMul?
/// SHAI_binop - two-source SHA instruction with rr and rm forms.
///
/// When UsesXMM0 is set, the asm string carries an explicit %xmm0 operand and
/// the selection pattern passes XMM0 as a third argument to IntId; otherwise
/// the plain two-operand string and two-argument pattern are used.
multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
                      X86FoldableSchedWrite sched, bit UsesXMM0 = 0> {
  def rr : I<Opc, MRMSrcReg, (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2),
             !if(UsesXMM0,
                 !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
             [!if(UsesXMM0,
                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>,
             T8, Sched<[sched]>;

  def rm : I<Opc, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, i128mem:$src2),
             !if(UsesXMM0,
                 !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
             [!if(UsesXMM0,
                  (set VR128:$dst, (IntId VR128:$src1,
                    (memop addr:$src2), XMM0)),
                  (set VR128:$dst, (IntId VR128:$src1,
                    (memop addr:$src2))))]>, T8,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// All SHA instructions are two-address ($dst tied to $src1) and require HasSHA.
let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
  def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst),
                         (ins VR128:$src1, VR128:$src2, u8imm:$src3),
                         "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                         [(set VR128:$dst,
                           (int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
                            (i8 timm:$src3)))]>, TA,
                         Sched<[SchedWriteVecIMul.XMM]>;
  def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst),
                         (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
                         "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                         [(set VR128:$dst,
                           (int_x86_sha1rnds4 VR128:$src1,
                            (memop addr:$src2),
                            (i8 timm:$src3)))]>, TA,
                         Sched<[SchedWriteVecIMul.XMM.Folded,
                                SchedWriteVecIMul.XMM.ReadAfterFold]>;

  defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte,
                              SchedWriteVecIMul.XMM>;
  defm SHA1MSG1  : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1,
                              SchedWriteVecIMul.XMM>;
  defm SHA1MSG2  : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2,
                              SchedWriteVecIMul.XMM>;

  // The only SHA instruction instantiated with UsesXMM0 = 1; XMM0 is also
  // listed as an implicit use.
  let Uses=[XMM0] in
  defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2,
                                SchedWriteVecIMul.XMM, 1>;

  defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1,
                               SchedWriteVecIMul.XMM>;
  defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2,
                               SchedWriteVecIMul.XMM>;
}

// Aliases that accept sha256rnds2 written without the explicit %xmm0 operand
// that the primary asm string carries.
def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
                (SHA256RNDS2rr VR128:$dst, VR128:$src2), 0>;
def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
                (SHA256RNDS2rm VR128:$dst, i128mem:$src2), 0>;

//===----------------------------------------------------------------------===//
// AES-NI Instructions
//===----------------------------------------------------------------------===//

/// AESI_binop_rm_int - two-source AES instruction with rr and rm forms.
///
/// The asm string is supplied through `let AsmString` (the defs pass "" to
/// the base class): OpcodeStr followed by the two-operand tail when Is2Addr
/// is set, or the three-operand tail otherwise.  RC and MemOp default to the
/// 128-bit register class and memory operand.
multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
                             Intrinsic IntId, PatFrag ld_frag,
                             bit Is2Addr = 0, RegisterClass RC = VR128,
                             X86MemOperand MemOp = i128mem> {
  // Uses the `#` paste operator, matching the other string pastes in this
  // file, rather than the legacy `##` spelling.
  let AsmString = OpcodeStr #
                  !if(Is2Addr, "\t{$src2, $dst|$dst, $src2}",
                      "\t{$src2, $src1, $dst|$dst, $src1, $src2}") in {
    def rr : AES8I<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2), "",
                   [(set RC:$dst, (IntId RC:$src1, RC:$src2))]>,
                   Sched<[WriteAESDecEnc]>;
    def rm : AES8I<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, MemOp:$src2), "",
                   [(set RC:$dst, (IntId RC:$src1, (ld_frag addr:$src2)))]>,
                   Sched<[WriteAESDecEnc.Folded, WriteAESDecEnc.ReadAfterFold]>;
  }
}

// Perform One Round of an AES Encryption/Decryption Flow
// VEX-encoded 128-bit forms (three-operand syntax).
let Predicates = [HasAVX, NoVLX_Or_NoVAES, HasAES] in {
  defm VAESENC          : AESI_binop_rm_int<0xDC, "vaesenc",
                         int_x86_aesni_aesenc, load>, VEX_4V, VEX_WIG;
  defm VAESENCLAST      : AESI_binop_rm_int<0xDD, "vaesenclast",
                         int_x86_aesni_aesenclast, load>, VEX_4V, VEX_WIG;
  defm VAESDEC          : AESI_binop_rm_int<0xDE, "vaesdec",
                         int_x86_aesni_aesdec, load>, VEX_4V, VEX_WIG;
  defm VAESDECLAST      : AESI_binop_rm_int<0xDF, "vaesdeclast",
                         int_x86_aesni_aesdeclast, load>, VEX_4V, VEX_WIG;
}

// VEX-encoded 256-bit forms.
let Predicates = [NoVLX, HasVAES] in {
  defm VAESENCY         : AESI_binop_rm_int<0xDC, "vaesenc",
                         int_x86_aesni_aesenc_256, load, 0, VR256,
                         i256mem>, VEX_4V, VEX_L, VEX_WIG;
  defm VAESENCLASTY     : AESI_binop_rm_int<0xDD, "vaesenclast",
                         int_x86_aesni_aesenclast_256, load, 0, VR256,
                         i256mem>, VEX_4V, VEX_L, VEX_WIG;
  defm VAESDECY         : AESI_binop_rm_int<0xDE, "vaesdec",
                         int_x86_aesni_aesdec_256, load, 0, VR256,
                         i256mem>, VEX_4V, VEX_L, VEX_WIG;
  defm VAESDECLASTY     : AESI_binop_rm_int<0xDF, "vaesdeclast",
                         int_x86_aesni_aesdeclast_256, load, 0, VR256,
                         i256mem>, VEX_4V, VEX_L, VEX_WIG;
}

// Non-VEX forms: two-operand syntax (Is2Addr = 1), $dst tied to $src1.
let Constraints = "$src1 = $dst" in {
  defm AESENC          : AESI_binop_rm_int<0xDC, "aesenc",
                         int_x86_aesni_aesenc, memop, 1>;
  defm AESENCLAST      : AESI_binop_rm_int<0xDD, "aesenclast",
                         int_x86_aesni_aesenclast, memop, 1>;
  defm AESDEC          : AESI_binop_rm_int<0xDE, "aesdec",
                         int_x86_aesni_aesdec, memop, 1>;
  defm AESDECLAST      : AESI_binop_rm_int<0xDF, "aesdeclast",
                         int_x86_aesni_aesdeclast, memop, 1>;
}

// Perform the AES InvMixColumn Transformation
let Predicates = [HasAVX, HasAES] in {
  def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1),
      "vaesimc\t{$src1, $dst|$dst, $src1}",
      [(set VR128:$dst,
        (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>,
      VEX, VEX_WIG;
  def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
      (ins i128mem:$src1),
      "vaesimc\t{$src1, $dst|$dst, $src1}",
      [(set VR128:$dst, (int_x86_aesni_aesimc (load addr:$src1)))]>,
      Sched<[WriteAESIMC.Folded]>, VEX, VEX_WIG;
}
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
  (ins VR128:$src1),
  "aesimc\t{$src1, $dst|$dst, $src1}",
  [(set VR128:$dst,
    (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>;
def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
  (ins i128mem:$src1),
  "aesimc\t{$src1, $dst|$dst, $src1}",
  [(set VR128:$dst, (int_x86_aesni_aesimc (memop addr:$src1)))]>,
  Sched<[WriteAESIMC.Folded]>;

// AES Round Key Generation Assist
let Predicates = [HasAVX, HasAES] in {
  def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, u8imm:$src2),
      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
        (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
      Sched<[WriteAESKeyGen]>, VEX, VEX_WIG;
  def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
      (ins i128mem:$src1, u8imm:$src2),
      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
        (int_x86_aesni_aeskeygenassist (load addr:$src1), timm:$src2))]>,
      Sched<[WriteAESKeyGen.Folded]>, VEX, VEX_WIG;
}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
  (ins VR128:$src1, u8imm:$src2),
  "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
  [(set VR128:$dst,
    (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
  Sched<[WriteAESKeyGen]>;
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
  (ins i128mem:$src1, u8imm:$src2),
  "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
  [(set VR128:$dst,
    (int_x86_aesni_aeskeygenassist (memop addr:$src1), timm:$src2))]>,
  Sched<[WriteAESKeyGen.Folded]>;

//===----------------------------------------------------------------------===//
// PCLMUL Instructions
//===----------------------------------------------------------------------===//

// Immediate transform to help with commuting: swaps the high and low nibbles
// of the 8-bit immediate.
def PCLMULCommuteImm : SDNodeXForm<timm, [{
  uint8_t Imm = N->getZExtValue();
  return getI8Imm((uint8_t)((Imm >> 4) | (Imm << 4)), SDLoc(N));
}]>;

// SSE carry-less Multiplication instructions
let Predicates = [NoAVX, HasPCLMUL] in {
  let Constraints = "$src1 = $dst" in {
    let isCommutable = 1 in
    def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
              (ins VR128:$src1, VR128:$src2, u8imm:$src3),
              "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
              [(set VR128:$dst,
                (int_x86_pclmulqdq VR128:$src1, VR128:$src2, timm:$src3))]>,
                Sched<[WriteCLMul]>;

    def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
              (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
              "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
              [(set VR128:$dst,
                 (int_x86_pclmulqdq VR128:$src1, (memop addr:$src2),
                  timm:$src3))]>,
              Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"

  // A load in the first operand is folded by swapping the sources and
  // nibble-swapping the immediate.
  def : Pat<(int_x86_pclmulqdq (memop addr:$src2), VR128:$src1,
                                (i8 timm:$src3)),
            (PCLMULQDQrm VR128:$src1, addr:$src2,
                          (PCLMULCommuteImm timm:$src3))>;
} // Predicates = [NoAVX, HasPCLMUL]

// SSE aliases
// The mnemonic is "pclmul" # HI # LO # "dq".  The encoded immediate sets
// bit 4 when LO is "hq" and bit 0 when HI is "hq".
foreach HI = ["hq","lq"] in
foreach LO = ["hq","lq"] in {
  def : InstAlias<"pclmul" # HI # LO # "dq\t{$src, $dst|$dst, $src}",
                  (PCLMULQDQrr VR128:$dst, VR128:$src,
                   !add(!shl(!eq(LO,"hq"),4),!eq(HI,"hq"))), 0>;
  def : InstAlias<"pclmul" # HI # LO # "dq\t{$src, $dst|$dst, $src}",
                  (PCLMULQDQrm VR128:$dst, i128mem:$src,
                   !add(!shl(!eq(LO,"hq"),4),!eq(HI,"hq"))), 0>;
}

// AVX carry-less Multiplication instructions
/// vpclmulqdq - rr and rm forms of the VEX-encoded carry-less multiply for
/// register class RC, plus a pattern that folds a load appearing in the
/// first intrinsic operand.
multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
                      PatFrag LdFrag, Intrinsic IntId> {
  let isCommutable = 1 in
  def rr : PCLMULIi8<0x44, MRMSrcReg, (outs RC:$dst),
            (ins RC:$src1, RC:$src2, u8imm:$src3),
            "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
            [(set RC:$dst,
              (IntId RC:$src1, RC:$src2, timm:$src3))]>,
            Sched<[WriteCLMul]>;

  def rm : PCLMULIi8<0x44, MRMSrcMem, (outs RC:$dst),
            (ins RC:$src1, MemOp:$src2, u8imm:$src3),
            "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
            [(set RC:$dst,
               (IntId RC:$src1, (LdFrag addr:$src2), timm:$src3))]>,
            Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;

  // We can commute a load in the first operand by swapping the sources and
  // rotating the immediate.
  def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 timm:$src3)),
            (!cast<Instruction>(NAME#"rm") RC:$src1, addr:$src2,
                                           (PCLMULCommuteImm timm:$src3))>;
}

let Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in
defm VPCLMULQDQ : vpclmulqdq<VR128, i128mem, load,
                             int_x86_pclmulqdq>, VEX_4V, VEX_WIG;

let Predicates = [NoVLX, HasVPCLMULQDQ] in
defm VPCLMULQDQY : vpclmulqdq<VR256, i256mem, load,
                              int_x86_pclmulqdq_256>, VEX_4V, VEX_L, VEX_WIG;

/// vpclmulqdq_aliases_impl - one rr and one rm alias for the mnemonic
/// "vpclmul" # Hi # Lo # "dq" on the instruction named by InstStr.  The
/// immediate sets bit 4 when Lo is "hq" and bit 0 when Hi is "hq".
multiclass vpclmulqdq_aliases_impl<string InstStr, RegisterClass RC,
                                   X86MemOperand MemOp, string Hi, string Lo> {
  def : InstAlias<"vpclmul" # Hi # Lo # "dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(InstStr # "rr") RC:$dst, RC:$src1, RC:$src2,
                        !add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))), 0>;
  def : InstAlias<"vpclmul" # Hi # Lo # "dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(InstStr # "rm") RC:$dst, RC:$src1, MemOp:$src2,
                        !add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))), 0>;
}

/// vpclmulqdq_aliases - instantiates the aliases for all four Hi/Lo
/// selector combinations.
multiclass vpclmulqdq_aliases<string InstStr, RegisterClass RC,
                              X86MemOperand MemOp> {
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "hq", "hq">;
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "hq", "lq">;
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "lq", "hq">;
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "lq", "lq">;
}

// AVX aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQ", VR128, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQY", VR256, i256mem>;

//===----------------------------------------------------------------------===//
// SSE4A Instructions
//===----------------------------------------------------------------------===//

let Predicates = [HasSSE4A] in {

let ExeDomain = SSEPackedInt in {
let Constraints = "$src = $dst" in {
// Immediate form: length and index are given as two 8-bit immediates.
def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst),
                 (ins VR128:$src, u8imm:$len, u8imm:$idx),
                 "extrq\t{$idx, $len, $src|$src, $len, $idx}",
                 [(set VR128:$dst, (X86extrqi VR128:$src, timm:$len,
                                    timm:$idx))]>,
                 PD, Sched<[SchedWriteVecALU.XMM]>;
// Register form: the second operand is the $mask register.
def EXTRQ  : I<0x79, MRMSrcReg, (outs VR128:$dst),
              (ins VR128:$src, VR128:$mask),
              "extrq\t{$mask, $src|$src, $mask}",
              [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src,
                                 VR128:$mask))]>,
              PD, Sched<[SchedWriteVecALU.XMM]>;

// Immediate form: length and index are given as two 8-bit immediates.
def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
                   (ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx),
                   "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
                   [(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2,
                                      timm:$len, timm:$idx))]>,
                   XD, Sched<[SchedWriteVecALU.XMM]>;
// Register form: the second operand is the $mask register.
def INSERTQ  : I<0x79, MRMSrcReg, (outs VR128:$dst),
              (ins VR128:$src, VR128:$mask),
              "insertq\t{$mask, $src|$src, $mask}",
              [(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src,
                                 VR128:$mask))]>,
              XD, Sched<[SchedWriteVecALU.XMM]>;
}
} // ExeDomain = SSEPackedInt

// Non-temporal (unaligned) scalar stores.
let AddedComplexity = 400 in { // Prefer non-temporal versions
let hasSideEffects = 0, mayStore = 1, SchedRW = [SchedWriteFMoveLSNT.Scl.MR] in {
// Both stores have an empty pattern list; they are only selected through the
// nontemporalstore patterns that follow.
def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
                "movntss\t{$src, $dst|$dst, $src}", []>, XS;

def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                "movntsd\t{$src, $dst|$dst, $src}", []>, XD;
} // SchedRW

// The instructions take a VR128 source, so the scalar FR32/FR64 value is
// copied into the VR128 register class first.
def : Pat<(nontemporalstore FR32:$src, addr:$dst),
          (MOVNTSS addr:$dst, (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;

def : Pat<(nontemporalstore FR64:$src, addr:$dst),
          (MOVNTSD addr:$dst, (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;

} // AddedComplexity
} // HasSSE4A

//===----------------------------------------------------------------------===//
// AVX Instructions
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VBROADCAST - Load from memory and broadcast to all elements of the
//              destination operand
//
/// avx_broadcast_rm - Memory-source broadcast. The selection pattern applies
/// bcast_frag to addr:$src and produces a value of type VT in RC.
class avx_broadcast_rm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop, ValueType VT,
                       PatFrag bcast_frag, SchedWrite Sched> :
  AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
        [(set RC:$dst, (VT (bcast_frag addr:$src)))]>,
        Sched<[Sched]>, VEX;

// AVX2 adds register forms
/// avx2_broadcast_rr - Register-source broadcast. The source is always a
/// VR128 register of type OpVT; the result is a ResVT value in RC.
class avx2_broadcast_rr<bits<8> opc, string OpcodeStr, RegisterClass RC,
                        ValueType ResVT, ValueType OpVT, SchedWrite Sched> :
  AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
         [(set RC:$dst, (ResVT (X86VBroadcast (OpVT VR128:$src))))]>,
         Sched<[Sched]>, VEX;

// Memory forms (HasAVX). The 256-bit variants differ from the 128-bit one only
// in register class, value type and VEX_L.
// NOTE(review): the 256-bit memory forms reuse the XMM folded scheduling
// class; confirm this is intentional.
let ExeDomain = SSEPackedSingle, Predicates = [HasAVX, NoVLX] in {
  def VBROADCASTSSrm  : avx_broadcast_rm<0x18, "vbroadcastss", VR128,
                                         f32mem, v4f32, X86VBroadcastld32,
                                         SchedWriteFShuffle.XMM.Folded>;
  def VBROADCASTSSYrm : avx_broadcast_rm<0x18, "vbroadcastss", VR256,
                                         f32mem, v8f32, X86VBroadcastld32,
                                         SchedWriteFShuffle.XMM.Folded>, VEX_L;
}
let ExeDomain = SSEPackedDouble, Predicates = [HasAVX, NoVLX] in
def VBROADCASTSDYrm  : avx_broadcast_rm<0x19, "vbroadcastsd", VR256, f64mem,
                                        v4f64, X86VBroadcastld64,
                                        SchedWriteFShuffle.XMM.Folded>, VEX_L;

// Register forms (HasAVX2).
let ExeDomain = SSEPackedSingle, Predicates = [HasAVX2, NoVLX] in {
  def VBROADCASTSSrr  : avx2_broadcast_rr<0x18, "vbroadcastss", VR128,
                                          v4f32, v4f32, SchedWriteFShuffle.XMM>;
  def VBROADCASTSSYrr : avx2_broadcast_rr<0x18, "vbroadcastss", VR256,
                                          v8f32, v4f32, WriteFShuffle256>, VEX_L;
}
let ExeDomain = SSEPackedDouble, Predicates = [HasAVX2, NoVLX] in
def VBROADCASTSDYrr  : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256,
                                         v4f64, v2f64, WriteFShuffle256>, VEX_L;

//===----------------------------------------------------------------------===//
// VBROADCAST*128 - Load from memory and broadcast 128-bit vector to both
//                  halves of a 256-bit vector.
//
// Both instructions are defined with an empty pattern list. VBROADCASTF128 is
// selected through the X86SubVBroadcast patterns that follow.
let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX2] in
def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
                           (ins i128mem:$src),
                           "vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
                           Sched<[WriteShuffleLd]>, VEX, VEX_L;

let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX],
    ExeDomain = SSEPackedSingle in
def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst),
                           (ins f128mem:$src),
                           "vbroadcastf128\t{$src, $dst|$dst, $src}", []>,
                           Sched<[SchedWriteFShuffle.XMM.Folded]>, VEX, VEX_L;

let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF128 addr:$src)>;
def : Pat<(v8f32 (X86SubVBroadcast (loadv4f32 addr:$src))),
          (VBROADCASTF128 addr:$src)>;
}

// NOTE: We're using FP instructions here, but execution domain fixing can
// convert to integer when profitable.
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTF128 addr:$src)>;
def : Pat<(v8i32 (X86SubVBroadcast (loadv4i32 addr:$src))),
          (VBROADCASTF128 addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
          (VBROADCASTF128 addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
          (VBROADCASTF128 addr:$src)>;
}

//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//
// Both forms have an empty pattern list; selection goes through the
// vinsert_lowering multiclass below.
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR128:$src2, u8imm:$src3),
          "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, Sched<[WriteFShuffle256]>, VEX_4V, VEX_L;
let mayLoad = 1 in
def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, f128mem:$src2, u8imm:$src3),
          "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, Sched<[WriteFShuffle256.Folded,
                      WriteFShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
}

// To create a 256-bit all ones value, we should produce VCMPTRUEPS
// with YMM register containing zero.
// FIXME: Avoid producing vxorps to clear the fake inputs.
let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 immAllOnesV), (VCMPPSYrri (AVX_SET0), (AVX_SET0), 0xf)>;
}

/// vinsert_lowering - Emits the register and folded-load patterns that map a
/// vinsert128_insert of a From subvector into a To vector onto the
/// InstrStr#rr / InstrStr#rm instructions. The instruction immediate is
/// derived from the matched node via INSERT_get_vinsert128_imm.
multiclass vinsert_lowering<string InstrStr, ValueType From, ValueType To,
                            PatFrag memop_frag> {
  def : Pat<(vinsert128_insert:$ins (To VR256:$src1), (From VR128:$src2),
                                    (iPTR imm)),
            (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR128:$src2,
                                       (INSERT_get_vinsert128_imm VR256:$ins))>;
  def : Pat<(vinsert128_insert:$ins (To VR256:$src1),
                                    (From (memop_frag addr:$src2)),
                                    (iPTR imm)),
            (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
                                       (INSERT_get_vinsert128_imm VR256:$ins))>;
}

// Floating-point element types: any AVX target without VLX.
let Predicates = [HasAVX, NoVLX] in {
  defm : vinsert_lowering<"VINSERTF128", v4f32, v8f32, loadv4f32>;
  defm : vinsert_lowering<"VINSERTF128", v2f64, v4f64, loadv2f64>;
}

// Integer element types: only under the HasAVX1Only predicate.
let Predicates = [HasAVX1Only] in {
  defm : vinsert_lowering<"VINSERTF128", v2i64, v4i64, loadv2i64>;
  defm : vinsert_lowering<"VINSERTF128", v4i32, v8i32, loadv4i32>;
  defm : vinsert_lowering<"VINSERTF128", v8i16, v16i16, loadv8i16>;
  defm : vinsert_lowering<"VINSERTF128", v16i8, v32i8, loadv16i8>;
}

//===----------------------------------------------------------------------===//
// VEXTRACTF128 - Extract packed floating-point values
//
// Both forms have an empty pattern list; selection goes through the
// vextract_lowering multiclass below.
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
          (ins VR256:$src1, u8imm:$src2),
          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, Sched<[WriteFShuffle256]>, VEX, VEX_L;
let mayStore = 1 in
def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
          (ins f128mem:$dst, VR256:$src1, u8imm:$src2),
          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, Sched<[WriteFStoreX]>, VEX, VEX_L;
}

/// vextract_lowering - Emits the register pattern and the extract-then-store
/// pattern that map a vextract128_extract of a To subvector out of a From
/// vector onto the InstrStr#rr / InstrStr#mr instructions. The instruction
/// immediate is derived from the matched node via EXTRACT_get_vextract128_imm.
multiclass vextract_lowering<string InstrStr, ValueType From, ValueType To> {
  def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
            (To (!cast<Instruction>(InstrStr#rr)
                                    (From VR256:$src1),
                                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;
  def : Pat<(store (To (vextract128_extract:$ext (From VR256:$src1),
                                                 (iPTR imm))), addr:$dst),
            (!cast<Instruction>(InstrStr#mr) addr:$dst, VR256:$src1,
             (EXTRACT_get_vextract128_imm VR128:$ext))>;
}

// AVX1 patterns
// Floating-point element types: any AVX target without VLX.
let Predicates = [HasAVX, NoVLX] in {
  defm : vextract_lowering<"VEXTRACTF128", v8f32, v4f32>;
  defm : vextract_lowering<"VEXTRACTF128", v4f64, v2f64>;
}

// Integer element types: only under the HasAVX1Only predicate.
let Predicates = [HasAVX1Only] in {
  defm : vextract_lowering<"VEXTRACTF128", v4i64, v2i64>;
  defm : vextract_lowering<"VEXTRACTF128", v8i32, v4i32>;
  defm : vextract_lowering<"VEXTRACTF128", v16i16, v8i16>;
  defm : vextract_lowering<"VEXTRACTF128", v32i8, v16i8>;
}

//===----------------------------------------------------------------------===//
// VMASKMOV - Conditional SIMD Packed Loads and Stores
//
/// avx_movmask_rm - Defines the four masked-move forms for one element type:
///   rm / Yrm - 128/256-bit masked load (opcode opc_rm), selected from
///              IntLd / IntLd256 with the memory operand first and $src1
///              second.
///   mr / Ymr - 128/256-bit masked store (opcode opc_mr), selected from
///              IntSt / IntSt256.
/// schedX / schedY supply the .RM and .MR scheduling classes for the 128-bit
/// and 256-bit forms respectively.
multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
                          Intrinsic IntLd, Intrinsic IntLd256,
                          Intrinsic IntSt, Intrinsic IntSt256,
                          X86SchedWriteMaskMove schedX,
                          X86SchedWriteMaskMove schedY> {
  def rm  : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, f128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
             VEX_4V, Sched<[schedX.RM]>;
  def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, f256mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
             VEX_4V, VEX_L, Sched<[schedY.RM]>;
  def mr  : AVX8I<opc_mr, MRMDestMem, (outs),
             (ins f128mem:$dst, VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>,
             VEX_4V, Sched<[schedX.MR]>;
  def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
             (ins f256mem:$dst, VR256:$src1, VR256:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
             VEX_4V, VEX_L, Sched<[schedY.MR]>;
}

let ExeDomain = SSEPackedSingle in
defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
                                 int_x86_avx_maskload_ps,
                                 int_x86_avx_maskload_ps_256,
                                 int_x86_avx_maskstore_ps,
                                 int_x86_avx_maskstore_ps_256,
                                 WriteFMaskMove32, WriteFMaskMove32Y>;
let ExeDomain = SSEPackedDouble in
defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
                                 int_x86_avx_maskload_pd,
                                 int_x86_avx_maskload_pd_256,
                                 int_x86_avx_maskstore_pd,
                                 int_x86_avx_maskstore_pd_256,
                                 WriteFMaskMove64, WriteFMaskMove64Y>;

//===----------------------------------------------------------------------===//
// VPERMIL - Permute Single and Double Floating-Point Values
//

/// avx_permil - Defines the four permute forms for one register class:
///   rr / rm - variable permute (opcode opc_rm, X86VPermilpv); the control is
///             an i_vt vector taken from a register or from x86memop_i.
///   ri / mi - immediate permute (opcode opc_rmi, X86VPermilpi); the data
///             comes from a register or from x86memop_f, the control is an
///             8-bit immediate.
/// `varsched` schedules the variable forms, `sched` the immediate forms.
multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
                      RegisterClass RC, X86MemOperand x86memop_f,
                      X86MemOperand x86memop_i,
                      ValueType f_vt, ValueType i_vt,
                      X86FoldableSchedWrite sched,
                      X86FoldableSchedWrite varsched> {
  let Predicates = [HasAVX, NoVLX] in {
    def rr  : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
               (ins RC:$src1, RC:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt RC:$src2))))]>, VEX_4V,
               Sched<[varsched]>;
    // NOTE(review): ReadAfterFold is taken from `sched` while the folded write
    // comes from `varsched`; confirm the mix is intentional.
    def rm  : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86memop_i:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1,
                              (i_vt (load addr:$src2)))))]>, VEX_4V,
               Sched<[varsched.Folded, sched.ReadAfterFold]>;

    def ri  : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
             (ins RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 timm:$src2))))]>, VEX,
             Sched<[sched]>;
    def mi  : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
             (ins x86memop_f:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst,
               (f_vt (X86VPermilpi (load addr:$src1), (i8 timm:$src2))))]>, VEX,
             Sched<[sched.Folded]>;
  }// Predicates = [HasAVX, NoVLX]
}

let ExeDomain = SSEPackedSingle in {
  defm VPERMILPS  : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
                               v4f32, v4i32, SchedWriteFShuffle.XMM,
                               SchedWriteFVarShuffle.XMM>;
  defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
                               v8f32, v8i32, SchedWriteFShuffle.YMM,
                               SchedWriteFVarShuffle.YMM>, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
  defm VPERMILPD  : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
                               v2f64, v2i64, SchedWriteFShuffle.XMM,
                               SchedWriteFVarShuffle.XMM>;
  defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
                               v4f64, v4i64, SchedWriteFShuffle.YMM,
                               SchedWriteFVarShuffle.YMM>, VEX_L;
}

//===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
//

let ExeDomain = SSEPackedSingle in {
// Only the register form is marked commutable; the memory form relies on the
// explicit swapped-operand patterns further down.
let isCommutable = 1 in
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR256:$src2, u8imm:$src3),
          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          [(set VR256:$dst, (v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
                              (i8 timm:$src3))))]>, VEX_4V, VEX_L,
          Sched<[WriteFShuffle256]>;
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4f64 addr:$src2),
                             (i8 timm:$src3)))]>, VEX_4V, VEX_L,
          Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
}

// Immediate transform to help with commuting.
// XORs the immediate with 0x22, i.e. flips bit 1 and bit 5. It is applied by
// the patterns below that swap the two source operands so that a load in the
// first operand can be folded into the rm form.
def Perm2XCommuteImm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N));
}]>;

let Predicates = [HasAVX] in {
// Pattern with load in other operand.
def : Pat<(v4f64 (X86VPerm2x128 (loadv4f64 addr:$src2),
                                VR256:$src1, (i8 timm:$imm))),
          (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm timm:$imm))>;
}

// Under HasAVX1Only the v4i64 permutes are also selected to the FP
// instruction.
let Predicates = [HasAVX1Only] in {
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))),
          (VPERM2F128rr VR256:$src1, VR256:$src2, timm:$imm)>;
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
                  (loadv4i64 addr:$src2), (i8 timm:$imm))),
          (VPERM2F128rm VR256:$src1, addr:$src2, timm:$imm)>;
// Pattern with load in other operand.
def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
                                VR256:$src1, (i8 timm:$imm))),
          (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm timm:$imm))>;
}

//===----------------------------------------------------------------------===//
// VZERO - Zero YMM registers
// Note: These instructions do not affect YMM16-YMM31.
//

// Both instructions list YMM0-YMM15 as implicit defs. They share opcode 0x77
// and differ only in VEX_L, which is set on VZEROALL and absent on VZEROUPPER.
let SchedRW = [WriteSystem] in {
let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
            YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
  // Zero All YMM registers
  def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
                  [(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L,
                  Requires<[HasAVX]>, VEX_WIG;

  // Zero Upper bits of YMM registers
  def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
                     [(int_x86_avx_vzeroupper)]>, PS, VEX,
                     Requires<[HasAVX]>, VEX_WIG;
} // Defs
} // SchedRW

//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//

/// f16c_ph2ps - vcvtph2ps register and memory forms. The source is a VR128
/// register or an x86memop load (matched as loadv8i16); the destination
/// register class is RC.
multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop,
                      X86FoldableSchedWrite sched> {
  def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
             "vcvtph2ps\t{$src, $dst|$dst, $src}",
             [(set RC:$dst, (X86cvtph2ps VR128:$src))]>,
             T8PD, VEX, Sched<[sched]>;
  let hasSideEffects = 0, mayLoad = 1 in
  def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
             "vcvtph2ps\t{$src, $dst|$dst, $src}",
             [(set RC:$dst, (X86cvtph2ps (loadv8i16 addr:$src)))]>,
             T8PD, VEX, Sched<[sched.Folded]>;
}

/// f16c_ps2ph - vcvtps2ph register and store forms. The source is an RC
/// register plus an 8-bit immediate; the result goes to a VR128 register (rr)
/// or to memory (mr). The mr form has an empty pattern list and is selected
/// through separate store patterns. RR / MR are the scheduling classes of the
/// two forms.
multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
                      SchedWrite RR, SchedWrite MR> {
  def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
               (ins RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set VR128:$dst, (X86cvtps2ph RC:$src1, timm:$src2))]>,
               TAPD, VEX, Sched<[RR]>;
  let hasSideEffects = 0, mayStore = 1 in
  def mr : Ii8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               TAPD, VEX, Sched<[MR]>;
}

let Predicates = [HasF16C, NoVLX] in {
  defm VCVTPH2PS  : f16c_ph2ps<VR128, f64mem, WriteCvtPH2PS>;
  defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, WriteCvtPH2PSY>, VEX_L;
  defm VCVTPS2PH  : f16c_ps2ph<VR128, f64mem, WriteCvtPS2PH,
                               WriteCvtPS2PHSt>;
  defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, WriteCvtPS2PHY,
                               WriteCvtPS2PHYSt>, VEX_L;

  // Pattern match vcvtph2ps of a scalar i64 load.
  def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTPH2PSrm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16
              (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTPH2PSrm addr:$src)>;

  // Fold a store of element 0 of the 128-bit conversion result (viewed as f64
  // or i64), or of the whole v8i16 result of the 256-bit conversion, into the
  // store form of vcvtps2ph.
  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86cvtps2ph VR128:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86cvtps2ph VR128:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>;
  def : Pat<(store (v8i16 (X86cvtps2ph VR256:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHYmr addr:$dst, VR256:$src1, timm:$src2)>;
}

// Patterns for matching conversions from float to half-float and vice versa.
let Predicates = [HasF16C, NoVLX] in {
  // Use MXCSR.RC for rounding instead of explicitly specifying the default
  // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
  // configurations we support (the default). However, falling back to MXCSR is
  // more consistent with other instructions, which are always controlled by it.
  // It's encoded as 0b100.
  def : Pat<(fp_to_f16 FR32:$src),
            (i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (v8i16 (VCVTPS2PHrr
              (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 4))), sub_16bit))>;

  def : Pat<(f16_to_fp GR16:$src),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSrr
              (v4i32 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128)))), FR32)) >;

  // Round trip through half precision: keep the value in a vector register
  // between the two conversions.
  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32:$src))),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSrr
             (v8i16 (VCVTPS2PHrr (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 4)))), FR32)) >;
}

//===----------------------------------------------------------------------===//
// AVX2 Instructions
//===----------------------------------------------------------------------===//

/// AVX2_blend_rmi - AVX2 blend with 8-bit immediate
/// Defines the register (rri, commutable) and folded-load (rmi) forms, plus a
/// pattern for a load in the first source: it swaps the operands onto the rmi
/// form and rewrites the immediate with commuteXForm.
multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, X86FoldableSchedWrite sched,
                          RegisterClass RC,
                          X86MemOperand x86memop, SDNodeXForm commuteXForm> {
  let isCommutable = 1 in
  def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, u8imm:$src3),
        !strconcat(OpcodeStr,
            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
        Sched<[sched]>, VEX_4V;
  def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
        !strconcat(OpcodeStr,
            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        [(set RC:$dst,
          (OpVT (OpNode RC:$src1, (load addr:$src2), timm:$src3)))]>,
        Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V;

  // Pattern to commute if load is in first source.
  def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, timm:$src3)),
            (!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
                                            (commuteXForm timm:$src3))>;
}

let Predicates = [HasAVX2] in {
defm VPBLENDD : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v4i32,
                               SchedWriteBlend.XMM, VR128, i128mem,
                               BlendCommuteImm4>;
defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32,
                                SchedWriteBlend.YMM, VR256, i256mem,
                                BlendCommuteImm8>, VEX_L;

// 64-bit element blends are selected to vpblendd. The immediate is rewritten
// by the BlendScale* transforms (the *Commute* variants are used when the
// load is the first operand and the operands are swapped).
def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3),
          (VPBLENDDYrri VR256:$src1, VR256:$src2, (BlendScaleImm4 timm:$src3))>;
def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3),
          (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>;
def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3),
          (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>;

def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
          (VPBLENDDrri VR128:$src1, VR128:$src2, (BlendScaleImm2to4 timm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3),
          (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleImm2to4 timm:$src3))>;
def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3),
          (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2to4 timm:$src3))>;
}

// For insertion into the zero index (low half) of a 256-bit vector, it is
// more efficient to generate a blend with immediate instead of an insert*128.
// NOTE: We're using FP instructions here, but execution domain fixing should
// take care of using integer instructions when profitable.
let Predicates = [HasAVX] in {
// Register base vector: the 128-bit value is placed in the low subregister of
// an otherwise undefined 256-bit register and blended in with immediate 0xf.
def : Pat<(insert_subvector (v8i32 VR256:$src1), (v4i32 VR128:$src2), (iPTR 0)),
          (VBLENDPSYrri VR256:$src1,
                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0xf)>;
def : Pat<(insert_subvector (v4i64 VR256:$src1), (v2i64 VR128:$src2), (iPTR 0)),
          (VBLENDPSYrri VR256:$src1,
                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0xf)>;
def : Pat<(insert_subvector (v16i16 VR256:$src1), (v8i16 VR128:$src2), (iPTR 0)),
          (VBLENDPSYrri VR256:$src1,
                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0xf)>;
def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)),
          (VBLENDPSYrri VR256:$src1,
                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0xf)>;

// Loaded base vector: the operands are swapped so that the load folds into the
// memory operand of the blend, and the immediate becomes 0xf0 instead of 0xf.
def : Pat<(insert_subvector (loadv8i32 addr:$src2), (v4i32 VR128:$src1), (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
def : Pat<(insert_subvector (loadv4i64 addr:$src2), (v2i64 VR128:$src1), (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
def : Pat<(insert_subvector (loadv16i16 addr:$src2), (v8i16 VR128:$src1), (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
def : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1), (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
}

//===----------------------------------------------------------------------===//
// VPBROADCAST - Load from memory and broadcast to all elements of the
//               destination operand
//
/// avx2_broadcast - Defines the 128-bit (rr, rm) and 256-bit (Yrr, Yrm)
/// integer broadcast forms for one element size. Register forms always read a
/// VR128 source; memory forms select bcast_frag on addr:$src. All definitions
/// are predicated on HasAVX2 plus the extra predicate `prd`.
multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
                          X86MemOperand x86memop, PatFrag bcast_frag,
                          ValueType OpVT128, ValueType OpVT256, Predicate prd> {
  let Predicates = [HasAVX2, prd] in {
    def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR128:$dst,
                   (OpVT128 (X86VBroadcast (OpVT128 VR128:$src))))]>,
                  Sched<[SchedWriteShuffle.XMM]>, VEX;
    def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR128:$dst,
                   (OpVT128 (bcast_frag addr:$src)))]>,
                  Sched<[SchedWriteShuffle.XMM.Folded]>, VEX;
    def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR256:$dst,
                    (OpVT256 (X86VBroadcast (OpVT128 VR128:$src))))]>,
                   Sched<[WriteShuffle256]>, VEX, VEX_L;
    // NOTE(review): the 256-bit memory form reuses the XMM folded scheduling
    // class; confirm this is intentional.
    def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR256:$dst,
                    (OpVT256 (bcast_frag addr:$src)))]>,
                   Sched<[SchedWriteShuffle.XMM.Folded]>, VEX, VEX_L;

    // Provide aliases for broadcast from the same register class that
    // automatically does the extract.
    def : Pat<(OpVT256 (X86VBroadcast (OpVT256 VR256:$src))),
              (!cast<Instruction>(NAME#"Yrr")
                  (OpVT128 (EXTRACT_SUBREG (OpVT256 VR256:$src),sub_xmm)))>;
  }
}

defm VPBROADCASTB  : avx2_broadcast<0x78, "vpbroadcastb", i8mem, X86VBroadcastld8,
                                    v16i8, v32i8, NoVLX_Or_NoBWI>;
defm VPBROADCASTW  : avx2_broadcast<0x79, "vpbroadcastw", i16mem, X86VBroadcastld16,
                                    v8i16, v16i16, NoVLX_Or_NoBWI>;
defm VPBROADCASTD  : avx2_broadcast<0x58, "vpbroadcastd", i32mem, X86VBroadcastld32,
                                    v4i32, v8i32, NoVLX>;
defm VPBROADCASTQ  : avx2_broadcast<0x59, "vpbroadcastq", i64mem, X86VBroadcastld64,
                                    v2i64, v4i64, NoVLX>;

let Predicates = [HasAVX2, NoVLX] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
74520b57cec5SDimitry Andric def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), 74530b57cec5SDimitry Andric (VPBROADCASTQrm addr:$src)>; 74540b57cec5SDimitry Andric def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), 74550b57cec5SDimitry Andric (VPBROADCASTQYrm addr:$src)>; 74560b57cec5SDimitry Andric 7457*8bcb0991SDimitry Andric // FIXME this is to handle aligned extloads from i8/i16. 7458*8bcb0991SDimitry Andric def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), 74590b57cec5SDimitry Andric (VPBROADCASTDrm addr:$src)>; 7460*8bcb0991SDimitry Andric def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), 74610b57cec5SDimitry Andric (VPBROADCASTDYrm addr:$src)>; 74620b57cec5SDimitry Andric} 74630b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { 74640b57cec5SDimitry Andric // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. 74650b57cec5SDimitry Andric // This means we'll encounter truncated i32 loads; match that here. 
74660b57cec5SDimitry Andric def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), 74670b57cec5SDimitry Andric (VPBROADCASTWrm addr:$src)>; 74680b57cec5SDimitry Andric def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), 74690b57cec5SDimitry Andric (VPBROADCASTWYrm addr:$src)>; 74700b57cec5SDimitry Andric def : Pat<(v8i16 (X86VBroadcast 74710b57cec5SDimitry Andric (i16 (trunc (i32 (extloadi16 addr:$src)))))), 74720b57cec5SDimitry Andric (VPBROADCASTWrm addr:$src)>; 74730b57cec5SDimitry Andric def : Pat<(v8i16 (X86VBroadcast 74740b57cec5SDimitry Andric (i16 (trunc (i32 (zextloadi16 addr:$src)))))), 74750b57cec5SDimitry Andric (VPBROADCASTWrm addr:$src)>; 74760b57cec5SDimitry Andric def : Pat<(v16i16 (X86VBroadcast 74770b57cec5SDimitry Andric (i16 (trunc (i32 (extloadi16 addr:$src)))))), 74780b57cec5SDimitry Andric (VPBROADCASTWYrm addr:$src)>; 74790b57cec5SDimitry Andric def : Pat<(v16i16 (X86VBroadcast 74800b57cec5SDimitry Andric (i16 (trunc (i32 (zextloadi16 addr:$src)))))), 74810b57cec5SDimitry Andric (VPBROADCASTWYrm addr:$src)>; 74820b57cec5SDimitry Andric 7483*8bcb0991SDimitry Andric // FIXME this is to handle aligned extloads from i8. 7484*8bcb0991SDimitry Andric def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))), 7485*8bcb0991SDimitry Andric (VPBROADCASTWrm addr:$src)>; 7486*8bcb0991SDimitry Andric def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))), 7487*8bcb0991SDimitry Andric (VPBROADCASTWYrm addr:$src)>; 74880b57cec5SDimitry Andric} 74890b57cec5SDimitry Andric 74900b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in { 74910b57cec5SDimitry Andric // Provide fallback in case the load node that is used in the patterns above 74920b57cec5SDimitry Andric // is used by additional users, which prevents the pattern selection. 
74930b57cec5SDimitry Andric def : Pat<(v4f32 (X86VBroadcast FR32:$src)), 74940b57cec5SDimitry Andric (VBROADCASTSSrr (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>; 74950b57cec5SDimitry Andric def : Pat<(v8f32 (X86VBroadcast FR32:$src)), 74960b57cec5SDimitry Andric (VBROADCASTSSYrr (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>; 74970b57cec5SDimitry Andric def : Pat<(v4f64 (X86VBroadcast FR64:$src)), 74980b57cec5SDimitry Andric (VBROADCASTSDYrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>; 74990b57cec5SDimitry Andric} 75000b57cec5SDimitry Andric 75010b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { 75020b57cec5SDimitry Andric def : Pat<(v16i8 (X86VBroadcast GR8:$src)), 7503*8bcb0991SDimitry Andric (VPBROADCASTBrr (VMOVDI2PDIrr 75040b57cec5SDimitry Andric (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), 7505*8bcb0991SDimitry Andric GR8:$src, sub_8bit))))>; 75060b57cec5SDimitry Andric def : Pat<(v32i8 (X86VBroadcast GR8:$src)), 7507*8bcb0991SDimitry Andric (VPBROADCASTBYrr (VMOVDI2PDIrr 75080b57cec5SDimitry Andric (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), 7509*8bcb0991SDimitry Andric GR8:$src, sub_8bit))))>; 75100b57cec5SDimitry Andric 75110b57cec5SDimitry Andric def : Pat<(v8i16 (X86VBroadcast GR16:$src)), 7512*8bcb0991SDimitry Andric (VPBROADCASTWrr (VMOVDI2PDIrr 75130b57cec5SDimitry Andric (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), 7514*8bcb0991SDimitry Andric GR16:$src, sub_16bit))))>; 75150b57cec5SDimitry Andric def : Pat<(v16i16 (X86VBroadcast GR16:$src)), 7516*8bcb0991SDimitry Andric (VPBROADCASTWYrr (VMOVDI2PDIrr 75170b57cec5SDimitry Andric (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), 7518*8bcb0991SDimitry Andric GR16:$src, sub_16bit))))>; 75190b57cec5SDimitry Andric} 75200b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in { 75210b57cec5SDimitry Andric def : Pat<(v4i32 (X86VBroadcast GR32:$src)), 7522*8bcb0991SDimitry Andric (VPBROADCASTDrr (VMOVDI2PDIrr GR32:$src))>; 75230b57cec5SDimitry Andric def : Pat<(v8i32 (X86VBroadcast GR32:$src)), 
7524*8bcb0991SDimitry Andric (VPBROADCASTDYrr (VMOVDI2PDIrr GR32:$src))>; 75250b57cec5SDimitry Andric def : Pat<(v2i64 (X86VBroadcast GR64:$src)), 7526*8bcb0991SDimitry Andric (VPBROADCASTQrr (VMOV64toPQIrr GR64:$src))>; 75270b57cec5SDimitry Andric def : Pat<(v4i64 (X86VBroadcast GR64:$src)), 7528*8bcb0991SDimitry Andric (VPBROADCASTQYrr (VMOV64toPQIrr GR64:$src))>; 75290b57cec5SDimitry Andric} 75300b57cec5SDimitry Andric 75310b57cec5SDimitry Andric// AVX1 broadcast patterns 75320b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in { 7533*8bcb0991SDimitry Andricdef : Pat<(v8i32 (X86VBroadcastld32 addr:$src)), 75340b57cec5SDimitry Andric (VBROADCASTSSYrm addr:$src)>; 7535*8bcb0991SDimitry Andricdef : Pat<(v4i64 (X86VBroadcastld64 addr:$src)), 75360b57cec5SDimitry Andric (VBROADCASTSDYrm addr:$src)>; 7537*8bcb0991SDimitry Andricdef : Pat<(v4i32 (X86VBroadcastld32 addr:$src)), 75380b57cec5SDimitry Andric (VBROADCASTSSrm addr:$src)>; 75390b57cec5SDimitry Andric} 75400b57cec5SDimitry Andric 75410b57cec5SDimitry Andric // Provide fallback in case the load node that is used in the patterns above 75420b57cec5SDimitry Andric // is used by additional users, which prevents the pattern selection. 
let Predicates = [HasAVX, NoVLX] in {
  // 128bit broadcasts:
  // Every v2f64 broadcast form below is selected to VMOVDDUP.
  def : Pat<(v2f64 (X86VBroadcast f64:$src)),
            (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;
  def : Pat<(v2f64 (X86VBroadcastld64 addr:$src)),
            (VMOVDDUPrm addr:$src)>;

  def : Pat<(v2f64 (X86VBroadcast v2f64:$src)),
            (VMOVDDUPrr VR128:$src)>;
  def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
            (VMOVDDUPrm addr:$src)>;
  def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
            (VMOVDDUPrm addr:$src)>;
}

// Register broadcasts when only AVX1 is available. 256-bit results are built
// by splatting within an xmm register and then using VINSERTF128 (index 1) to
// place the same splat in the upper half.
let Predicates = [HasAVX1Only] in {
  def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
            (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)>;
  def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
            (VINSERTF128rr
               (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
                  (v4f32 (VPERMILPSri
                            (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)),
                  sub_xmm),
               (v4f32 (VPERMILPSri
                         (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)),
               1)>;
  def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
            (VINSERTF128rr
               (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
                  (v2f64 (VMOVDDUPrr
                            (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))),
                  sub_xmm),
               (v2f64 (VMOVDDUPrr
                         (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))),
               1)>;

  def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
            (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)>;
  def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
            (VINSERTF128rr
               (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                  (v4i32 (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)),
                  sub_xmm),
               (v4i32 (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)),
               1)>;
  def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
            (VINSERTF128rr
               (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
                  (v4i32 (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)),
                  sub_xmm),
               (v4i32 (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)),
               1)>;

  def : Pat<(v2i64 (X86VBroadcast i64:$src)),
            (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)>;
  def : Pat<(v2i64 (X86VBroadcastld64 addr:$src)),
            (VMOVDDUPrm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// VPERM - Permute instructions
//

// avx2_perm: 256-bit permute matched through X86VPermv.
//   opc       - opcode byte handed to AVX28I.
//   OpcodeStr - assembly mnemonic.
//   OpVT      - 256-bit result type of the pattern.
//   Sched     - scheduling class; the Yrm form uses its Folded and
//               ReadAfterFold members.
//   memOp     - memory operand class of the Yrm form.
multiclass avx2_perm<bits<8> opc, string OpcodeStr,
                     ValueType OpVT, X86FoldableSchedWrite Sched,
                     X86MemOperand memOp> {
  let Predicates = [HasAVX2, NoVLX] in {
    def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
                     (ins VR256:$src1, VR256:$src2),
                     OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR256:$dst,
                       (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>,
              Sched<[Sched]>, VEX_4V, VEX_L;
    def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
                     (ins VR256:$src1, memOp:$src2),
                     OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR256:$dst,
                       (OpVT (X86VPermv VR256:$src1,
                                        (load addr:$src2))))]>,
              Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX_4V, VEX_L;
  }
}

defm VPERMD : avx2_perm<0x36, "vpermd", v8i32, WriteVarShuffle256, i256mem>;
let ExeDomain = SSEPackedSingle in
defm VPERMPS : avx2_perm<0x16, "vpermps", v8f32, WriteFVarShuffle256, f256mem>;

// avx2_perm_imm: 256-bit permute with an 8-bit immediate, matched through
// X86VPermi.
//   opc       - opcode byte handed to AVX2AIi8.
//   OpcodeStr - assembly mnemonic.
//   mem_frag  - load fragment matched by the Ymi form.
//   OpVT      - 256-bit result type of the pattern.
//   Sched     - scheduling class; the Ymi form uses its Folded and
//               ReadAfterFold members.
//   memOp     - memory operand class of the Ymi form.
multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
                         ValueType OpVT, X86FoldableSchedWrite Sched,
                         X86MemOperand memOp> {
  let Predicates = [HasAVX2, NoVLX] in {
    def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
                       (ins VR256:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR256:$dst,
                         (OpVT (X86VPermi VR256:$src1, (i8 timm:$src2))))]>,
              Sched<[Sched]>, VEX, VEX_L;
    def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
                       (ins memOp:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR256:$dst,
                         (OpVT (X86VPermi (mem_frag addr:$src1),
                                          (i8 timm:$src2))))]>,
              Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VEX_L;
  }
}

defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64,
                            WriteShuffle256, i256mem>, VEX_W;
let ExeDomain = SSEPackedDouble in
defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64,
                             WriteFShuffle256, f256mem>, VEX_W;

//===----------------------------------------------------------------------===//
// VPERM2I128 - Permute Integer Values in 128-bit chunks
//
// Only the register form is marked commutable.
let isCommutable = 1 in
def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR256:$src2, u8imm:$src3),
          "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          [(set VR256:$dst,
            (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
                                  (i8 timm:$src3))))]>,
          Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
          "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          [(set VR256:$dst,
            (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2),
                           (i8 timm:$src3)))]>,
          Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>,
          VEX_4V, VEX_L;

// When the load is the first X86VPerm2x128 operand, select the rm form with
// the operands swapped and the immediate rewritten by Perm2XCommuteImm.
let Predicates = [HasAVX2] in
def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
                                VR256:$src1, (i8 timm:$imm))),
          (VPERM2I128rm VR256:$src1, addr:$src2,
                        (Perm2XCommuteImm timm:$imm))>;


//===----------------------------------------------------------------------===//
// VINSERTI128 - Insert packed integer values
//
// Both definitions carry an empty pattern list; selection is provided by the
// vinsert_lowering instantiations that follow.
let hasSideEffects = 0 in {
def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR128:$src2, u8imm:$src3),
          "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>,
          Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
let mayLoad = 1 in
def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, i128mem:$src2, u8imm:$src3),
          "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>,
          Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>,
          VEX_4V, VEX_L;
}

let Predicates = [HasAVX2, NoVLX] in {
  defm : vinsert_lowering<"VINSERTI128", v2i64, v4i64,  loadv2i64>;
  defm : vinsert_lowering<"VINSERTI128", v4i32, v8i32,  loadv4i32>;
  defm : vinsert_lowering<"VINSERTI128", v8i16, v16i16, loadv8i16>;
  defm : vinsert_lowering<"VINSERTI128", v16i8, v32i8,  loadv16i8>;
}

//===----------------------------------------------------------------------===//
// VEXTRACTI128 - Extract packed integer values
//
// Both definitions carry an empty pattern list; selection is provided by the
// vextract_lowering instantiations that follow.
def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
          (ins VR256:$src1, u8imm:$src2),
          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>,
          Sched<[WriteShuffle256]>, VEX, VEX_L;
let hasSideEffects = 0, mayStore = 1 in
def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
          (ins i128mem:$dst, VR256:$src1, u8imm:$src2),
          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>,
          Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_L;

let Predicates = [HasAVX2, NoVLX] in {
  defm : vextract_lowering<"VEXTRACTI128", v4i64,  v2i64>;
  defm : vextract_lowering<"VEXTRACTI128", v8i32,  v4i32>;
  defm : vextract_lowering<"VEXTRACTI128", v16i16, v8i16>;
  defm : vextract_lowering<"VEXTRACTI128", v32i8,  v16i8>;
}

//===----------------------------------------------------------------------===//
// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
//
// avx2_pmovmask defines the masked load (rm, Yrm; opcode 0x8c) and masked
// store (mr, Ymr; opcode 0x8e) forms for one element width.
//   OpcodeStr         - assembly mnemonic.
//   IntLd128/IntLd256 - intrinsics matched by the load forms; they are called
//                       as (addr, $src1).
//   IntSt128/IntSt256 - intrinsics matched by the store forms; they are
//                       called as (addr, $src1, $src2).
multiclass avx2_pmovmask<string OpcodeStr,
                         Intrinsic IntLd128, Intrinsic IntLd256,
                         Intrinsic IntSt128, Intrinsic IntSt256> {
  def rm  : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
                   (ins VR128:$src1, i128mem:$src2),
                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>,
            VEX_4V, Sched<[WriteVecMaskedLoad]>;
  def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
                   (ins VR256:$src1, i256mem:$src2),
                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
            VEX_4V, VEX_L, Sched<[WriteVecMaskedLoadY]>;
  def mr  : AVX28I<0x8e, MRMDestMem, (outs),
                   (ins i128mem:$dst, VR128:$src1, VR128:$src2),
                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>,
            VEX_4V, Sched<[WriteVecMaskedStore]>;
  def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
                   (ins i256mem:$dst, VR256:$src1, VR256:$src2),
                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
            VEX_4V, VEX_L, Sched<[WriteVecMaskedStoreY]>;
}

defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
                                int_x86_avx2_maskload_d,
                                int_x86_avx2_maskload_d_256,
                                int_x86_avx2_maskstore_d,
                                int_x86_avx2_maskstore_d_256>;
defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
                                int_x86_avx2_maskload_q,
                                int_x86_avx2_maskload_q_256,
                                int_x86_avx2_maskstore_q,
                                int_x86_avx2_maskstore_q_256>, VEX_W;

// maskmov_lowering maps the generic masked_store / masked_load nodes onto an
// existing masked-move instruction pair.
//   InstrStr - instruction name prefix; "mr" (store) and "rm" (load) are
//              appended and the result is looked up with !cast<Instruction>.
//   RC       - register class used for the data and for the mask.
//   VT       - type of the data being loaded or stored.
//   MaskVT   - type of the mask operand.
multiclass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT,
                            ValueType MaskVT> {
  // masked store
  def : Pat<(masked_store (VT RC:$src), addr:$ptr, (MaskVT RC:$mask)),
            (!cast<Instruction>(InstrStr#"mr") addr:$ptr, RC:$mask, RC:$src)>;

  // masked load
  // Matched only when the pass-through operand is undef ...
  def : Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), undef)),
            (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
  // ... or an all-zeros vector.
  def : Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask),
                             (VT immAllZerosV))),
            (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
}

// Floating-point data uses VMASKMOVPS/PD.
let Predicates = [HasAVX] in {
  defm : maskmov_lowering<"VMASKMOVPS",  VR128, v4f32, v4i32>;
  defm : maskmov_lowering<"VMASKMOVPD",  VR128, v2f64, v2i64>;
  defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8f32, v8i32>;
  defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4f64, v4i64>;
}
let Predicates = [HasAVX1Only] in {
  // load/store i32/i64 not supported use ps/pd version
  defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8i32, v8i32>;
  defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4i64, v4i64>;
  defm : maskmov_lowering<"VMASKMOVPS",  VR128, v4i32, v4i32>;
  defm : maskmov_lowering<"VMASKMOVPD",  VR128, v2i64, v2i64>;
}
// Integer data uses VPMASKMOVD/Q when AVX2 is available.
let Predicates = [HasAVX2] in {
  defm : maskmov_lowering<"VPMASKMOVDY", VR256, v8i32, v8i32>;
  defm : maskmov_lowering<"VPMASKMOVQY", VR256, v4i64, v4i64>;
  defm : maskmov_lowering<"VPMASKMOVD",  VR128, v4i32, v4i32>;
  defm : maskmov_lowering<"VPMASKMOVQ",  VR128, v2i64, v2i64>;
}

//===----------------------------------------------------------------------===//
// SubVector Broadcasts
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.

// The xmm source becomes the low half through INSERT_SUBREG and is inserted
// again at index 1 with VINSERTF128.
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128:$src))),
          (VINSERTF128rr
             (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
             (v2f64 VR128:$src), 1)>;
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128:$src))),
          (VINSERTF128rr
             (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
             (v4f32 VR128:$src), 1)>;
}

// NOTE: We're using FP instructions here, but execution domain fixing can
// convert to integer when profitable.
77970b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 77980b57cec5SDimitry Andricdef : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128:$src))), 77990b57cec5SDimitry Andric (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm), 78000b57cec5SDimitry Andric (v2i64 VR128:$src), 1)>; 78010b57cec5SDimitry Andricdef : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128:$src))), 78020b57cec5SDimitry Andric (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm), 78030b57cec5SDimitry Andric (v4i32 VR128:$src), 1)>; 78040b57cec5SDimitry Andricdef : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128:$src))), 78050b57cec5SDimitry Andric (VINSERTF128rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm), 78060b57cec5SDimitry Andric (v8i16 VR128:$src), 1)>; 78070b57cec5SDimitry Andricdef : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128:$src))), 78080b57cec5SDimitry Andric (VINSERTF128rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm), 78090b57cec5SDimitry Andric (v16i8 VR128:$src), 1)>; 78100b57cec5SDimitry Andric} 78110b57cec5SDimitry Andric 78120b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 78130b57cec5SDimitry Andric// Variable Bit Shifts 78140b57cec5SDimitry Andric// 78150b57cec5SDimitry Andricmulticlass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, 78160b57cec5SDimitry Andric ValueType vt128, ValueType vt256> { 78170b57cec5SDimitry Andric def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), 78180b57cec5SDimitry Andric (ins VR128:$src1, VR128:$src2), 78190b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 78200b57cec5SDimitry Andric [(set VR128:$dst, 78210b57cec5SDimitry Andric (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>, 78220b57cec5SDimitry Andric VEX_4V, Sched<[SchedWriteVarVecShift.XMM]>; 78230b57cec5SDimitry Andric def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), 78240b57cec5SDimitry Andric (ins VR128:$src1, 
i128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst,
               (vt128 (OpNode VR128:$src1,
                       (vt128 (load addr:$src2)))))]>,
             VEX_4V, Sched<[SchedWriteVarVecShift.XMM.Folded,
                            SchedWriteVarVecShift.XMM.ReadAfterFold]>;
  // 256-bit register/register form; uses the YMM variable-shift sched class.
  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
             (ins VR256:$src1, VR256:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst,
               (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>,
             VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM]>;
  // 256-bit register/memory form; the second operand is a folded load.
  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, i256mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst,
               (vt256 (OpNode VR256:$src1,
                       (vt256 (load addr:$src2)))))]>,
             VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded,
                                   SchedWriteVarVecShift.YMM.ReadAfterFold]>;
}

// AVX2 per-element variable shifts. Only selected when VLX is unavailable
// (NoVLX). The Q forms share the D opcodes and differ by VEX_W.
let Predicates = [HasAVX2, NoVLX] in {
  defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", X86vshlv, v4i32, v8i32>;
  defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", X86vshlv, v2i64, v4i64>, VEX_W;
  defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", X86vsrlv, v4i32, v8i32>;
  defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", X86vsrlv, v2i64, v4i64>, VEX_W;
  defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", X86vsrav, v4i32, v8i32>;
}

//===----------------------------------------------------------------------===//
// VGATHER - GATHER Operations

/// avx2_gather - AVX2 gather instructions, one 128-bit ("rm") and one "Yrm"
/// definition per instantiation.
///
///   opc, OpcodeStr   - opcode byte and mnemonic.
///   VTx, VTy         - value types of $dst for the rm / Yrm forms.
///   GatherNode128/256 - pattern fragments matched by the rm / Yrm forms.
///   RC256            - register class used by the Yrm form for $dst, $src1
///                      and $mask (VR128 for the QD/QPS instantiations below).
///   memop128/256     - memory operand kinds for the rm / Yrm forms.
///   MTx, MTy         - value types of the $mask_wb result; default to VTx/VTy.
///
/// Each form produces two results ($dst and $mask_wb). The assembly string
/// prints only $mask, $src2 and $dst; the tying of $src1/$mask to the results
/// is supplied through Constraints at the instantiation site.
// FIXME: Improve scheduling of gather instructions.
multiclass avx2_gather<bits<8> opc, string OpcodeStr, ValueType VTx,
                       ValueType VTy, PatFrag GatherNode128,
                       PatFrag GatherNode256, RegisterClass RC256,
                       X86MemOperand memop128, X86MemOperand memop256,
                       ValueType MTx = VTx, ValueType MTy = VTy> {
  def rm  : AVX28I<opc, MRMSrcMem4VOp3, (outs VR128:$dst, VR128:$mask_wb),
            (ins VR128:$src1, memop128:$src2, VR128:$mask),
            !strconcat(OpcodeStr,
              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
            [(set (VTx VR128:$dst), (MTx VR128:$mask_wb),
                  (GatherNode128 VR128:$src1, VR128:$mask,
                                 vectoraddr:$src2))]>,
            VEX, Sched<[WriteLoad]>;
  def Yrm : AVX28I<opc, MRMSrcMem4VOp3, (outs RC256:$dst, RC256:$mask_wb),
            (ins RC256:$src1, memop256:$src2, RC256:$mask),
            !strconcat(OpcodeStr,
              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
            [(set (VTy RC256:$dst), (MTy RC256:$mask_wb),
                  (GatherNode256 RC256:$src1, RC256:$mask,
                                 vectoraddr:$src2))]>,
            VEX, VEX_L, Sched<[WriteLoad]>;
}

let Predicates = [HasAVX2] in {
  // Both results are early-clobber, $src1 is tied to $dst and $mask is tied
  // to $mask_wb.
  let mayLoad = 1, hasSideEffects = 0, Constraints
    = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
    in {
    defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", v2i64, v4i64, mgatherv4i32,
                        mgatherv4i32, VR256, vx128mem, vx256mem>, VEX_W;
    defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", v2i64, v4i64, mgatherv2i64,
                        mgatherv4i64, VR256, vx128mem, vy256mem>, VEX_W;
    defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", v4i32, v8i32, mgatherv4i32,
                        mgatherv8i32, VR256, vx128mem, vy256mem>;
    defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", v4i32, v4i32, mgatherv2i64,
                        mgatherv4i64, VR128, vx64mem, vy128mem>;

    // Floating-point gathers pass integer mask value types (MTx/MTy)
    // explicitly instead of defaulting to the FP result types.
    let ExeDomain = SSEPackedDouble in {
      defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", v2f64, v4f64, mgatherv4i32,
                          mgatherv4i32, VR256, vx128mem, vx256mem,
                          v2i64, v4i64>, VEX_W;
      defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", v2f64, v4f64, mgatherv2i64,
                          mgatherv4i64, VR256, vx128mem, vy256mem,
                          v2i64, v4i64>, VEX_W;
    }

    let ExeDomain = SSEPackedSingle in {
      defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", v4f32, v8f32, mgatherv4i32,
                          mgatherv8i32, VR256, vx128mem, vy256mem,
                          v4i32, v8i32>;
      defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", v4f32, v4f32, mgatherv2i64,
                          mgatherv4i64, VR128, vx64mem, vy128mem,
                          v4i32, v4i32>;
    }
  }
}

//===----------------------------------------------------------------------===//
// GFNI instructions
//===----------------------------------------------------------------------===//

/// GF2P8MULB_rm - register/register and register/memory forms of GF2P8MULB.
///
///   OpcodeStr - mnemonic.
///   OpVT, RC  - vector value type and register class of the operation.
///   MemOpFrag - load fragment used to fold the memory operand.
///   X86MemOp  - memory operand kind.
///   Is2Addr   - selects the two-operand assembly string (destructive form)
///               instead of the three-operand one.
///   sched     - scheduling class. Defaults to SchedWriteVecALU.XMM so
///               existing instantiations are unaffected; 256-bit users
///               should pass the YMM class.
multiclass GF2P8MULB_rm<string OpcodeStr, ValueType OpVT,
                        RegisterClass RC, PatFrag MemOpFrag,
                        X86MemOperand X86MemOp, bit Is2Addr = 0,
                        X86FoldableSchedWrite sched = SchedWriteVecALU.XMM> {
  let ExeDomain = SSEPackedInt,
      AsmString = !if(Is2Addr,
        OpcodeStr##"\t{$src2, $dst|$dst, $src2}",
        OpcodeStr##"\t{$src2, $src1, $dst|$dst, $src1, $src2}") in {
    let isCommutable = 1 in
    def rr : PDI<0xCF, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), "",
                 [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, RC:$src2)))]>,
             Sched<[sched]>, T8PD;

    def rm : PDI<0xCF, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, X86MemOp:$src2), "",
                 [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1,
                                 (MemOpFrag addr:$src2))))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>, T8PD;
  }
}

/// GF2P8AFFINE_rmi - reg/reg/imm8 and reg/mem/imm8 forms of the GF2P8AFFINE*
/// instructions.
///
///   Op, OpStr - opcode byte and mnemonic.
///   OpVT, RC  - vector value type and register class of the operation.
///   OpNode    - selection DAG node matched by both forms.
///   MemOpFrag - load fragment used to fold the memory operand.
///   X86MemOp  - memory operand kind.
///   Is2Addr   - selects the destructive (two-register) assembly string.
///   sched     - scheduling class. Defaults to SchedWriteVecALU.XMM so
///               existing instantiations are unaffected; 256-bit users
///               should pass the YMM class.
multiclass GF2P8AFFINE_rmi<bits<8> Op, string OpStr, ValueType OpVT,
                           SDNode OpNode, RegisterClass RC, PatFrag MemOpFrag,
                           X86MemOperand X86MemOp, bit Is2Addr = 0,
                           X86FoldableSchedWrite sched = SchedWriteVecALU.XMM> {
  let AsmString = !if(Is2Addr,
      OpStr##"\t{$src3, $src2, $dst|$dst, $src2, $src3}",
      OpStr##"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in {
  def rri : Ii8<Op, MRMSrcReg, (outs RC:$dst),
              (ins RC:$src1, RC:$src2, u8imm:$src3), "",
              [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))],
              SSEPackedInt>, Sched<[sched]>;
  def rmi : Ii8<Op, MRMSrcMem, (outs RC:$dst),
              (ins RC:$src1, X86MemOp:$src2, u8imm:$src3), "",
              [(set RC:$dst, (OpVT (OpNode RC:$src1,
                                    (MemOpFrag addr:$src2),
                              timm:$src3)))], SSEPackedInt>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

/// GF2P8AFFINE_common - instantiates the legacy SSE form (NAME), the VEX
/// 128-bit form (V##NAME) and the VEX 256-bit form (V##NAME##Y) of one
/// GF2P8AFFINE* instruction.
multiclass GF2P8AFFINE_common<bits<8> Op, string OpStr, SDNode OpNode> {
  // The legacy SSE encoding uses `memop` rather than `load`, matching the
  // legacy GF2P8MULB definition below: non-VEX 128-bit memory operands must
  // be aligned, so an arbitrary (possibly unaligned) load must not be folded.
  let Constraints = "$src1 = $dst",
      Predicates  = [HasGFNI, UseSSE2] in
  defm NAME         : GF2P8AFFINE_rmi<Op, OpStr, v16i8, OpNode,
                                      VR128, memop, i128mem, 1>;
  let Predicates  = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
    defm V##NAME    : GF2P8AFFINE_rmi<Op, "v"##OpStr, v16i8, OpNode, VR128,
                                      load, i128mem>, VEX_4V, VEX_W;
    // The 256-bit form is scheduled with the YMM class rather than the
    // XMM default.
    defm V##NAME##Y : GF2P8AFFINE_rmi<Op, "v"##OpStr, v32i8, OpNode, VR256,
                                      load, i256mem, 0,
                                      SchedWriteVecALU.YMM>,
                                      VEX_4V, VEX_L, VEX_W;
  }
}

// GF2P8MULB
let Constraints = "$src1 = $dst",
    Predicates  = [HasGFNI, UseSSE2] in
defm GF2P8MULB      : GF2P8MULB_rm<"gf2p8mulb", v16i8, VR128, memop,
                                    i128mem, 1>;
let Predicates  = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
  defm VGF2P8MULB   : GF2P8MULB_rm<"vgf2p8mulb", v16i8, VR128, load,
                                   i128mem>, VEX_4V;
  // The 256-bit form is scheduled with the YMM class rather than the
  // XMM default.
  defm VGF2P8MULBY  : GF2P8MULB_rm<"vgf2p8mulb", v32i8, VR256, load,
                                   i256mem, 0, SchedWriteVecALU.YMM>,
                                   VEX_4V, VEX_L;
}
// GF2P8AFFINEINVQB, GF2P8AFFINEQB
let isCommutable = 0 in {
  defm GF2P8AFFINEINVQB : GF2P8AFFINE_common<0xCF, "gf2p8affineinvqb",
                                             X86GF2P8affineinvqb>, TAPD;
  defm GF2P8AFFINEQB    : GF2P8AFFINE_common<0xCE, "gf2p8affineqb",
                                             X86GF2P8affineqb>, TAPD;
}