//===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 SSE instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//

/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
///
/// Emits two isCodeGenOnly records sharing opcode `opc`:
///   rr - register/register form (MRMSrcReg); marked commutable.
///   rm - register/memory form (MRMSrcMem); $src2 is matched as a `load`.
/// Is2Addr picks the assembly string: the two-operand "$src2, $dst" form when
/// set (the default), otherwise the three-operand "$src2, $src1, $dst" form.
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           RegisterClass RC, X86MemOperand x86memop,
                           Domain d, X86FoldableSchedWrite sched,
                           bit Is2Addr = 1> {
let isCodeGenOnly = 1 in {
  let isCommutable = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], d>,
       Sched<[sched]>;
  }
  def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], d>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
///
/// Emits rr_Int / rm_Int records built on SI_Int. Both have hasSideEffects = 0;
/// rm_Int additionally sets mayLoad and matches its memory operand through the
/// caller-supplied `mem_frags`. The result of OpNode is typed as `VT`.
/// Is2Addr selects the two- vs. three-operand assembly string.
multiclass sse12_fp_scalar_int<bits<8> opc,
                               SDPatternOperator OpNode, RegisterClass RC,
                               ValueType VT, string asm, Operand memopr,
                               PatFrags mem_frags, Domain d,
                               X86FoldableSchedWrite sched, bit Is2Addr = 1> {
let hasSideEffects = 0 in {
  def rr_Int : SI_Int<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (VT (OpNode RC:$src1, RC:$src2)))], d>,
       Sched<[sched]>;
  let mayLoad = 1 in
  def rm_Int : SI_Int<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (VT (OpNode RC:$src1, (mem_frags addr:$src2))))], d>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

/// sse12_fp_packed - SSE 1 & 2 packed instructions class
///
/// Emits rr (commutable) and rm (mayLoad) records built on PI. The rr pattern
/// types the OpNode result as `vt`; the rm pattern matches $src2 through the
/// caller-supplied `mem_frag`. Is2Addr selects the two- vs. three-operand
/// assembly string.
multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           RegisterClass RC, ValueType vt,
                           X86MemOperand x86memop, PatFrag mem_frag,
                           Domain d, X86FoldableSchedWrite sched,
                           bit Is2Addr = 1> {
  let isCommutable = 1 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>,
       Sched<[sched]>;
  let mayLoad = 1 in
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
          d>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}

/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
///
/// Same rr/rm shape as sse12_fp_packed, but the selection patterns are passed
/// in by the caller (`pat_rr`, `pat_rm`) instead of being built from an
/// OpNode. Both records set hasSideEffects = 0; rr is commutable and rm sets
/// mayLoad.
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
                                      string OpcodeStr, X86MemOperand x86memop,
                                      X86FoldableSchedWrite sched,
                                      list<dag> pat_rr, list<dag> pat_rm,
                                      bit Is2Addr = 1> {
  let isCommutable = 1, hasSideEffects = 0 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rr, d>,
       Sched<[sched]>;
  let hasSideEffects = 0, mayLoad = 1 in
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rm, d>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}


// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
  // One pseudo per scalar FP register class (FR16, FR32, FR64) plus VR128 for
  // f128. The f16 and f64 forms require SSE2; the f32 and f128 forms only
  // require SSE1. All of them are restricted to NoAVX512.
  def FsFLD0SH : I<0, Pseudo, (outs FR16:$dst), (ins), "",
                   [(set FR16:$dst, fp16imm0)]>, Requires<[HasSSE2, NoAVX512]>;
  def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
                   [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoAVX512]>;
  def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
                   [(set FR64:$dst, fp64imm0)]>, Requires<[HasSSE2, NoAVX512]>;
  def FsFLD0F128 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                     [(set VR128:$dst, fp128imm0)]>, Requires<[HasSSE1, NoAVX512]>;
}

//===----------------------------------------------------------------------===//
// AVX & SSE - Zero/One Vectors
//===----------------------------------------------------------------------===//

// Alias instruction that maps zero vector to pxor / xorp* for sse.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
               [(set VR128:$dst, (v4f32 immAllZerosV))]>;
}

// V_SET0 itself only carries the v4f32 pattern; map every other 128-bit
// all-zeros vector type onto the same pseudo.
let Predicates = [NoAVX512] in {
def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
def : Pat<(v8f16 immAllZerosV), (V_SET0)>;
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
}


// The same as done above but for AVX.  The 256-bit AVX1 ISA doesn't support PI,
// and doesn't need it because on sandy bridge the register is set to zero
// at the rename stage without using any execution unit, so AVX_SET0
// can be used for vector int instructions without penalty
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                 [(set VR256:$dst, (v8i32 immAllZerosV))]>;
}

// AVX_SET0 itself only carries the v8i32 pattern; map every other 256-bit
// all-zeros vector type onto the same pseudo.
let Predicates = [NoAVX512] in {
def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
def : Pat<(v16f16 immAllZerosV), (AVX_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;
}

// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
  // 128-bit all-ones; no predicate is attached to this def.
  def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                       [(set VR128:$dst, (v4i32 immAllOnesV))]>;
  // 256-bit all-ones on AVX1-only targets, selected only when optimizing for
  // minimum size.
  let Predicates = [HasAVX1Only, OptForMinSize] in {
  def AVX1_SETALLONES: I<0, Pseudo, (outs VR256:$dst), (ins), "",
                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
  }
  // 256-bit all-ones when AVX2 is available.
  let Predicates = [HasAVX2] in
  def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move FP Scalar Instructions
//
// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
// register copies because it's a partial register update; Register-to-register
// movss/movsd is not modeled as an INSERT_SUBREG because INSERT_SUBREG requires
// that the insert be implementable in terms of a copy, and, as just mentioned,
// we don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//

/// sse12_move_rr - register/register scalar move pair.
///
/// Emits:
///   rr     - opcode 0x10, MRMSrcReg, selected for
///            (vt (OpNode VR128:$src1, VR128:$src2)); marked commutable.
///   rr_REV - opcode 0x11, MRMDestReg, no pattern; code-gen-only with
///            ForceDisassemble set, and tied to Name#rr through FoldGenData.
/// The assembly string is `base_opc` followed by `asm_opr`.
multiclass sse12_move_rr<SDNode OpNode, ValueType vt, string base_opc,
                         string asm_opr, Domain d, string Name> {
  let isCommutable = 1 in
  def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
              (ins VR128:$src1, VR128:$src2),
              !strconcat(base_opc, asm_opr),
              [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], d>,
              Sched<[SchedWriteFShuffle.XMM]>;

  // For the disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                  (ins VR128:$src1, VR128:$src2),
                  !strconcat(base_opc, asm_opr), []>,
                  Sched<[SchedWriteFShuffle.XMM]>, FoldGenData<Name#rr>;
}

/// sse12_move - VEX and legacy scalar moves (rr, rr_REV and the mr store).
///
/// For each instantiation NAME this emits the V#NAME (VEX-encoded,
/// three-operand) and NAME (legacy, "$src1 = $dst" tied) rr/rr_REV pairs via
/// sse12_move_rr, the matching mr store records, and ".s" assembler aliases
/// that resolve to the rr_REV encodings.
multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
                      X86MemOperand x86memop, string OpcodeStr,
                      Domain d, string Name, Predicate pred> {
  // AVX
  let Predicates = [UseAVX, OptForSize] in
  defm V#NAME : sse12_move_rr<OpNode, vt, OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d,
                              "V"#Name>,
                              VEX_4V, VEX_LIG, VEX_WIG;

  def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(store RC:$src, addr:$dst)], d>,
                     VEX, VEX_LIG, Sched<[WriteFStore]>, VEX_WIG;
  // SSE1 & 2
  let Constraints = "$src1 = $dst" in {
    let Predicates = [pred, NoSSE41_Or_OptForSize] in
    defm NAME : sse12_move_rr<OpNode, vt, OpcodeStr,
                              "\t{$src2, $dst|$dst, $src2}", d, Name>;
  }

  def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(store RC:$src, addr:$dst)], d>,
                   Sched<[WriteFStore]>;

  // ".s"-suffixed mnemonics map onto the rr_REV records defined above.
  def : InstAlias<"v"#OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>("V"#NAME#"rr_REV")
                   VR128:$dst, VR128:$src1, VR128:$src2), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src2, $dst|$dst, $src2}",
                  (!cast<Instruction>(NAME#"rr_REV")
                   VR128:$dst, VR128:$src2), 0>;
}

// Loading from memory automatically zeroing upper bits.
/// sse12_move_rm - scalar loads (opcode 0x10, MRMSrcMem).
///
/// Emits four records per instantiation:
///   V#NAME#rm / NAME#rm         - VR128 destination, matched through
///                                 `vzloadfrag` and typed as `vt`.
///   V#NAME#rm_alt / NAME#rm_alt - code-gen-only, `RC` destination, matched
///                                 through `mem_pat`.
multiclass sse12_move_rm<RegisterClass RC, ValueType vt, X86MemOperand x86memop,
                         PatFrag mem_pat, PatFrag vzloadfrag, string OpcodeStr,
                         Domain d> {
  def V#NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
                     VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
  def NAME#rm   : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
                     Sched<[WriteFLoad]>;

  // _alt version uses FR32/FR64 register class.
  let isCodeGenOnly = 1 in {
  def V#NAME#rm_alt : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                         [(set RC:$dst, (mem_pat addr:$src))], d>,
                         VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
  def NAME#rm_alt   : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                         [(set RC:$dst, (mem_pat addr:$src))], d>,
                         Sched<[WriteFLoad]>;
  }
}

defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
                        SSEPackedSingle, "MOVSS", UseSSE1>, XS;
defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
                        SSEPackedDouble, "MOVSD", UseSSE2>, XD;

let canFoldAsLoad = 1, isReMaterializable = 1 in {
  defm MOVSS : sse12_move_rm<FR32, v4f32, f32mem, loadf32, X86vzload32, "movss",
                             SSEPackedSingle>, XS;
  defm MOVSD : sse12_move_rm<FR64, v2f64, f64mem, loadf64, X86vzload64, "movsd",
                             SSEPackedDouble>, XD;
}

// Patterns
let Predicates = [UseAVX] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
}

let Predicates = [UseAVX, OptForSize] in {
  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
  // MOVSS to the lower bits.
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (VMOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (VMOVSSrr (v4i32 (V_SET0)), VR128:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSrr (v4f32 (V_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSrr (v4i32 (V_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)))), sub_xmm)>;
}

let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVSS to the lower bits.
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
          (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
          (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
}

let Predicates = [UseSSE2] in
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
          (MOVSDrm addr:$src)>;

let Predicates = [UseSSE1] in
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
          (MOVSSrm addr:$src)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
//===----------------------------------------------------------------------===//

/// sse12_mov_packed - packed register move and load pair.
///
/// Emits:
///   rr - MRMSrcReg, no pattern, hasSideEffects = 0 and isMoveReg = 1;
///        scheduled as sched.RR.
///   rm - MRMSrcMem, selected for (ld_frag addr:$src); foldable as a load and
///        rematerializable; scheduled as sched.RM.
multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
                            X86MemOperand x86memop, PatFrag ld_frag,
                            string asm, Domain d,
                            X86SchedWriteMoveLS sched> {
let hasSideEffects = 0, isMoveReg = 1 in
  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>,
           Sched<[sched.RR]>;
let canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                   [(set RC:$dst, (ld_frag addr:$src))], d>,
           Sched<[sched.RM]>;
}

// VEX-encoded forms: aligned moves use opcode 0x28, unaligned moves 0x10.
let Predicates = [HasAVX, NoVLX] in {
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
                                SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                                PS, VEX, VEX_WIG;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
                                SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                                PD, VEX, VEX_WIG;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
                                SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                                PS, VEX, VEX_WIG;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
                                SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                                PD, VEX, VEX_WIG;

defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, "movaps",
                                 SSEPackedSingle, SchedWriteFMoveLS.YMM>,
                                 PS, VEX, VEX_L, VEX_WIG;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd",
                                 SSEPackedDouble, SchedWriteFMoveLS.YMM>,
                                 PD, VEX, VEX_L, VEX_WIG;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups",
                                 SSEPackedSingle, SchedWriteFMoveLS.YMM>,
                                 PS, VEX, VEX_L, VEX_WIG;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd",
                                 SSEPackedDouble, SchedWriteFMoveLS.YMM>,
                                 PD, VEX, VEX_L, VEX_WIG;
}

Andriclet Predicates = [UseSSE1] in { 3820b57cec5SDimitry Andricdefm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps", 3830b57cec5SDimitry Andric SSEPackedSingle, SchedWriteFMoveLS.XMM>, 3840b57cec5SDimitry Andric PS; 3850b57cec5SDimitry Andricdefm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups", 3860b57cec5SDimitry Andric SSEPackedSingle, SchedWriteFMoveLS.XMM>, 3870b57cec5SDimitry Andric PS; 3880b57cec5SDimitry Andric} 3890b57cec5SDimitry Andriclet Predicates = [UseSSE2] in { 3900b57cec5SDimitry Andricdefm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd", 3910b57cec5SDimitry Andric SSEPackedDouble, SchedWriteFMoveLS.XMM>, 3920b57cec5SDimitry Andric PD; 3930b57cec5SDimitry Andricdefm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", 3940b57cec5SDimitry Andric SSEPackedDouble, SchedWriteFMoveLS.XMM>, 3950b57cec5SDimitry Andric PD; 3960b57cec5SDimitry Andric} 3970b57cec5SDimitry Andric 3980b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 3990b57cec5SDimitry Andriclet SchedRW = [SchedWriteFMoveLS.XMM.MR] in { 4000b57cec5SDimitry Andricdef VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 4010b57cec5SDimitry Andric "movaps\t{$src, $dst|$dst, $src}", 4020b57cec5SDimitry Andric [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, 4030b57cec5SDimitry Andric VEX, VEX_WIG; 4040b57cec5SDimitry Andricdef VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 4050b57cec5SDimitry Andric "movapd\t{$src, $dst|$dst, $src}", 4060b57cec5SDimitry Andric [(alignedstore (v2f64 VR128:$src), addr:$dst)]>, 4070b57cec5SDimitry Andric VEX, VEX_WIG; 4080b57cec5SDimitry Andricdef VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 4090b57cec5SDimitry Andric "movups\t{$src, $dst|$dst, $src}", 4100b57cec5SDimitry Andric [(store (v4f32 VR128:$src), addr:$dst)]>, 4110b57cec5SDimitry Andric VEX, VEX_WIG; 4120b57cec5SDimitry Andricdef 
VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 4130b57cec5SDimitry Andric "movupd\t{$src, $dst|$dst, $src}", 4140b57cec5SDimitry Andric [(store (v2f64 VR128:$src), addr:$dst)]>, 4150b57cec5SDimitry Andric VEX, VEX_WIG; 4160b57cec5SDimitry Andric} // SchedRW 4170b57cec5SDimitry Andric 4180b57cec5SDimitry Andriclet SchedRW = [SchedWriteFMoveLS.YMM.MR] in { 4190b57cec5SDimitry Andricdef VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 4200b57cec5SDimitry Andric "movaps\t{$src, $dst|$dst, $src}", 4210b57cec5SDimitry Andric [(alignedstore (v8f32 VR256:$src), addr:$dst)]>, 4220b57cec5SDimitry Andric VEX, VEX_L, VEX_WIG; 4230b57cec5SDimitry Andricdef VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 4240b57cec5SDimitry Andric "movapd\t{$src, $dst|$dst, $src}", 4250b57cec5SDimitry Andric [(alignedstore (v4f64 VR256:$src), addr:$dst)]>, 4260b57cec5SDimitry Andric VEX, VEX_L, VEX_WIG; 4270b57cec5SDimitry Andricdef VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 4280b57cec5SDimitry Andric "movups\t{$src, $dst|$dst, $src}", 4290b57cec5SDimitry Andric [(store (v8f32 VR256:$src), addr:$dst)]>, 4300b57cec5SDimitry Andric VEX, VEX_L, VEX_WIG; 4310b57cec5SDimitry Andricdef VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 4320b57cec5SDimitry Andric "movupd\t{$src, $dst|$dst, $src}", 4330b57cec5SDimitry Andric [(store (v4f64 VR256:$src), addr:$dst)]>, 4340b57cec5SDimitry Andric VEX, VEX_L, VEX_WIG; 4350b57cec5SDimitry Andric} // SchedRW 4360b57cec5SDimitry Andric} // Predicate 4370b57cec5SDimitry Andric 4380b57cec5SDimitry Andric// For disassembler 4390b57cec5SDimitry Andriclet isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, 4400b57cec5SDimitry Andric isMoveReg = 1 in { 4410b57cec5SDimitry Andriclet SchedRW = [SchedWriteFMoveLS.XMM.RR] in { 4420b57cec5SDimitry Andric def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst), 
4430b57cec5SDimitry Andric (ins VR128:$src), 4440b57cec5SDimitry Andric "movaps\t{$src, $dst|$dst, $src}", []>, 4450b57cec5SDimitry Andric VEX, VEX_WIG, FoldGenData<"VMOVAPSrr">; 4460b57cec5SDimitry Andric def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst), 4470b57cec5SDimitry Andric (ins VR128:$src), 4480b57cec5SDimitry Andric "movapd\t{$src, $dst|$dst, $src}", []>, 4490b57cec5SDimitry Andric VEX, VEX_WIG, FoldGenData<"VMOVAPDrr">; 4500b57cec5SDimitry Andric def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst), 4510b57cec5SDimitry Andric (ins VR128:$src), 4520b57cec5SDimitry Andric "movups\t{$src, $dst|$dst, $src}", []>, 4530b57cec5SDimitry Andric VEX, VEX_WIG, FoldGenData<"VMOVUPSrr">; 4540b57cec5SDimitry Andric def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst), 4550b57cec5SDimitry Andric (ins VR128:$src), 4560b57cec5SDimitry Andric "movupd\t{$src, $dst|$dst, $src}", []>, 4570b57cec5SDimitry Andric VEX, VEX_WIG, FoldGenData<"VMOVUPDrr">; 4580b57cec5SDimitry Andric} // SchedRW 4590b57cec5SDimitry Andric 4600b57cec5SDimitry Andriclet SchedRW = [SchedWriteFMoveLS.YMM.RR] in { 4610b57cec5SDimitry Andric def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst), 4620b57cec5SDimitry Andric (ins VR256:$src), 4630b57cec5SDimitry Andric "movaps\t{$src, $dst|$dst, $src}", []>, 4640b57cec5SDimitry Andric VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVAPSYrr">; 4650b57cec5SDimitry Andric def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst), 4660b57cec5SDimitry Andric (ins VR256:$src), 4670b57cec5SDimitry Andric "movapd\t{$src, $dst|$dst, $src}", []>, 4680b57cec5SDimitry Andric VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVAPDYrr">; 4690b57cec5SDimitry Andric def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst), 4700b57cec5SDimitry Andric (ins VR256:$src), 4710b57cec5SDimitry Andric "movups\t{$src, $dst|$dst, $src}", []>, 4720b57cec5SDimitry Andric VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPSYrr">; 4730b57cec5SDimitry Andric def VMOVUPDYrr_REV : 
VPDI<0x11, MRMDestReg, (outs VR256:$dst), 4740b57cec5SDimitry Andric (ins VR256:$src), 4750b57cec5SDimitry Andric "movupd\t{$src, $dst|$dst, $src}", []>, 4760b57cec5SDimitry Andric VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPDYrr">; 4770b57cec5SDimitry Andric} // SchedRW 4780b57cec5SDimitry Andric} // Predicate 4790b57cec5SDimitry Andric 4800b57cec5SDimitry Andric// Reversed version with ".s" suffix for GAS compatibility. 4810b57cec5SDimitry Andricdef : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}", 4820b57cec5SDimitry Andric (VMOVAPSrr_REV VR128:$dst, VR128:$src), 0>; 4830b57cec5SDimitry Andricdef : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}", 4840b57cec5SDimitry Andric (VMOVAPDrr_REV VR128:$dst, VR128:$src), 0>; 4850b57cec5SDimitry Andricdef : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}", 4860b57cec5SDimitry Andric (VMOVUPSrr_REV VR128:$dst, VR128:$src), 0>; 4870b57cec5SDimitry Andricdef : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}", 4880b57cec5SDimitry Andric (VMOVUPDrr_REV VR128:$dst, VR128:$src), 0>; 4890b57cec5SDimitry Andricdef : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}", 4900b57cec5SDimitry Andric (VMOVAPSYrr_REV VR256:$dst, VR256:$src), 0>; 4910b57cec5SDimitry Andricdef : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}", 4920b57cec5SDimitry Andric (VMOVAPDYrr_REV VR256:$dst, VR256:$src), 0>; 4930b57cec5SDimitry Andricdef : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}", 4940b57cec5SDimitry Andric (VMOVUPSYrr_REV VR256:$dst, VR256:$src), 0>; 4950b57cec5SDimitry Andricdef : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}", 4960b57cec5SDimitry Andric (VMOVUPDYrr_REV VR256:$dst, VR256:$src), 0>; 4970b57cec5SDimitry Andric 4980b57cec5SDimitry Andriclet SchedRW = [SchedWriteFMoveLS.XMM.MR] in { 4990b57cec5SDimitry Andricdef MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 5000b57cec5SDimitry Andric "movaps\t{$src, $dst|$dst, $src}", 5010b57cec5SDimitry Andric [(alignedstore (v4f32 VR128:$src), addr:$dst)]>; 5020b57cec5SDimitry 
// NOTE: the definitions down to the first "} // SchedRW" below sit inside the
// `let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {` scope opened just before
// this point (together with MOVAPSmr).
def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)]>;
def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)]>;
} // SchedRW

// For disassembler
// These are the MRMDestReg (store-opcode) encodings of the register-to-register
// moves. They carry no selection pattern and are tied to their canonical
// counterparts through FoldGenData.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    isMoveReg = 1, SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
  def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movaps\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVAPSrr">;
  def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movapd\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVAPDrr">;
  def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movups\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVUPSrr">;
  def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movupd\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVUPDrr">;
}

// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"movaps.s\t{$src, $dst|$dst, $src}",
                (MOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movapd.s\t{$src, $dst|$dst, $src}",
                (MOVAPDrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movups.s\t{$src, $dst|$dst, $src}",
                (MOVUPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movupd.s\t{$src, $dst|$dst, $src}",
                (MOVUPDrr_REV VR128:$dst, VR128:$src), 0>;

let Predicates = [HasAVX, NoVLX] in {
  // 256-bit load/store need to use floating point load/store in case we don't
  // have AVX2. Execution domain fixing will convert to integer if AVX2 is
  // available and changing the domain is beneficial.
  def : Pat<(alignedloadv4i64 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(alignedloadv8i32 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(alignedloadv16i16 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(alignedloadv32i8 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(loadv4i64 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(loadv8i32 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(loadv16i16 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(loadv32i8 addr:$src),
            (VMOVUPSYrm addr:$src)>;

  def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore (v16i16 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore (v32i8 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v4i64 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v8i32 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;

  // Half-precision vectors (128-bit and 256-bit) reuse the packed-single
  // moves as well.
  def : Pat<(alignedloadv8f16 addr:$src),
            (VMOVAPSrm addr:$src)>;
  def : Pat<(loadv8f16 addr:$src),
            (VMOVUPSrm addr:$src)>;
  def : Pat<(alignedstore (v8f16 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8f16 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedloadv16f16 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(loadv16f16 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(alignedstore (v16f16 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v16f16 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
}

// Use movaps / movups for SSE integer load / store (one byte shorter).
// The instructions selected below are then converted to MOVDQA/MOVDQU
// during the SSE domain pass.
let Predicates = [UseSSE1] in {
  def : Pat<(alignedloadv2i64 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(alignedloadv4i32 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(alignedloadv8i16 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(alignedloadv16i8 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src),
            (MOVUPSrm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),
            (MOVUPSrm addr:$src)>;
  def : Pat<(loadv8i16 addr:$src),
            (MOVUPSrm addr:$src)>;
  def : Pat<(loadv16i8 addr:$src),
            (MOVUPSrm addr:$src)>;

  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
}

// v8f16 load/store is selected to the same packed-single moves, but only
// under UseSSE2.
let Predicates = [UseSSE2] in {
  def : Pat<(alignedloadv8f16 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(loadv8f16 addr:$src),
            (MOVUPSrm addr:$src)>;
  def : Pat<(alignedstore (v8f16 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8f16 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//

/// sse12_mov_hilo_packed_base - Defines the PSrm / PDrm load forms of a
/// "move low/high packed" instruction. `base_opc` is the mnemonic stem (the
/// "s"/"d" suffix is appended here) and `asm_opr` is the operand string.
multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDPatternOperator pdnode,
                                      string base_opc, string asm_opr> {
  // No pattern as they need be special cased between high and low.
  let hasSideEffects = 0, mayLoad = 1 in
  def PSrm : PI<opc, MRMSrcMem,
                (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                !strconcat(base_opc, "s", asm_opr),
                [], SSEPackedSingle>, PS,
                Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;

  def PDrm : PI<opc, MRMSrcMem,
                (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                !strconcat(base_opc, "d", asm_opr),
                [(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
                                   (scalar_to_vector (loadf64 addr:$src2)))))],
                SSEPackedDouble>, PD,
                Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

/// sse12_mov_hilo_packed - Instantiates the base multiclass twice: a
/// three-operand VEX form (V#NAME, UseAVX) and a two-operand legacy form
/// (NAME) whose $src1 is tied to $dst.
multiclass sse12_mov_hilo_packed<bits<8>opc, SDPatternOperator pdnode,
                                 string base_opc> {
  let Predicates = [UseAVX] in
    defm V#NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc,
                                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
                                    VEX_4V, VEX_WIG;

  let Constraints = "$src1 = $dst" in
    defm NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc,
                                    "\t{$src2, $dst|$dst, $src2}">;
}

defm MOVL : sse12_mov_hilo_packed<0x12, X86Movsd, "movlp">;

let SchedRW = [WriteFStore] in {
let Predicates = [UseAVX] in {
// The braceless `let` applies to the single def that follows it: the PS
// store has an empty pattern and sets mayStore explicitly.
let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlps\t{$src, $dst|$dst, $src}",
                     []>,
                     VEX, VEX_WIG;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (extractelt (v2f64 VR128:$src),
                                   (iPTR 0))), addr:$dst)]>,
                     VEX, VEX_WIG;
}// UseAVX
let mayStore = 1, hasSideEffects = 0 in
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlps\t{$src, $dst|$dst, $src}",
                   []>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (extractelt (v2f64 VR128:$src),
                                 (iPTR 0))), addr:$dst)]>;
} // SchedRW

let Predicates = [UseSSE1] in {
  // This pattern helps select MOVLPS on SSE1 only targets. With SSE2 we'll
  // end up with a movsd or blend instead of shufp.
  // No need for aligned load, we're only loading 64-bits.
  def : Pat<(X86Shufp (v4f32 (simple_load addr:$src2)), VR128:$src1,
                      (i8 -28)),
            (MOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Shufp (v4f32 (X86vzload64 addr:$src2)), VR128:$src1, (i8 -28)),
            (MOVLPSrm VR128:$src1, addr:$src2)>;

  def : Pat<(v4f32 (X86vzload64 addr:$src)),
            (MOVLPSrm (v4f32 (V_SET0)), addr:$src)>;
  def : Pat<(X86vextractstore64 (v4f32 VR128:$src), addr:$dst),
            (MOVLPSmr addr:$dst, VR128:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Hi packed FP Instructions
//===----------------------------------------------------------------------===//

defm MOVH : sse12_mov_hilo_packed<0x16, X86Unpckl, "movhp">;

let SchedRW = [WriteFStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
let Predicates = [UseAVX] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movhps\t{$src, $dst|$dst, $src}",
                     []>, VEX, VEX_WIG;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movhpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (extractelt
                                   (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                   (iPTR 0))), addr:$dst)]>, VEX, VEX_WIG;
} // UseAVX
let mayStore = 1, hasSideEffects = 0 in
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhps\t{$src, $dst|$dst, $src}",
                   []>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (extractelt
                                 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                 (iPTR 0))), addr:$dst)]>;
} // SchedRW

let Predicates = [UseAVX] in {
  // MOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
            (VMOVHPDrm VR128:$src1, addr:$src2)>;

  def : Pat<(store (f64 (extractelt
                          (v2f64 (X86VPermilpi VR128:$src, (i8 1))),
                          (iPTR 0))), addr:$dst),
            (VMOVHPDmr addr:$dst, VR128:$src)>;

  // MOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;
}

let Predicates = [UseSSE1] in {
  // This pattern helps select MOVHPS on SSE1 only targets. With SSE2 we'll
  // end up with a movsd or blend instead of shufp.
  // No need for aligned load, we're only loading 64-bits.
  def : Pat<(X86Movlhps VR128:$src1, (v4f32 (simple_load addr:$src2))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128:$src1, (v4f32 (X86vzload64 addr:$src2))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;

  def : Pat<(X86vextractstore64 (v4f32 (X86Movhlps VR128:$src, VR128:$src)),
                                addr:$dst),
            (MOVHPSmr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  // MOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
            (MOVHPDrm VR128:$src1, addr:$src2)>;

  def : Pat<(store (f64 (extractelt
                          (v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))),
                          (iPTR 0))), addr:$dst),
            (MOVHPDmr addr:$dst, VR128:$src)>;

  // MOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
}

let Predicates = [UseSSE2, NoSSE41_Or_OptForSize] in {
  // Use MOVLPD to load into the low bits from a full vector unless we can use
  // BLENDPD.
  def : Pat<(X86Movsd VR128:$src1, (v2f64 (simple_load addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

// VEX-encoded three-operand forms.
let Predicates = [UseAVX] in {
  def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst,
                          (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
                        VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG;
  let isCommutable = 1 in
  def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst,
                          (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
                        VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG,
                        NotMemoryFoldable;
}
// Legacy two-operand forms: $src1 is tied to $dst.
let Constraints = "$src1 = $dst" in {
  def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "movlhps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
                      Sched<[SchedWriteFShuffle.XMM]>;
  let isCommutable = 1 in
  def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "movhlps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
                      Sched<[SchedWriteFShuffle.XMM]>, NotMemoryFoldable;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Conversion Instructions
//===----------------------------------------------------------------------===//

/// sse12_cvt_s - Scalar conversion with one source operand. Defines a
/// register form (rr, mnemonic `asm`) and a load-folding form (rm, mnemonic
/// `mem`), both matching `OpNode` and placed in execution domain `d`.
/// `Int2Fpu` is the extra SchedRead attached to the rr form (ReadDefault
/// unless overridden).
multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                       SDPatternOperator OpNode, X86MemOperand x86memop, PatFrag ld_frag,
                       string asm, string mem, X86FoldableSchedWrite sched,
                       Domain d,
                       SchedRead Int2Fpu = ReadDefault> {
  let ExeDomain = d in {
    def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstRC:$dst, (OpNode SrcRC:$src))]>,
                Sched<[sched, Int2Fpu]>;
    def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
                mem#"\t{$src, $dst|$dst, $src}",
                [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
                Sched<[sched.Folded]>;
  }
}

/// sse12_cvt_p - Packed signed-integer to floating-point conversion
/// (any_sint_to_fp from SrcTy to DstTy), register and load-folding forms.
/// Both forms read MXCSR and are marked as able to raise FP exceptions.
multiclass sse12_cvt_p<bits<8> opc, RegisterClass RC, X86MemOperand x86memop,
                       ValueType DstTy, ValueType SrcTy, PatFrag ld_frag,
                       string asm, Domain d, X86FoldableSchedWrite sched> {
let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), asm,
             [(set RC:$dst, (DstTy (any_sint_to_fp (SrcTy RC:$src))))], d>,
             Sched<[sched]>;
  let mayLoad = 1 in
  def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), asm,
             [(set RC:$dst, (DstTy (any_sint_to_fp
                                    (SrcTy (ld_frag addr:$src)))))], d>,
             Sched<[sched.Folded]>;
}
}

/// sse12_vcvt_avx - Three-operand AVX integer-to-scalar-FP conversion forms.
/// No selection patterns are attached here. The memory form appends the
/// optional "{mem}" size suffix to the mnemonic.
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          X86MemOperand x86memop, string asm, string mem,
                          X86FoldableSchedWrite sched, Domain d> {
let hasSideEffects = 0, Predicates = [UseAVX], ExeDomain = d in {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  let mayLoad = 1 in
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins DstRC:$src1, x86memop:$src),
              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
} // hasSideEffects = 0
}

// AVX scalar FP -> integer conversions: truncating (0x2C, any_fp_to_sint) and
// current-rounding-mode (0x2D, lrint/llrint).
let isCodeGenOnly = 1, Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
defm VCVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint, f32mem, loadf32,
                                "cvttss2si", "cvttss2si",
                                WriteCvtSS2I, SSEPackedSingle>,
                                XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint, f32mem, loadf32,
                                "cvttss2si", "cvttss2si",
                                WriteCvtSS2I, SSEPackedSingle>,
                                XS, VEX, VEX_W, VEX_LIG;
defm VCVTTSD2SI   : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64,
                                "cvttsd2si", "cvttsd2si",
                                WriteCvtSD2I, SSEPackedDouble>,
                                XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64,
                                "cvttsd2si", "cvttsd2si",
                                WriteCvtSD2I, SSEPackedDouble>,
                                XD, VEX, VEX_W, VEX_LIG;

defm VCVTSS2SI    : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32,
                                "cvtss2si", "cvtss2si",
                                WriteCvtSS2I, SSEPackedSingle>,
                                XS, VEX, VEX_LIG;
defm VCVTSS2SI64  : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32,
                                "cvtss2si", "cvtss2si",
                                WriteCvtSS2I, SSEPackedSingle>,
                                XS, VEX, VEX_W, VEX_LIG;
defm VCVTSD2SI    : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64,
                                "cvtsd2si", "cvtsd2si",
                                WriteCvtSD2I, SSEPackedDouble>,
                                XD, VEX, VEX_LIG;
defm VCVTSD2SI64  : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64,
                                "cvtsd2si", "cvtsd2si",
                                WriteCvtSD2I, SSEPackedDouble>,
                                XD, VEX, VEX_W, VEX_LIG;
}

// The assembler can recognize rr 64-bit instructions by seeing a rxx
// register, but the same isn't true when only using memory operands,
// provide other assembly "l" and "q" forms to address this explicitly
// where appropriate to do so.
let isCodeGenOnly = 1 in {
defm VCVTSI2SS   : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss", "l",
                                  WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V,
                                  VEX_LIG, SIMD_EXC;
defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss", "q",
                                  WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V,
                                  VEX_W, VEX_LIG, SIMD_EXC;
// NOTE(review): this is the only form of the four without SIMD_EXC --
// presumably because i32 -> f64 is always exact; confirm before changing.
defm VCVTSI2SD   : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd", "l",
                                  WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V,
                                  VEX_LIG;
defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q",
                                  WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V,
                                  VEX_W, VEX_LIG, SIMD_EXC;
} // isCodeGenOnly = 1

let Predicates = [UseAVX] in {
  // The three-operand AVX forms take a pass-through first source; the
  // patterns feed it IMPLICIT_DEF.
  def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f32 (any_sint_to_fp GR32:$src)),
            (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f32 (any_sint_to_fp GR64:$src)),
            (VCVTSI642SSrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
  def : Pat<(f64 (any_sint_to_fp GR32:$src)),
            (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f64 (any_sint_to_fp GR64:$src)),
            (VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

  // lrint producing an i64 result selects the 64-bit destination forms.
  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64rr FR32:$src)>;
  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64rm addr:$src)>;

  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64rr FR64:$src)>;
  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64rm addr:$src)>;
}

// Legacy SSE scalar conversions (REX_W selects the 64-bit integer forms).
let isCodeGenOnly = 1 in {
defm CVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint, f32mem, loadf32,
                               "cvttss2si", "cvttss2si",
                               WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC;
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint, f32mem, loadf32,
                               "cvttss2si", "cvttss2si",
                               WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC;
defm CVTTSD2SI   : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64,
                               "cvttsd2si", "cvttsd2si",
                               WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC;
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64,
                               "cvttsd2si", "cvttsd2si",
                               WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;

defm CVTSS2SI    : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32,
                               "cvtss2si", "cvtss2si",
                               WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC;
defm CVTSS2SI64  : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32,
                               "cvtss2si", "cvtss2si",
                               WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC;
defm CVTSD2SI    : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64,
                               "cvtsd2si", "cvtsd2si",
                               WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC;
defm CVTSD2SI64  : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64,
                               "cvtsd2si", "cvtsd2si",
                               WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;

defm CVTSI2SS    : sse12_cvt_s<0x2A, GR32, FR32, any_sint_to_fp, i32mem, loadi32,
                               "cvtsi2ss", "cvtsi2ss{l}",
                               WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, SIMD_EXC;
defm CVTSI642SS  : sse12_cvt_s<0x2A, GR64, FR32, any_sint_to_fp, i64mem, loadi64,
                               "cvtsi2ss", "cvtsi2ss{q}",
                               WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, REX_W, SIMD_EXC;
// NOTE(review): as with the AVX form, CVTSI2SD carries no SIMD_EXC --
// presumably because i32 -> f64 is always exact; confirm before changing.
defm CVTSI2SD    : sse12_cvt_s<0x2A, GR32, FR64, any_sint_to_fp, i32mem, loadi32,
                               "cvtsi2sd", "cvtsi2sd{l}",
                               WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD;
defm CVTSI642SD  : sse12_cvt_s<0x2A, GR64, FR64, any_sint_to_fp, i64mem, loadi64,
                               "cvtsi2sd", "cvtsi2sd{q}",
                               WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD, REX_W, SIMD_EXC;
} // isCodeGenOnly = 1

let Predicates = [UseSSE1] in {
  def : Pat<(i64 (lrint FR32:$src)), (CVTSS2SI64rr FR32:$src)>;
  def : Pat<(i64 (lrint (loadf32 addr:$src))), (CVTSS2SI64rm addr:$src)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(i64 (lrint FR64:$src)), (CVTSD2SI64rr FR64:$src)>;
  def : Pat<(i64 (lrint (loadf64 addr:$src))), (CVTSD2SI64rm addr:$src)>;
}

// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
10250b57cec5SDimitry Andric 10260b57cec5SDimitry Andricmulticlass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, 10270b57cec5SDimitry Andric ValueType DstVT, ValueType SrcVT, SDNode OpNode, 10285ffd83dbSDimitry Andric Operand memop, PatFrags mem_frags, string asm, 1029480093f4SDimitry Andric X86FoldableSchedWrite sched, Domain d> { 1030480093f4SDimitry Andriclet ExeDomain = d in { 10310b57cec5SDimitry Andric def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), 10320b57cec5SDimitry Andric !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 10330b57cec5SDimitry Andric [(set DstRC:$dst, (DstVT (OpNode (SrcVT SrcRC:$src))))]>, 10340b57cec5SDimitry Andric Sched<[sched]>; 10350b57cec5SDimitry Andric def rm_Int : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src), 10360b57cec5SDimitry Andric !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 10375ffd83dbSDimitry Andric [(set DstRC:$dst, (DstVT (OpNode (SrcVT (mem_frags addr:$src)))))]>, 10380b57cec5SDimitry Andric Sched<[sched.Folded]>; 10390b57cec5SDimitry Andric} 1040480093f4SDimitry Andric} 10410b57cec5SDimitry Andric 10420b57cec5SDimitry Andricmulticlass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, 10430b57cec5SDimitry Andric RegisterClass DstRC, X86MemOperand x86memop, 10440b57cec5SDimitry Andric string asm, string mem, X86FoldableSchedWrite sched, 1045480093f4SDimitry Andric Domain d, bit Is2Addr = 1> { 1046480093f4SDimitry Andriclet hasSideEffects = 0, ExeDomain = d in { 10470b57cec5SDimitry Andric def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2), 10480b57cec5SDimitry Andric !if(Is2Addr, 10490b57cec5SDimitry Andric !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), 10500b57cec5SDimitry Andric !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 10510b57cec5SDimitry Andric []>, Sched<[sched, ReadDefault, ReadInt2Fpu]>; 10520b57cec5SDimitry Andric let mayLoad = 1 in 10530b57cec5SDimitry Andric def rm_Int : SI<opc, MRMSrcMem, (outs 
DstRC:$dst), 10540b57cec5SDimitry Andric (ins DstRC:$src1, x86memop:$src2), 10550b57cec5SDimitry Andric !if(Is2Addr, 10560b57cec5SDimitry Andric asm#"{"#mem#"}\t{$src2, $dst|$dst, $src2}", 10570b57cec5SDimitry Andric asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 10580b57cec5SDimitry Andric []>, Sched<[sched.Folded, sched.ReadAfterFold]>; 10590b57cec5SDimitry Andric} 10600b57cec5SDimitry Andric} 10610b57cec5SDimitry Andric 1062480093f4SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1 in { 10630b57cec5SDimitry Andriclet Predicates = [UseAVX] in { 10640b57cec5SDimitry Andricdefm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, 10650b57cec5SDimitry Andric X86cvts2si, sdmem, sse_load_f64, "cvtsd2si", 1066480093f4SDimitry Andric WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_LIG; 10670b57cec5SDimitry Andricdefm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, 10680b57cec5SDimitry Andric X86cvts2si, sdmem, sse_load_f64, "cvtsd2si", 1069480093f4SDimitry Andric WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_W, VEX_LIG; 10700b57cec5SDimitry Andric} 10710b57cec5SDimitry Andricdefm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si, 1072480093f4SDimitry Andric sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I, 1073480093f4SDimitry Andric SSEPackedDouble>, XD; 10740b57cec5SDimitry Andricdefm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si, 1075480093f4SDimitry Andric sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I, 1076480093f4SDimitry Andric SSEPackedDouble>, XD, REX_W; 1077480093f4SDimitry Andric} 10780b57cec5SDimitry Andric 10790b57cec5SDimitry Andriclet Predicates = [UseAVX] in { 10800b57cec5SDimitry Andricdefm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, 1081480093f4SDimitry Andric i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle, 0>, 1082480093f4SDimitry Andric XS, VEX_4V, VEX_LIG, SIMD_EXC; 10830b57cec5SDimitry Andricdefm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, 
1084480093f4SDimitry Andric i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle, 0>, 1085480093f4SDimitry Andric XS, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC; 10860b57cec5SDimitry Andricdefm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, 1087480093f4SDimitry Andric i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble, 0>, 1088480093f4SDimitry Andric XD, VEX_4V, VEX_LIG; 10890b57cec5SDimitry Andricdefm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, 1090480093f4SDimitry Andric i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble, 0>, 1091480093f4SDimitry Andric XD, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC; 10920b57cec5SDimitry Andric} 10930b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { 10940b57cec5SDimitry Andric defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, 1095480093f4SDimitry Andric i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle>, 1096480093f4SDimitry Andric XS, SIMD_EXC; 10970b57cec5SDimitry Andric defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, 1098480093f4SDimitry Andric i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle>, 1099480093f4SDimitry Andric XS, REX_W, SIMD_EXC; 11000b57cec5SDimitry Andric defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, 1101480093f4SDimitry Andric i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble>, 1102480093f4SDimitry Andric XD; 11030b57cec5SDimitry Andric defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, 1104480093f4SDimitry Andric i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble>, 1105480093f4SDimitry Andric XD, REX_W, SIMD_EXC; 11060b57cec5SDimitry Andric} 11070b57cec5SDimitry Andric 11080b57cec5SDimitry Andricdef : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", 11090b57cec5SDimitry Andric (VCVTSI2SSrr_Int VR128:$dst, VR128:$src1, GR32:$src2), 0, "att">; 11100b57cec5SDimitry Andricdef : InstAlias<"vcvtsi2ss{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", 11110b57cec5SDimitry Andric (VCVTSI642SSrr_Int VR128:$dst, VR128:$src1, GR64:$src2), 0, 
"att">; 11120b57cec5SDimitry Andricdef : InstAlias<"vcvtsi2sd{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", 11130b57cec5SDimitry Andric (VCVTSI2SDrr_Int VR128:$dst, VR128:$src1, GR32:$src2), 0, "att">; 11140b57cec5SDimitry Andricdef : InstAlias<"vcvtsi2sd{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", 11150b57cec5SDimitry Andric (VCVTSI642SDrr_Int VR128:$dst, VR128:$src1, GR64:$src2), 0, "att">; 11160b57cec5SDimitry Andric 11170b57cec5SDimitry Andricdef : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", 11180b57cec5SDimitry Andric (VCVTSI2SSrm_Int VR128:$dst, VR128:$src1, i32mem:$src), 0, "att">; 11190b57cec5SDimitry Andricdef : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", 11200b57cec5SDimitry Andric (VCVTSI2SDrm_Int VR128:$dst, VR128:$src1, i32mem:$src), 0, "att">; 11210b57cec5SDimitry Andric 11220b57cec5SDimitry Andricdef : InstAlias<"cvtsi2ss{l}\t{$src, $dst|$dst, $src}", 11230b57cec5SDimitry Andric (CVTSI2SSrr_Int VR128:$dst, GR32:$src), 0, "att">; 11240b57cec5SDimitry Andricdef : InstAlias<"cvtsi2ss{q}\t{$src, $dst|$dst, $src}", 11250b57cec5SDimitry Andric (CVTSI642SSrr_Int VR128:$dst, GR64:$src), 0, "att">; 11260b57cec5SDimitry Andricdef : InstAlias<"cvtsi2sd{l}\t{$src, $dst|$dst, $src}", 11270b57cec5SDimitry Andric (CVTSI2SDrr_Int VR128:$dst, GR32:$src), 0, "att">; 11280b57cec5SDimitry Andricdef : InstAlias<"cvtsi2sd{q}\t{$src, $dst|$dst, $src}", 11290b57cec5SDimitry Andric (CVTSI642SDrr_Int VR128:$dst, GR64:$src), 0, "att">; 11300b57cec5SDimitry Andric 11310b57cec5SDimitry Andricdef : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}", 11320b57cec5SDimitry Andric (CVTSI2SSrm_Int VR128:$dst, i32mem:$src), 0, "att">; 11330b57cec5SDimitry Andricdef : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}", 11340b57cec5SDimitry Andric (CVTSI2SDrm_Int VR128:$dst, i32mem:$src), 0, "att">; 11350b57cec5SDimitry Andric 11360b57cec5SDimitry Andric/// SSE 1 Only 11370b57cec5SDimitry Andric 11380b57cec5SDimitry Andric// Aliases for intrinsics 
1139480093f4SDimitry Andriclet Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in { 11400b57cec5SDimitry Andricdefm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int, 11410b57cec5SDimitry Andric ssmem, sse_load_f32, "cvttss2si", 1142480093f4SDimitry Andric WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG; 11430b57cec5SDimitry Andricdefm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32, 11440b57cec5SDimitry Andric X86cvtts2Int, ssmem, sse_load_f32, 1145480093f4SDimitry Andric "cvttss2si", WriteCvtSS2I, SSEPackedSingle>, 11460b57cec5SDimitry Andric XS, VEX, VEX_LIG, VEX_W; 11470b57cec5SDimitry Andricdefm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int, 11480b57cec5SDimitry Andric sdmem, sse_load_f64, "cvttsd2si", 1149480093f4SDimitry Andric WriteCvtSS2I, SSEPackedDouble>, XD, VEX, VEX_LIG; 11500b57cec5SDimitry Andricdefm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64, 11510b57cec5SDimitry Andric X86cvtts2Int, sdmem, sse_load_f64, 1152480093f4SDimitry Andric "cvttsd2si", WriteCvtSS2I, SSEPackedDouble>, 11530b57cec5SDimitry Andric XD, VEX, VEX_LIG, VEX_W; 11540b57cec5SDimitry Andric} 1155480093f4SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1 in { 11560b57cec5SDimitry Andricdefm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int, 11570b57cec5SDimitry Andric ssmem, sse_load_f32, "cvttss2si", 1158480093f4SDimitry Andric WriteCvtSS2I, SSEPackedSingle>, XS; 11590b57cec5SDimitry Andricdefm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32, 11600b57cec5SDimitry Andric X86cvtts2Int, ssmem, sse_load_f32, 1161480093f4SDimitry Andric "cvttss2si", WriteCvtSS2I, SSEPackedSingle>, 1162480093f4SDimitry Andric XS, REX_W; 11630b57cec5SDimitry Andricdefm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int, 11640b57cec5SDimitry Andric sdmem, sse_load_f64, "cvttsd2si", 1165480093f4SDimitry Andric WriteCvtSD2I, SSEPackedDouble>, XD; 
11660b57cec5SDimitry Andricdefm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64, 11670b57cec5SDimitry Andric X86cvtts2Int, sdmem, sse_load_f64, 1168480093f4SDimitry Andric "cvttsd2si", WriteCvtSD2I, SSEPackedDouble>, 1169480093f4SDimitry Andric XD, REX_W; 1170480093f4SDimitry Andric} 11710b57cec5SDimitry Andric 11720b57cec5SDimitry Andricdef : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}", 11730b57cec5SDimitry Andric (VCVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; 11740b57cec5SDimitry Andricdef : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}", 11750b57cec5SDimitry Andric (VCVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">; 11760b57cec5SDimitry Andricdef : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}", 11770b57cec5SDimitry Andric (VCVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; 11780b57cec5SDimitry Andricdef : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}", 11790b57cec5SDimitry Andric (VCVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">; 11800b57cec5SDimitry Andricdef : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}", 11810b57cec5SDimitry Andric (VCVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; 11820b57cec5SDimitry Andricdef : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}", 11830b57cec5SDimitry Andric (VCVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">; 11840b57cec5SDimitry Andricdef : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}", 11850b57cec5SDimitry Andric (VCVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; 11860b57cec5SDimitry Andricdef : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}", 11870b57cec5SDimitry Andric (VCVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">; 11880b57cec5SDimitry Andric 11890b57cec5SDimitry Andricdef : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}", 11900b57cec5SDimitry Andric (CVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; 11910b57cec5SDimitry Andricdef : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}", 11920b57cec5SDimitry Andric (CVTTSS2SIrm_Int GR32:$dst, 
f32mem:$src), 0, "att">; 11930b57cec5SDimitry Andricdef : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}", 11940b57cec5SDimitry Andric (CVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; 11950b57cec5SDimitry Andricdef : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}", 11960b57cec5SDimitry Andric (CVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">; 11970b57cec5SDimitry Andricdef : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}", 11980b57cec5SDimitry Andric (CVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; 11990b57cec5SDimitry Andricdef : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}", 12000b57cec5SDimitry Andric (CVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">; 12010b57cec5SDimitry Andricdef : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", 12020b57cec5SDimitry Andric (CVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; 12030b57cec5SDimitry Andricdef : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", 12040b57cec5SDimitry Andric (CVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">; 12050b57cec5SDimitry Andric 1206480093f4SDimitry Andriclet Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in { 12070b57cec5SDimitry Andricdefm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si, 12080b57cec5SDimitry Andric ssmem, sse_load_f32, "cvtss2si", 1209480093f4SDimitry Andric WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG; 12100b57cec5SDimitry Andricdefm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si, 12110b57cec5SDimitry Andric ssmem, sse_load_f32, "cvtss2si", 1212480093f4SDimitry Andric WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_W, VEX_LIG; 12130b57cec5SDimitry Andric} 1214480093f4SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1 in { 12150b57cec5SDimitry Andricdefm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si, 12160b57cec5SDimitry Andric ssmem, sse_load_f32, "cvtss2si", 1217480093f4SDimitry Andric WriteCvtSS2I, SSEPackedSingle>, XS; 12180b57cec5SDimitry 
Andricdefm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si, 12190b57cec5SDimitry Andric ssmem, sse_load_f32, "cvtss2si", 1220480093f4SDimitry Andric WriteCvtSS2I, SSEPackedSingle>, XS, REX_W; 12210b57cec5SDimitry Andric 12220b57cec5SDimitry Andricdefm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load, 12230b57cec5SDimitry Andric "vcvtdq2ps\t{$src, $dst|$dst, $src}", 12240b57cec5SDimitry Andric SSEPackedSingle, WriteCvtI2PS>, 12250b57cec5SDimitry Andric PS, VEX, Requires<[HasAVX, NoVLX]>, VEX_WIG; 12260b57cec5SDimitry Andricdefm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, load, 12270b57cec5SDimitry Andric "vcvtdq2ps\t{$src, $dst|$dst, $src}", 12280b57cec5SDimitry Andric SSEPackedSingle, WriteCvtI2PSY>, 12290b57cec5SDimitry Andric PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, VEX_WIG; 12300b57cec5SDimitry Andric 12310b57cec5SDimitry Andricdefm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memop, 12320b57cec5SDimitry Andric "cvtdq2ps\t{$src, $dst|$dst, $src}", 12330b57cec5SDimitry Andric SSEPackedSingle, WriteCvtI2PS>, 12340b57cec5SDimitry Andric PS, Requires<[UseSSE2]>; 1235480093f4SDimitry Andric} 12360b57cec5SDimitry Andric 12370b57cec5SDimitry Andric// AVX aliases 12380b57cec5SDimitry Andricdef : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}", 12390b57cec5SDimitry Andric (VCVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; 12400b57cec5SDimitry Andricdef : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}", 12410b57cec5SDimitry Andric (VCVTSS2SIrm_Int GR32:$dst, ssmem:$src), 0, "att">; 12420b57cec5SDimitry Andricdef : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}", 12430b57cec5SDimitry Andric (VCVTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; 12440b57cec5SDimitry Andricdef : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}", 12450b57cec5SDimitry Andric (VCVTSD2SIrm_Int GR32:$dst, sdmem:$src), 0, "att">; 12460b57cec5SDimitry Andricdef : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}", 
12470b57cec5SDimitry Andric (VCVTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; 12480b57cec5SDimitry Andricdef : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}", 12490b57cec5SDimitry Andric (VCVTSS2SI64rm_Int GR64:$dst, ssmem:$src), 0, "att">; 12500b57cec5SDimitry Andricdef : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}", 12510b57cec5SDimitry Andric (VCVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; 12520b57cec5SDimitry Andricdef : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}", 12530b57cec5SDimitry Andric (VCVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0, "att">; 12540b57cec5SDimitry Andric 12550b57cec5SDimitry Andric// SSE aliases 12560b57cec5SDimitry Andricdef : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}", 12570b57cec5SDimitry Andric (CVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; 12580b57cec5SDimitry Andricdef : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}", 12590b57cec5SDimitry Andric (CVTSS2SIrm_Int GR32:$dst, ssmem:$src), 0, "att">; 12600b57cec5SDimitry Andricdef : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}", 12610b57cec5SDimitry Andric (CVTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; 12620b57cec5SDimitry Andricdef : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}", 12630b57cec5SDimitry Andric (CVTSD2SIrm_Int GR32:$dst, sdmem:$src), 0, "att">; 12640b57cec5SDimitry Andricdef : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}", 12650b57cec5SDimitry Andric (CVTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; 12660b57cec5SDimitry Andricdef : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}", 12670b57cec5SDimitry Andric (CVTSS2SI64rm_Int GR64:$dst, ssmem:$src), 0, "att">; 12680b57cec5SDimitry Andricdef : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}", 12690b57cec5SDimitry Andric (CVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; 12700b57cec5SDimitry Andricdef : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}", 12710b57cec5SDimitry Andric (CVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0, "att">; 12720b57cec5SDimitry Andric 
12730b57cec5SDimitry Andric/// SSE 2 Only 12740b57cec5SDimitry Andric 12750b57cec5SDimitry Andric// Convert scalar double to scalar single 1276e8d8bef9SDimitry Andriclet isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX], 1277e8d8bef9SDimitry Andric ExeDomain = SSEPackedSingle in { 12780b57cec5SDimitry Andricdef VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), 12790b57cec5SDimitry Andric (ins FR32:$src1, FR64:$src2), 12800b57cec5SDimitry Andric "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 12810b57cec5SDimitry Andric VEX_4V, VEX_LIG, VEX_WIG, 1282480093f4SDimitry Andric Sched<[WriteCvtSD2SS]>, SIMD_EXC; 12830b57cec5SDimitry Andriclet mayLoad = 1 in 12840b57cec5SDimitry Andricdef VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), 12850b57cec5SDimitry Andric (ins FR32:$src1, f64mem:$src2), 12860b57cec5SDimitry Andric "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 12870b57cec5SDimitry Andric XD, VEX_4V, VEX_LIG, VEX_WIG, 1288480093f4SDimitry Andric Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC; 12890b57cec5SDimitry Andric} 12900b57cec5SDimitry Andric 1291480093f4SDimitry Andricdef : Pat<(f32 (any_fpround FR64:$src)), 12920b57cec5SDimitry Andric (VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>, 12930b57cec5SDimitry Andric Requires<[UseAVX]>; 12940b57cec5SDimitry Andric 1295e8d8bef9SDimitry Andriclet isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in { 12960b57cec5SDimitry Andricdef CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), 12970b57cec5SDimitry Andric "cvtsd2ss\t{$src, $dst|$dst, $src}", 1298480093f4SDimitry Andric [(set FR32:$dst, (any_fpround FR64:$src))]>, 1299480093f4SDimitry Andric Sched<[WriteCvtSD2SS]>, SIMD_EXC; 13000b57cec5SDimitry Andricdef CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), 13010b57cec5SDimitry Andric "cvtsd2ss\t{$src, $dst|$dst, $src}", 1302480093f4SDimitry Andric [(set FR32:$dst, (any_fpround (loadf64 addr:$src)))]>, 13030b57cec5SDimitry 
Andric XD, Requires<[UseSSE2, OptForSize]>, 1304*bdd1243dSDimitry Andric Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC; 13050b57cec5SDimitry Andric} 13060b57cec5SDimitry Andric 1307e8d8bef9SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = SSEPackedSingle in { 13080b57cec5SDimitry Andricdef VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg, 13090b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 13100b57cec5SDimitry Andric "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", 13110b57cec5SDimitry Andric [(set VR128:$dst, 13120b57cec5SDimitry Andric (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>, 13130b57cec5SDimitry Andric XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>, 13140b57cec5SDimitry Andric Sched<[WriteCvtSD2SS]>; 13150b57cec5SDimitry Andricdef VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem, 13160b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), 13170b57cec5SDimitry Andric "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", 13180b57cec5SDimitry Andric [(set VR128:$dst, 13195ffd83dbSDimitry Andric (v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>, 13200b57cec5SDimitry Andric XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>, 13210b57cec5SDimitry Andric Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>; 13220b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { 13230b57cec5SDimitry Andricdef CVTSD2SSrr_Int: I<0x5A, MRMSrcReg, 13240b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 13250b57cec5SDimitry Andric "cvtsd2ss\t{$src2, $dst|$dst, $src2}", 13260b57cec5SDimitry Andric [(set VR128:$dst, 13270b57cec5SDimitry Andric (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>, 13280b57cec5SDimitry Andric XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>; 13290b57cec5SDimitry Andricdef CVTSD2SSrm_Int: I<0x5A, MRMSrcMem, 13300b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), 13310b57cec5SDimitry Andric 
"cvtsd2ss\t{$src2, $dst|$dst, $src2}", 13320b57cec5SDimitry Andric [(set VR128:$dst, 13335ffd83dbSDimitry Andric (v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>, 13340b57cec5SDimitry Andric XD, Requires<[UseSSE2]>, 13350b57cec5SDimitry Andric Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>; 13360b57cec5SDimitry Andric} 1337480093f4SDimitry Andric} 13380b57cec5SDimitry Andric 13390b57cec5SDimitry Andric// Convert scalar single to scalar double 13400b57cec5SDimitry Andric// SSE2 instructions with XS prefix 1341e8d8bef9SDimitry Andriclet isCodeGenOnly = 1, hasSideEffects = 0, ExeDomain = SSEPackedSingle in { 13420b57cec5SDimitry Andricdef VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), 13430b57cec5SDimitry Andric (ins FR64:$src1, FR32:$src2), 13440b57cec5SDimitry Andric "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 13450b57cec5SDimitry Andric XS, VEX_4V, VEX_LIG, VEX_WIG, 1346480093f4SDimitry Andric Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>, SIMD_EXC; 13470b57cec5SDimitry Andriclet mayLoad = 1 in 13480b57cec5SDimitry Andricdef VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), 13490b57cec5SDimitry Andric (ins FR64:$src1, f32mem:$src2), 13500b57cec5SDimitry Andric "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 13510b57cec5SDimitry Andric XS, VEX_4V, VEX_LIG, VEX_WIG, 13520b57cec5SDimitry Andric Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>, 1353480093f4SDimitry Andric Requires<[UseAVX, OptForSize]>, SIMD_EXC; 13540b57cec5SDimitry Andric} // isCodeGenOnly = 1, hasSideEffects = 0 13550b57cec5SDimitry Andric 1356480093f4SDimitry Andricdef : Pat<(f64 (any_fpextend FR32:$src)), 13570b57cec5SDimitry Andric (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>, Requires<[UseAVX]>; 1358480093f4SDimitry Andricdef : Pat<(any_fpextend (loadf32 addr:$src)), 13590b57cec5SDimitry Andric (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX, OptForSize]>; 13600b57cec5SDimitry Andric 1361e8d8bef9SDimitry Andriclet 
isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in { 13620b57cec5SDimitry Andricdef CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), 13630b57cec5SDimitry Andric "cvtss2sd\t{$src, $dst|$dst, $src}", 1364480093f4SDimitry Andric [(set FR64:$dst, (any_fpextend FR32:$src))]>, 1365480093f4SDimitry Andric XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>, SIMD_EXC; 13660b57cec5SDimitry Andricdef CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), 13670b57cec5SDimitry Andric "cvtss2sd\t{$src, $dst|$dst, $src}", 1368480093f4SDimitry Andric [(set FR64:$dst, (any_fpextend (loadf32 addr:$src)))]>, 13690b57cec5SDimitry Andric XS, Requires<[UseSSE2, OptForSize]>, 1370*bdd1243dSDimitry Andric Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>, SIMD_EXC; 13710b57cec5SDimitry Andric} // isCodeGenOnly = 1 13720b57cec5SDimitry Andric 1373e8d8bef9SDimitry Andriclet hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1, 1374e8d8bef9SDimitry Andric ExeDomain = SSEPackedSingle in { 13750b57cec5SDimitry Andricdef VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg, 13760b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 13770b57cec5SDimitry Andric "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 13780b57cec5SDimitry Andric []>, XS, VEX_4V, VEX_LIG, VEX_WIG, 13790b57cec5SDimitry Andric Requires<[HasAVX]>, Sched<[WriteCvtSS2SD]>; 13800b57cec5SDimitry Andriclet mayLoad = 1 in 13810b57cec5SDimitry Andricdef VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem, 13820b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), 13830b57cec5SDimitry Andric "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 13840b57cec5SDimitry Andric []>, XS, VEX_4V, VEX_LIG, VEX_WIG, Requires<[HasAVX]>, 13850b57cec5SDimitry Andric Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>; 13860b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix 13870b57cec5SDimitry Andricdef CVTSS2SDrr_Int: I<0x5A, MRMSrcReg, 
13880b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 13890b57cec5SDimitry Andric "cvtss2sd\t{$src2, $dst|$dst, $src2}", 13900b57cec5SDimitry Andric []>, XS, Requires<[UseSSE2]>, 13910b57cec5SDimitry Andric Sched<[WriteCvtSS2SD]>; 13920b57cec5SDimitry Andriclet mayLoad = 1 in 13930b57cec5SDimitry Andricdef CVTSS2SDrm_Int: I<0x5A, MRMSrcMem, 13940b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), 13950b57cec5SDimitry Andric "cvtss2sd\t{$src2, $dst|$dst, $src2}", 13960b57cec5SDimitry Andric []>, XS, Requires<[UseSSE2]>, 13970b57cec5SDimitry Andric Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>; 13980b57cec5SDimitry Andric} 13990b57cec5SDimitry Andric} // hasSideEffects = 0 14000b57cec5SDimitry Andric 14010b57cec5SDimitry Andric// Patterns used for matching (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss and 14020b57cec5SDimitry Andric// (v)cvtss2sd intrinsic sequences from clang which produce unnecessary 14030b57cec5SDimitry Andric// vmovs{s,d} instructions 14040b57cec5SDimitry Andriclet Predicates = [UseAVX] in { 14050b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 14060b57cec5SDimitry Andric (v4f32 VR128:$dst), 14070b57cec5SDimitry Andric (v4f32 (scalar_to_vector 1408480093f4SDimitry Andric (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))), 14090b57cec5SDimitry Andric (VCVTSD2SSrr_Int VR128:$dst, VR128:$src)>; 14100b57cec5SDimitry Andric 14110b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 14120b57cec5SDimitry Andric (v2f64 VR128:$dst), 14130b57cec5SDimitry Andric (v2f64 (scalar_to_vector 1414480093f4SDimitry Andric (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))), 14150b57cec5SDimitry Andric (VCVTSS2SDrr_Int VR128:$dst, VR128:$src)>; 14160b57cec5SDimitry Andric 14170b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 14180b57cec5SDimitry Andric (v4f32 VR128:$dst), 1419480093f4SDimitry Andric (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))), 14200b57cec5SDimitry Andric 
(VCVTSI642SSrr_Int VR128:$dst, GR64:$src)>; 14210b57cec5SDimitry Andric 14220b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 14230b57cec5SDimitry Andric (v4f32 VR128:$dst), 1424480093f4SDimitry Andric (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))), 14250b57cec5SDimitry Andric (VCVTSI642SSrm_Int VR128:$dst, addr:$src)>; 14260b57cec5SDimitry Andric 14270b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 14280b57cec5SDimitry Andric (v4f32 VR128:$dst), 1429480093f4SDimitry Andric (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))), 14300b57cec5SDimitry Andric (VCVTSI2SSrr_Int VR128:$dst, GR32:$src)>; 14310b57cec5SDimitry Andric 14320b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 14330b57cec5SDimitry Andric (v4f32 VR128:$dst), 1434480093f4SDimitry Andric (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))), 14350b57cec5SDimitry Andric (VCVTSI2SSrm_Int VR128:$dst, addr:$src)>; 14360b57cec5SDimitry Andric 14370b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 14380b57cec5SDimitry Andric (v2f64 VR128:$dst), 1439480093f4SDimitry Andric (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))), 14400b57cec5SDimitry Andric (VCVTSI642SDrr_Int VR128:$dst, GR64:$src)>; 14410b57cec5SDimitry Andric 14420b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 14430b57cec5SDimitry Andric (v2f64 VR128:$dst), 1444480093f4SDimitry Andric (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))), 14450b57cec5SDimitry Andric (VCVTSI642SDrm_Int VR128:$dst, addr:$src)>; 14460b57cec5SDimitry Andric 14470b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 14480b57cec5SDimitry Andric (v2f64 VR128:$dst), 1449480093f4SDimitry Andric (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))), 14500b57cec5SDimitry Andric (VCVTSI2SDrr_Int VR128:$dst, GR32:$src)>; 14510b57cec5SDimitry Andric 14520b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 14530b57cec5SDimitry Andric (v2f64 VR128:$dst), 1454480093f4SDimitry Andric (v2f64 
(scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))), 14550b57cec5SDimitry Andric (VCVTSI2SDrm_Int VR128:$dst, addr:$src)>; 14560b57cec5SDimitry Andric} // Predicates = [UseAVX] 14570b57cec5SDimitry Andric 14580b57cec5SDimitry Andriclet Predicates = [UseSSE2] in { 14590b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 14600b57cec5SDimitry Andric (v4f32 VR128:$dst), 14610b57cec5SDimitry Andric (v4f32 (scalar_to_vector 1462480093f4SDimitry Andric (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))), 14630b57cec5SDimitry Andric (CVTSD2SSrr_Int VR128:$dst, VR128:$src)>; 14640b57cec5SDimitry Andric 14650b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 14660b57cec5SDimitry Andric (v2f64 VR128:$dst), 14670b57cec5SDimitry Andric (v2f64 (scalar_to_vector 1468480093f4SDimitry Andric (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))), 14690b57cec5SDimitry Andric (CVTSS2SDrr_Int VR128:$dst, VR128:$src)>; 14700b57cec5SDimitry Andric 14710b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 14720b57cec5SDimitry Andric (v2f64 VR128:$dst), 1473480093f4SDimitry Andric (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))), 14740b57cec5SDimitry Andric (CVTSI642SDrr_Int VR128:$dst, GR64:$src)>; 14750b57cec5SDimitry Andric 14760b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 14770b57cec5SDimitry Andric (v2f64 VR128:$dst), 1478480093f4SDimitry Andric (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))), 14790b57cec5SDimitry Andric (CVTSI642SDrm_Int VR128:$dst, addr:$src)>; 14800b57cec5SDimitry Andric 14810b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 14820b57cec5SDimitry Andric (v2f64 VR128:$dst), 1483480093f4SDimitry Andric (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))), 14840b57cec5SDimitry Andric (CVTSI2SDrr_Int VR128:$dst, GR32:$src)>; 14850b57cec5SDimitry Andric 14860b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 14870b57cec5SDimitry Andric (v2f64 VR128:$dst), 1488480093f4SDimitry Andric (v2f64 
(scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))), 14890b57cec5SDimitry Andric (CVTSI2SDrm_Int VR128:$dst, addr:$src)>; 14900b57cec5SDimitry Andric} // Predicates = [UseSSE2] 14910b57cec5SDimitry Andric 14920b57cec5SDimitry Andriclet Predicates = [UseSSE1] in { 14930b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 14940b57cec5SDimitry Andric (v4f32 VR128:$dst), 1495480093f4SDimitry Andric (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))), 14960b57cec5SDimitry Andric (CVTSI642SSrr_Int VR128:$dst, GR64:$src)>; 14970b57cec5SDimitry Andric 14980b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 14990b57cec5SDimitry Andric (v4f32 VR128:$dst), 1500480093f4SDimitry Andric (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))), 15010b57cec5SDimitry Andric (CVTSI642SSrm_Int VR128:$dst, addr:$src)>; 15020b57cec5SDimitry Andric 15030b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 15040b57cec5SDimitry Andric (v4f32 VR128:$dst), 1505480093f4SDimitry Andric (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))), 15060b57cec5SDimitry Andric (CVTSI2SSrr_Int VR128:$dst, GR32:$src)>; 15070b57cec5SDimitry Andric 15080b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 15090b57cec5SDimitry Andric (v4f32 VR128:$dst), 1510480093f4SDimitry Andric (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))), 15110b57cec5SDimitry Andric (CVTSI2SSrm_Int VR128:$dst, addr:$src)>; 15120b57cec5SDimitry Andric} // Predicates = [UseSSE1] 15130b57cec5SDimitry Andric 15140b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 15150b57cec5SDimitry Andric// Convert packed single/double fp to doubleword 15160b57cec5SDimitry Andricdef VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 15170b57cec5SDimitry Andric "cvtps2dq\t{$src, $dst|$dst, $src}", 15180b57cec5SDimitry Andric [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>, 1519480093f4SDimitry Andric VEX, Sched<[WriteCvtPS2I]>, VEX_WIG, SIMD_EXC; 
15200b57cec5SDimitry Andricdef VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 15210b57cec5SDimitry Andric "cvtps2dq\t{$src, $dst|$dst, $src}", 15220b57cec5SDimitry Andric [(set VR128:$dst, 15230b57cec5SDimitry Andric (v4i32 (X86cvtp2Int (loadv4f32 addr:$src))))]>, 1524480093f4SDimitry Andric VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG, SIMD_EXC; 15250b57cec5SDimitry Andricdef VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 15260b57cec5SDimitry Andric "cvtps2dq\t{$src, $dst|$dst, $src}", 15270b57cec5SDimitry Andric [(set VR256:$dst, 15280b57cec5SDimitry Andric (v8i32 (X86cvtp2Int (v8f32 VR256:$src))))]>, 1529480093f4SDimitry Andric VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG, SIMD_EXC; 15300b57cec5SDimitry Andricdef VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), 15310b57cec5SDimitry Andric "cvtps2dq\t{$src, $dst|$dst, $src}", 15320b57cec5SDimitry Andric [(set VR256:$dst, 15330b57cec5SDimitry Andric (v8i32 (X86cvtp2Int (loadv8f32 addr:$src))))]>, 1534480093f4SDimitry Andric VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG, SIMD_EXC; 15350b57cec5SDimitry Andric} 15360b57cec5SDimitry Andricdef CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 15370b57cec5SDimitry Andric "cvtps2dq\t{$src, $dst|$dst, $src}", 15380b57cec5SDimitry Andric [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>, 1539480093f4SDimitry Andric Sched<[WriteCvtPS2I]>, SIMD_EXC; 15400b57cec5SDimitry Andricdef CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 15410b57cec5SDimitry Andric "cvtps2dq\t{$src, $dst|$dst, $src}", 15420b57cec5SDimitry Andric [(set VR128:$dst, 15430b57cec5SDimitry Andric (v4i32 (X86cvtp2Int (memopv4f32 addr:$src))))]>, 1544480093f4SDimitry Andric Sched<[WriteCvtPS2ILd]>, SIMD_EXC; 15450b57cec5SDimitry Andric 15460b57cec5SDimitry Andric 15470b57cec5SDimitry Andric// Convert Packed Double FP to Packed DW Integers 1548480093f4SDimitry Andriclet Predicates 
= [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { 15490b57cec5SDimitry Andric// The assembler can recognize rr 256-bit instructions by seeing a ymm 15500b57cec5SDimitry Andric// register, but the same isn't true when using memory operands instead. 15510b57cec5SDimitry Andric// Provide other assembly rr and rm forms to address this explicitly. 15520b57cec5SDimitry Andricdef VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 15530b57cec5SDimitry Andric "vcvtpd2dq\t{$src, $dst|$dst, $src}", 15540b57cec5SDimitry Andric [(set VR128:$dst, 15550b57cec5SDimitry Andric (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>, 15560b57cec5SDimitry Andric VEX, Sched<[WriteCvtPD2I]>, VEX_WIG; 15570b57cec5SDimitry Andric 15580b57cec5SDimitry Andric// XMM only 15590b57cec5SDimitry Andricdef VCVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 15600b57cec5SDimitry Andric "vcvtpd2dq{x}\t{$src, $dst|$dst, $src}", 15610b57cec5SDimitry Andric [(set VR128:$dst, 15620b57cec5SDimitry Andric (v4i32 (X86cvtp2Int (loadv2f64 addr:$src))))]>, VEX, 15630b57cec5SDimitry Andric Sched<[WriteCvtPD2ILd]>, VEX_WIG; 15640b57cec5SDimitry Andric 15650b57cec5SDimitry Andric// YMM only 15660b57cec5SDimitry Andricdef VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), 15670b57cec5SDimitry Andric "vcvtpd2dq\t{$src, $dst|$dst, $src}", 15680b57cec5SDimitry Andric [(set VR128:$dst, 15690b57cec5SDimitry Andric (v4i32 (X86cvtp2Int (v4f64 VR256:$src))))]>, 15700b57cec5SDimitry Andric VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG; 15710b57cec5SDimitry Andricdef VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), 15720b57cec5SDimitry Andric "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", 15730b57cec5SDimitry Andric [(set VR128:$dst, 15740b57cec5SDimitry Andric (v4i32 (X86cvtp2Int (loadv4f64 addr:$src))))]>, 15750b57cec5SDimitry Andric VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG; 15760b57cec5SDimitry Andric} 15770b57cec5SDimitry Andric 
// Suffixed spellings for the register forms of VCVTPD2DQ: the "x" alias maps
// to the VR128-source record and the "y" alias to the VR256-source record.
// Both aliases are restricted to the "att" assembler variant.
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
                (VCVTPD2DQrr VR128:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}",
                (VCVTPD2DQYrr VR128:$dst, VR256:$src),
                0, "att">;

// Non-VEX cvtpd2dq: a v2f64 source (memopv2f64 load or register) is converted
// to v4i32 through X86cvtp2Int.
def CVTPD2DQrm
    : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
          "cvtpd2dq\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst,
                (v4i32 (X86cvtp2Int (memopv2f64 addr:$src))))]>,
      Sched<[WriteCvtPD2ILd]>, SIMD_EXC;
def CVTPD2DQrr
    : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
          "cvtpd2dq\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst,
                (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>,
      Sched<[WriteCvtPD2I]>, SIMD_EXC;

// Convert with truncation packed single/double fp to doubleword
// SSE2 packed instructions with XS prefix
//
// Every record in the outer scope reads MXCSR and may raise an FP exception;
// only the VEX-encoded records are additionally gated on [HasAVX, NoVLX].
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  let Predicates = [HasAVX, NoVLX] in {
    // 128-bit forms.
    def VCVTTPS2DQrr
        : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                "cvttps2dq\t{$src, $dst|$dst, $src}",
                [(set VR128:$dst,
                      (v4i32 (X86any_cvttp2si (v4f32 VR128:$src))))]>,
          VEX, Sched<[WriteCvtPS2I]>, VEX_WIG;
    def VCVTTPS2DQrm
        : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                "cvttps2dq\t{$src, $dst|$dst, $src}",
                [(set VR128:$dst,
                      (v4i32 (X86any_cvttp2si (loadv4f32 addr:$src))))]>,
          VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG;

    // 256-bit forms.
    def VCVTTPS2DQYrr
        : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                "cvttps2dq\t{$src, $dst|$dst, $src}",
                [(set VR256:$dst,
                      (v8i32 (X86any_cvttp2si (v8f32 VR256:$src))))]>,
          VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG;
    def VCVTTPS2DQYrm
        : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                "cvttps2dq\t{$src, $dst|$dst, $src}",
                [(set VR256:$dst,
                      (v8i32 (X86any_cvttp2si (loadv8f32 addr:$src))))]>,
          VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG;
  } // Predicates = [HasAVX, NoVLX]

  // Non-VEX forms; the memory form loads through memopv4f32.
  def CVTTPS2DQrr
      : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
             "cvttps2dq\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst,
                   (v4i32 (X86any_cvttp2si (v4f32 VR128:$src))))]>,
        Sched<[WriteCvtPS2I]>;
  def CVTTPS2DQrm
      : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
             "cvttps2dq\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst,
                   (v4i32 (X86any_cvttp2si (memopv4f32 addr:$src))))]>,
        Sched<[WriteCvtPS2ILd]>;
} // Uses = [MXCSR], mayRaiseFPException = 1

// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
// VEX-encoded truncating v2f64/v4f64 -> v4i32 conversions. The destination is
// always VR128, so the memory forms carry an {x}/{y} suffix in their mnemonic
// to tell the 128-bit and 256-bit sources apart.
let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
  // XMM only
  def VCVTTPD2DQrr
      : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
             "cvttpd2dq\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst,
                   (v4i32 (X86any_cvttp2si (v2f64 VR128:$src))))]>,
        VEX, Sched<[WriteCvtPD2I]>, VEX_WIG;
  def VCVTTPD2DQrm
      : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
             "cvttpd2dq{x}\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst,
                   (v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))))]>,
        VEX, Sched<[WriteCvtPD2ILd]>, VEX_WIG;

  // YMM only
  def VCVTTPD2DQYrr
      : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
             "cvttpd2dq\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst,
                   (v4i32 (X86any_cvttp2si (v4f64 VR256:$src))))]>,
        VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG;
  def VCVTTPD2DQYrm
      : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
             "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst,
                   (v4i32 (X86any_cvttp2si (loadv4f64 addr:$src))))]>,
        VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG;
} // Predicates = [HasAVX, NoVLX]

// Suffixed spellings for the register forms, restricted to the "att" variant.
def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
                (VCVTTPD2DQrr VR128:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}",
                (VCVTTPD2DQYrr VR128:$dst, VR256:$src),
                0, "att">;

// Select the 256-bit source records for a generic v4f64 -> v4i32
// any_fp_to_sint, from a register or from a loadv4f64.
let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v4i32 (any_fp_to_sint (v4f64 VR256:$src))),
            (VCVTTPD2DQYrr VR256:$src)>;
  def : Pat<(v4i32 (any_fp_to_sint (loadv4f64 addr:$src))),
            (VCVTTPD2DQYrm addr:$src)>;
}

// Non-VEX cvttpd2dq; the memory form loads through memopv2f64.
def CVTTPD2DQrr
    : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
          "cvttpd2dq\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst,
                (v4i32 (X86any_cvttp2si (v2f64 VR128:$src))))]>,
      Sched<[WriteCvtPD2I]>, SIMD_EXC;
def CVTTPD2DQrm
    : PDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
          "cvttpd2dq\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst,
                (v4i32 (X86any_cvttp2si (memopv2f64 addr:$src))))]>,
      Sched<[WriteCvtPD2ILd]>, SIMD_EXC;

// Convert packed single to packed double
let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
  // SSE2 instructions without OpSize prefix
  // 128-bit result: the memory form reads an f64mem operand via extloadv2f32.
  def VCVTPS2PDrr
      : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
          "vcvtps2pd\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>,
        PS, VEX, Sched<[WriteCvtPS2PD]>, VEX_WIG;
  def VCVTPS2PDrm
      : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
          "vcvtps2pd\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
        PS, VEX, Sched<[WriteCvtPS2PD.Folded]>, VEX_WIG;

  // 256-bit result: the source is still 128 bits wide (VR128 / f128mem).
  def VCVTPS2PDYrr
      : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
          "vcvtps2pd\t{$src, $dst|$dst, $src}",
          [(set VR256:$dst, (v4f64 (any_fpextend (v4f32 VR128:$src))))]>,
        PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, VEX_WIG;
  def VCVTPS2PDYrm
      : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
          "vcvtps2pd\t{$src, $dst|$dst, $src}",
          [(set VR256:$dst, (v4f64 (extloadv4f32 addr:$src)))]>,
        PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, VEX_WIG;
}

// Non-VEX cvtps2pd, gated on UseSSE2.
let Predicates = [UseSSE2], Uses = [MXCSR], mayRaiseFPException = 1 in {
  def CVTPS2PDrr
      : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
          "cvtps2pd\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>,
        PS, Sched<[WriteCvtPS2PD]>;
  def CVTPS2PDrm
      : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
          "cvtps2pd\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
        PS, Sched<[WriteCvtPS2PD.Folded]>;
}

// Convert Packed DW Integers to Packed Double FP
let Predicates = [HasAVX, NoVLX] in {
  // hasSideEffects/mayLoad are set explicitly for this one record only; its
  // pattern reads a 64-bit scalar load placed in a v2i64 and viewed as v4i32.
  let hasSideEffects = 0, mayLoad = 1 in {
    def VCVTDQ2PDrm
        : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
               "vcvtdq2pd\t{$src, $dst|$dst, $src}",
               [(set VR128:$dst,
                     (v2f64 (X86any_VSintToFP
                                (bc_v4i32
                                    (v2i64 (scalar_to_vector
                                               (loadi64 addr:$src)))))))]>,
          VEX, Sched<[WriteCvtI2PDLd]>, VEX_WIG;
  }
  def VCVTDQ2PDrr
      : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
             "vcvtdq2pd\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst,
                   (v2f64 (X86any_VSintToFP (v4i32 VR128:$src))))]>,
        VEX, Sched<[WriteCvtI2PD]>, VEX_WIG;
  def VCVTDQ2PDYrm
      : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
             "vcvtdq2pd\t{$src, $dst|$dst, $src}",
             [(set VR256:$dst,
                   (v4f64 (any_sint_to_fp (loadv4i32 addr:$src))))]>,
        VEX, VEX_L, Sched<[WriteCvtI2PDYLd]>, VEX_WIG;
  def VCVTDQ2PDYrr
      : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
             "vcvtdq2pd\t{$src, $dst|$dst, $src}",
             [(set VR256:$dst,
                   (v4f64 (any_sint_to_fp (v4i32 VR128:$src))))]>,
        VEX, VEX_L, Sched<[WriteCvtI2PDY]>, VEX_WIG;
}

// Non-VEX cvtdq2pd. As above, the explicit flags cover the rm record only.
let hasSideEffects = 0, mayLoad = 1 in {
  def CVTDQ2PDrm
      : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
             "cvtdq2pd\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst,
                   (v2f64 (X86any_VSintToFP
                              (bc_v4i32
                                  (v2i64 (scalar_to_vector
                                             (loadi64 addr:$src)))))))]>,
        Sched<[WriteCvtI2PDLd]>;
}
def CVTDQ2PDrr
    : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
           "cvtdq2pd\t{$src, $dst|$dst, $src}",
           [(set VR128:$dst,
                 (v2f64 (X86any_VSintToFP (v4i32 VR128:$src))))]>,
      Sched<[WriteCvtI2PD]>;

// AVX register conversion intrinsics
// Also select the memory form when the source is an X86vzload64 viewed as
// v4i32.
let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v2f64 (X86any_VSintToFP
                       (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTDQ2PDrm addr:$src)>;
} // Predicates = [HasAVX, NoVLX]

// SSE2 register conversion intrinsics
// Same selection for the non-VEX memory form.
let Predicates = [UseSSE2] in {
  def : Pat<(v2f64 (X86any_VSintToFP
                       (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (CVTDQ2PDrm addr:$src)>;
} // Predicates = [UseSSE2]

// Convert packed double to packed single
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
// VEX-encoded v2f64/v4f64 -> v4f32 rounding conversions. The destination is
// always VR128, so the memory forms carry an {x}/{y} suffix in their mnemonic.
let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
  // XMM only
  def VCVTPD2PSrr
      : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
             "cvtpd2ps\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst,
                   (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>,
        VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG;
  def VCVTPD2PSrm
      : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
             "cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst,
                   (v4f32 (X86any_vfpround (loadv2f64 addr:$src))))]>,
        VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG;

  // YMM source
  def VCVTPD2PSYrr
      : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
             "cvtpd2ps\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst,
                   (v4f32 (X86any_vfpround (v4f64 VR256:$src))))]>,
        VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG;
  def VCVTPD2PSYrm
      : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
             "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst,
                   (v4f32 (X86any_vfpround (loadv4f64 addr:$src))))]>,
        VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG;
} // Predicates = [HasAVX, NoVLX]

// Suffixed spellings for the register forms, restricted to the "att" variant.
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
                (VCVTPD2PSrr VR128:$dst, VR128:$src),
                0, "att">;
def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
                (VCVTPD2PSYrr VR128:$dst, VR256:$src),
                0, "att">;

// Non-VEX cvtpd2ps; the memory form loads through memopv2f64.
def CVTPD2PSrr
    : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
          "cvtpd2ps\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst,
                (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>,
      Sched<[WriteCvtPD2PS]>, SIMD_EXC;
def CVTPD2PSrm
    : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
          "cvtpd2ps\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst,
                (v4f32 (X86any_vfpround (memopv2f64 addr:$src))))]>,
      Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXC;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
//===----------------------------------------------------------------------===//

// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
//
// Emits four opcode-0xC2 records per instantiation:
//   rr_Int / rm_Int - operate on VR128 with value type VT; the memory form
//                     uses operand `memop` and fragment `mem_frags`.
//   rr / rm         - isCodeGenOnly forms on register class RC; the memory
//                     form uses operand `x86memop` and fragment `ld_frag`.
// Only `rr` is marked commutable. `asm` is used verbatim as the assembly
// string, and `sched` supplies the scheduling information.
multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
                            Operand memop, SDNode OpNode, ValueType VT,
                            PatFrag ld_frag, string asm,
                            X86FoldableSchedWrite sched,
                            PatFrags mem_frags> {
  def rr_Int
      : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2, u8imm:$cc), asm,
             [(set VR128:$dst,
                   (OpNode (VT VR128:$src1), VR128:$src2, timm:$cc))]>,
        Sched<[sched]>, SIMD_EXC;
  let mayLoad = 1 in {
    def rm_Int
        : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
               (ins VR128:$src1, memop:$src2, u8imm:$cc), asm,
               [(set VR128:$dst,
                     (OpNode (VT VR128:$src1),
                             (mem_frags addr:$src2), timm:$cc))]>,
          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }

  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in {
      def rr
          : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
                 (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
                 [(set RC:$dst, (OpNode RC:$src1, RC:$src2, timm:$cc))]>,
            Sched<[sched]>, SIMD_EXC;
    }
    def rm
        : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
               [(set RC:$dst,
                     (OpNode RC:$src1, (ld_frag addr:$src2), timm:$cc))]>,
          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

// VEX scalar compares: three-operand assembly strings.
let ExeDomain = SSEPackedSingle in {
  defm VCMPSS
      : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32,
            "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
            SchedWriteFCmpSizes.PS.Scl, sse_load_f32>,
        XS, VEX_4V, VEX_LIG, VEX_WIG;
}
let ExeDomain = SSEPackedDouble in {
  defm VCMPSD
      : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64,
            "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
            SchedWriteFCmpSizes.PD.Scl, sse_load_f64>,
        XD, VEX_4V, VEX_LIG, VEX_WIG;
}

// Non-VEX scalar compares: $src1 is tied to $dst, so the assembly string
// names only $src2 and $dst.
let Constraints = "$src1 = $dst" in {
  let ExeDomain = SSEPackedSingle in {
    defm CMPSS
        : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32,
              "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}",
              SchedWriteFCmpSizes.PS.Scl, sse_load_f32>,
          XS;
  }
  let ExeDomain = SSEPackedDouble in {
    defm CMPSD
        : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64,
              "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
              SchedWriteFCmpSizes.PD.Scl, sse_load_f64>,
          XD;
  }
}

// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
//
// Emits `rr` and `rm` records with no register outputs; the pattern result
// is EFLAGS. Both run in execution domain `d`, and `sched` defaults to
// WriteFComX.
multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC,
                         SDPatternOperator OpNode,
                         ValueType vt, X86MemOperand x86memop,
                         PatFrag ld_frag, string OpcodeStr, Domain d,
                         X86FoldableSchedWrite sched = WriteFComX> {
  let ExeDomain = d in {
    def rr
        : SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
          Sched<[sched]>, SIMD_EXC;
    let mayLoad = 1 in {
      def rm
          : SI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
               [(set EFLAGS,
                     (OpNode (vt RC:$src1), (ld_frag addr:$src2)))]>,
            Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  }
}

// sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp
//
// Same shape as sse12_ord_cmp, but the records are named rr_Int / rm_Int and
// the memory form takes operand `memop` matched through `mem_frags`.
multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
                             ValueType vt, Operand memop,
                             PatFrags mem_frags, string OpcodeStr,
                             Domain d,
                             X86FoldableSchedWrite sched = WriteFComX> {
  let ExeDomain = d in {
    def rr_Int
        : SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
          Sched<[sched]>, SIMD_EXC;
    let mayLoad = 1 in {
      def rm_Int
          : SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
               [(set EFLAGS,
                     (OpNode (vt RC:$src1), (mem_frags addr:$src2)))]>,
            Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  }
}

// All (U)COMIS* records define EFLAGS. Opcode 0x2E is used for the ucomis*
// records and 0x2F for the comis* records; the *_Int variants on VR128 are
// isCodeGenOnly.
let Defs = [EFLAGS] in {
  // VEX-encoded forms on FR32/FR64.
  defm VUCOMISS
      : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32,
                      "ucomiss", SSEPackedSingle>,
        PS, VEX, VEX_LIG, VEX_WIG;
  defm VUCOMISD
      : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64,
                      "ucomisd", SSEPackedDouble>,
        PD, VEX, VEX_LIG, VEX_WIG;
  defm VCOMISS
      : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32,
                      "comiss", SSEPackedSingle>,
        PS, VEX, VEX_LIG, VEX_WIG;
  defm VCOMISD
      : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64,
                      "comisd", SSEPackedDouble>,
        PD, VEX, VEX_LIG, VEX_WIG;

  // VEX-encoded intrinsic forms on VR128.
  let isCodeGenOnly = 1 in {
    defm VUCOMISS
        : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
                            sse_load_f32, "ucomiss", SSEPackedSingle>,
          PS, VEX, VEX_LIG, VEX_WIG;
    defm VUCOMISD
        : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
                            sse_load_f64, "ucomisd", SSEPackedDouble>,
          PD, VEX, VEX_LIG, VEX_WIG;

    defm VCOMISS
        : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
                            sse_load_f32, "comiss", SSEPackedSingle>,
          PS, VEX, VEX_LIG, VEX_WIG;
    defm VCOMISD
        : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
                            sse_load_f64, "comisd", SSEPackedDouble>,
          PD, VEX, VEX_LIG, VEX_WIG;
  }

  // Non-VEX forms on FR32/FR64.
  defm UCOMISS
      : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32,
                      "ucomiss", SSEPackedSingle>,
        PS;
  defm UCOMISD
      : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64,
                      "ucomisd", SSEPackedDouble>,
        PD;
  defm COMISS
      : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32,
                      "comiss", SSEPackedSingle>,
        PS;
  defm COMISD
      : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64,
                      "comisd", SSEPackedDouble>,
        PD;

  // Non-VEX intrinsic forms on VR128.
  let isCodeGenOnly = 1 in {
    defm UCOMISS
        : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
                            sse_load_f32, "ucomiss", SSEPackedSingle>,
          PS;
    defm UCOMISD
        : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
                            sse_load_f64, "ucomisd", SSEPackedDouble>,
          PD;

    defm COMISS
        : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
                            sse_load_f32, "comiss", SSEPackedSingle>,
          PS;
    defm COMISD
        : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
                            sse_load_f64, "comisd", SSEPackedDouble>,
          PD;
  }
} // Defs = [EFLAGS]

// sse12_cmp_packed - sse 1 & 2 compare packed instructions
//
// Emits `rri` (register/register/immediate, marked commutable) and `rmi`
// (register/memory/immediate) records for opcode 0xC2. Both match
// X86any_cmpp on value type VT; the memory form loads through `ld_frag`.
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
                            ValueType VT, string asm,
                            X86FoldableSchedWrite sched,
                            Domain d, PatFrag ld_frag> {
  let isCommutable = 1 in {
    def rri
        : PIi8<0xC2, MRMSrcReg, (outs RC:$dst),
               (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
               [(set RC:$dst,
                     (VT (X86any_cmpp RC:$src1, RC:$src2, timm:$cc)))],
               d>,
          Sched<[sched]>, SIMD_EXC;
  }
  def rmi
      : PIi8<0xC2, MRMSrcMem, (outs RC:$dst),
             (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
             [(set RC:$dst,
                   (VT (X86any_cmpp RC:$src1, (ld_frag addr:$src2),
                                    timm:$cc)))],
             d>,
        Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}

// VEX packed compares, 128-bit then 256-bit.
defm VCMPPS
    : sse12_cmp_packed<VR128, f128mem, v4f32,
          "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
          SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, loadv4f32>,
      PS, VEX_4V, VEX_WIG;
defm VCMPPD
    : sse12_cmp_packed<VR128, f128mem, v2f64,
          "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
          SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, loadv2f64>,
      PD, VEX_4V, VEX_WIG;
defm VCMPPSY
    : sse12_cmp_packed<VR256, f256mem, v8f32,
          "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
          SchedWriteFCmpSizes.PS.YMM, SSEPackedSingle, loadv8f32>,
      PS, VEX_4V, VEX_L, VEX_WIG;
defm VCMPPDY
    : sse12_cmp_packed<VR256, f256mem, v4f64,
          "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
          SchedWriteFCmpSizes.PD.YMM, SSEPackedDouble, loadv4f64>,
      PD, VEX_4V, VEX_L, VEX_WIG;

// Non-VEX packed compares: $src1 is tied to $dst and memory operands are
// matched through the memop fragments.
let Constraints = "$src1 = $dst" in {
  defm CMPPS
      : sse12_cmp_packed<VR128, f128mem, v4f32,
            "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
            SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, memopv4f32>,
        PS;
  defm CMPPD
      : sse12_cmp_packed<VR128, f128mem, v2f64,
            "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
            SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>,
        PD;
}

// Matches a compare immediate whose low three bits are 0, 3, 4 or 7.
// NOTE(review): in the CMPPS/CMPPD predicate encoding these are presumably
// EQ, UNORD, NEQ and ORD, i.e. the operand-order-independent predicates --
// confirm against the Intel SDM.
def CommutableCMPCC : PatLeaf<(timm), [{
  // Only the low three bits of the immediate take part in the test.
  switch (N->getZExtValue() & 0x7) {
  case 0x00:
  case 0x03:
  case 0x04:
  case 0x07:
    return true;
  default:
    return false;
  }
}]>;

// Patterns to select compares with loads in first operand.
// In every pattern below the load is the FIRST operand of the compare node,
// while the selected instruction takes it as its memory (second) operand, so
// the operands are swapped. The patterns are restricted to immediates accepted
// by CommutableCMPCC.
let Predicates = [HasAVX] in {
  def : Pat<(v4f64 (X86any_cmpp (loadv4f64 addr:$src2), VR256:$src1,
                                CommutableCMPCC:$cc)),
            (VCMPPDYrmi VR256:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(v8f32 (X86any_cmpp (loadv8f32 addr:$src2), VR256:$src1,
                                CommutableCMPCC:$cc)),
            (VCMPPSYrmi VR256:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(v2f64 (X86any_cmpp (loadv2f64 addr:$src2), VR128:$src1,
                                CommutableCMPCC:$cc)),
            (VCMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(v4f32 (X86any_cmpp (loadv4f32 addr:$src2), VR128:$src1,
                                CommutableCMPCC:$cc)),
            (VCMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
                          CommutableCMPCC:$cc)),
            (VCMPSDrm FR64:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
                          CommutableCMPCC:$cc)),
            (VCMPSSrm FR32:$src1, addr:$src2, timm:$cc)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(v2f64 (X86any_cmpp (memopv2f64 addr:$src2), VR128:$src1,
                                CommutableCMPCC:$cc)),
            (CMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
                          CommutableCMPCC:$cc)),
            (CMPSDrm FR64:$src1, addr:$src2, timm:$cc)>;
}

let Predicates = [UseSSE1] in {
  def : Pat<(v4f32 (X86any_cmpp (memopv4f32 addr:$src2), VR128:$src1,
                                CommutableCMPCC:$cc)),
            (CMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
                          CommutableCMPCC:$cc)),
            (CMPSSrm FR32:$src1, addr:$src2, timm:$cc)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Shuffle Instructions
//===----------------------------------------------------------------------===//

/// sse12_shuffle - sse 1 & 2 fp shuffle instructions
///
/// Defines two records that share opcode 0xC6 and select X86Shufp with an
/// 8-bit target immediate:
///   rmi - second source is loaded from memory through \p mem_frag.
///   rri - both sources are registers of class \p RC.
/// \p IsCommutable is applied to the register form only.
multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
                         ValueType vt, string asm, PatFrag mem_frag,
                         X86FoldableSchedWrite sched, Domain d,
                         bit IsCommutable = 0> {
  def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
                 (ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
                 [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
                                     (i8 timm:$src3))))], d>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = IsCommutable in
  def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
                 (ins RC:$src1, RC:$src2, u8imm:$src3), asm,
                 [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
                                     (i8 timm:$src3))))], d>,
            Sched<[sched]>;
}

let Predicates = [HasAVX, NoVLX] in {
  defm VSHUFPS  : sse12_shuffle<VR128, f128mem, v4f32,
           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>,
           PS, VEX_4V, VEX_WIG;
  defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv8f32, SchedWriteFShuffle.YMM, SSEPackedSingle>,
           PS, VEX_4V, VEX_L, VEX_WIG;
  defm VSHUFPD  : sse12_shuffle<VR128, f128mem, v2f64,
           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>,
           PD, VEX_4V, VEX_WIG;
  defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv4f64, SchedWriteFShuffle.YMM, SSEPackedDouble>,
           PD, VEX_4V, VEX_L, VEX_WIG;
}
// Two-operand (non-VEX) forms: $src1 is tied to $dst.
let Constraints = "$src1 = $dst" in {
  defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
                    "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                    memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
  defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
                    "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                    memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Unpack FP Instructions
//===----------------------------------------------------------------------===//

/// sse12_unpack_interleave - sse 1 & 2 fp unpack and interleave
///
/// Defines a register-register record (rr) and a register-memory record (rm)
/// for opcode \p opc, both selecting \p OpNode on value type \p vt. The memory
/// form loads its second source through \p mem_frag. \p IsCommutable is
/// applied to the rr form only.
multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
                                   PatFrag mem_frag, RegisterClass RC,
                                   X86MemOperand x86memop, string asm,
                                   X86FoldableSchedWrite sched, Domain d,
                                   bit IsCommutable = 0> {
    let isCommutable = IsCommutable in
    def rr : PI<opc, MRMSrcReg,
                (outs RC:$dst), (ins RC:$src1, RC:$src2),
                asm, [(set RC:$dst,
                           (vt (OpNode RC:$src1, RC:$src2)))], d>,
                Sched<[sched]>;
    def rm : PI<opc, MRMSrcMem,
                (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                asm, [(set RC:$dst,
                           (vt (OpNode RC:$src1,
                                       (mem_frag addr:$src2))))], d>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

let Predicates = [HasAVX, NoVLX] in {
defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, load,
      VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, load,
      VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD, VEX_4V, VEX_WIG;
defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, load,
      VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, load,
      VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SchedWriteFShuffle.XMM, SSEPackedDouble>, PD, VEX_4V, VEX_WIG;

defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, load,
      VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, load,
      VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, load,
      VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, load,
      VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
}// Predicates = [HasAVX, NoVLX]

let Constraints = "$src1 = $dst" in {
  defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memop,
        VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
                       SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
  defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memop,
        VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
                       SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
  defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memop,
        VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
                       SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
  defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memop,
        VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
                       SchedWriteFShuffle.XMM, SSEPackedDouble>, PD;
} // Constraints = "$src1 = $dst"

// With only AVX1, 256-bit integer unpacks are selected to the FP unpack
// instructions of matching element width.
let Predicates = [HasAVX1Only] in {
  def : Pat<(v8i32 (X86Unpckl VR256:$src1, (loadv8i32 addr:$src2))),
            (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
            (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (X86Unpckh VR256:$src1, (loadv8i32 addr:$src2))),
            (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
            (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v4i64 (X86Unpckl VR256:$src1, (loadv4i64 addr:$src2))),
            (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
            (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v4i64 (X86Unpckh VR256:$src1, (loadv4i64 addr:$src2))),
            (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
            (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
}

let Predicates = [UseSSE2] in {
  // Use MOVHPD if the load isn't aligned enough for UNPCKLPD.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                              (v2f64 (simple_load addr:$src2)))),
            (MOVHPDrm VR128:$src1, addr:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Extract Floating-Point Sign mask
//===----------------------------------------------------------------------===//

/// sse12_extr_sign_mask - sse 1 & 2 extract floating-point sign mask
///
/// Defines a single register form (rr) with opcode 0x50 that selects
/// X86movmsk on a \p vt value in \p RC and writes the result to a
/// GR32orGR64 register.
multiclass sse12_extr_sign_mask<RegisterClass RC, ValueType vt,
                                string asm, Domain d> {
  def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set GR32orGR64:$dst, (X86movmsk (vt RC:$src)))], d>,
              Sched<[WriteFMOVMSK]>;
}

let Predicates = [HasAVX] in {
  defm VMOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
                                        SSEPackedSingle>, PS, VEX, VEX_WIG;
  defm VMOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
                                        SSEPackedDouble>, PD, VEX, VEX_WIG;
  defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, v8f32, "movmskps",
                                         SSEPackedSingle>, PS, VEX, VEX_L, VEX_WIG;
  defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, v4f64, "movmskpd",
                                         SSEPackedDouble>, PD, VEX, VEX_L, VEX_WIG;

  // Also support integer VTs to avoid an int->fp bitcast in the DAG.
  def : Pat<(X86movmsk (v4i32 VR128:$src)),
            (VMOVMSKPSrr VR128:$src)>;
  def : Pat<(X86movmsk (v2i64 VR128:$src)),
            (VMOVMSKPDrr VR128:$src)>;
  def : Pat<(X86movmsk (v8i32 VR256:$src)),
            (VMOVMSKPSYrr VR256:$src)>;
  def : Pat<(X86movmsk (v4i64 VR256:$src)),
            (VMOVMSKPDYrr VR256:$src)>;
}

defm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
                                     SSEPackedSingle>, PS;
defm MOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
                                     SSEPackedDouble>, PD;

let Predicates = [UseSSE2] in {
  // Also support integer VTs to avoid an int->fp bitcast in the DAG.
  def : Pat<(X86movmsk (v4i32 VR128:$src)),
            (MOVMSKPSrr VR128:$src)>;
  def : Pat<(X86movmsk (v2i64 VR128:$src)),
            (MOVMSKPDrr VR128:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in { // SSE integer instructions

/// PDI_binop_rm - Simple SSE2 binary operator.
///
/// Defines a register-register record (rr) and a register-memory record (rm)
/// for opcode \p opc, both selecting \p OpNode on \p OpVT. \p Is2Addr picks
/// the assembly string: the two-operand form when set, the three-operand form
/// (with an explicit $src1) otherwise. \p IsCommutable is applied to the rr
/// form only.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                        X86MemOperand x86memop, X86FoldableSchedWrite sched,
                        bit IsCommutable, bit Is2Addr> {
  let isCommutable = IsCommutable in
  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
       Sched<[sched]>;
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // ExeDomain = SSEPackedInt

/// PDI_binop_all - Instantiates PDI_binop_rm three times for one operation:
///   V#NAME   - 128-bit, three-operand form, under [HasAVX, prd].
///   NAME     - 128-bit, two-operand form with $src1 tied to $dst.
///   V#NAME#Y - 256-bit, three-operand form, under [HasAVX2, prd].
/// The "v"-prefixed mnemonic is built from \p OpcodeStr.
multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
                         ValueType OpVT128, ValueType OpVT256,
                         X86SchedWriteWidths sched, bit IsCommutable,
                         Predicate prd> {
let Predicates = [HasAVX, prd] in
  defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
                             VR128, load, i128mem, sched.XMM,
                             IsCommutable, 0>, VEX_4V, VEX_WIG;

let Constraints = "$src1 = $dst" in
  defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
                           memop, i128mem, sched.XMM, IsCommutable, 1>;

let Predicates = [HasAVX2, prd] in
  defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
                               OpVT256, VR256, load, i256mem, sched.YMM,
                               IsCommutable, 0>, VEX_4V, VEX_L, VEX_WIG;
}

// These are ordered here for pattern ordering requirements with the fp versions

defm PAND  : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64,
                           SchedWriteVecLogic, 1, NoVLX>;
defm POR   : PDI_binop_all<0xEB, "por", or, v2i64, v4i64,
                           SchedWriteVecLogic, 1, NoVLX>;
defm PXOR  : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64,
                           SchedWriteVecLogic, 1, NoVLX>;
defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
                           SchedWriteVecLogic, 0, NoVLX>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Logical Instructions
//===----------------------------------------------------------------------===//

/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
///
/// There are no patterns here because isel prefers integer versions for SSE2
/// and later. There are SSE1 v4f32 patterns later.
///
/// Each instantiation produces six groups through sse12_fp_packed_logical_rm:
/// V#NAME#PSY, V#NAME#PDY, V#NAME#PS and V#NAME#PD under [HasAVX, NoVLX], plus
/// NAME#PS and NAME#PD with $src1 tied to $dst. All are passed empty pattern
/// lists.
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasAVX, NoVLX] in {
  defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
        !strconcat(OpcodeStr, "ps"), f256mem, sched.YMM,
        [], [], 0>, PS, VEX_4V, VEX_L, VEX_WIG;

  defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
        !strconcat(OpcodeStr, "pd"), f256mem, sched.YMM,
        [], [], 0>, PD, VEX_4V, VEX_L, VEX_WIG;

  defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
       !strconcat(OpcodeStr, "ps"), f128mem, sched.XMM,
       [], [], 0>, PS, VEX_4V, VEX_WIG;

  defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
       !strconcat(OpcodeStr, "pd"), f128mem, sched.XMM,
       [], [], 0>, PD, VEX_4V, VEX_WIG;
  }

  let Constraints = "$src1 = $dst" in {
    defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
         !strconcat(OpcodeStr, "ps"), f128mem, sched.XMM,
         [], []>, PS;

    defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
         !strconcat(OpcodeStr, "pd"), f128mem, sched.XMM,
         [], []>, PD;
  }
}

defm AND  : sse12_fp_packed_logical<0x54, "and", SchedWriteFLogic>;
defm OR   : sse12_fp_packed_logical<0x56, "or", SchedWriteFLogic>;
defm XOR  : sse12_fp_packed_logical<0x57, "xor", SchedWriteFLogic>;
// NOTE(review): presumably overrides a commutable default set inside
// sse12_fp_packed_logical_rm, which is defined outside this section - confirm.
let isCommutable = 0 in
  defm ANDN : sse12_fp_packed_logical<0x55, "andn", SchedWriteFLogic>;

// PAND/POR/PXOR/PANDN are instantiated above with v4i64 as their 256-bit
// type; these patterns map the other 256-bit integer element types onto the
// same instructions.
let Predicates = [HasAVX2, NoVLX] in {
  def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
            (VPANDYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
            (VPANDYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
            (VPANDYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
            (VPORYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
            (VPORYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
            (VPORYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
            (VPXORYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
            (VPXORYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
            (VPXORYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
            (VPANDNYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
            (VPANDNYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
            (VPANDNYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)),
            (VPANDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)),
            (VPANDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)),
            (VPANDYrm VR256:$src1, addr:$src2)>;

  def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)),
            (VPORYrm VR256:$src1, addr:$src2)>;
  def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)),
            (VPORYrm VR256:$src1, addr:$src2)>;
  def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)),
            (VPORYrm VR256:$src1, addr:$src2)>;

  def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)),
            (VPXORYrm VR256:$src1, addr:$src2)>;
  def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)),
            (VPXORYrm VR256:$src1, addr:$src2)>;
  def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)),
            (VPXORYrm VR256:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)),
            (VPANDNYrm VR256:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)),
            (VPANDNYrm VR256:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)),
            (VPANDNYrm VR256:$src1, addr:$src2)>;
}

// If only AVX1 is supported, we need to handle integer operations with
// floating point instructions since the integer versions aren't available.
// Integer and/or/xor/andnp on 256-bit vectors for AVX1-only targets. Every
// integer element type is selected to the packed-single YMM instruction.
let Predicates = [HasAVX1Only] in {
  // Register-register forms.
  foreach VT = [v32i8, v16i16, v8i32, v4i64] in
    def : Pat<(VT (and VR256:$src1, VR256:$src2)),
              (VANDPSYrr VR256:$src1, VR256:$src2)>;

  foreach VT = [v32i8, v16i16, v8i32, v4i64] in
    def : Pat<(VT (or VR256:$src1, VR256:$src2)),
              (VORPSYrr VR256:$src1, VR256:$src2)>;

  foreach VT = [v32i8, v16i16, v8i32, v4i64] in
    def : Pat<(VT (xor VR256:$src1, VR256:$src2)),
              (VXORPSYrr VR256:$src1, VR256:$src2)>;

  foreach VT = [v32i8, v16i16, v8i32, v4i64] in
    def : Pat<(VT (X86andnp VR256:$src1, VR256:$src2)),
              (VANDNPSYrr VR256:$src1, VR256:$src2)>;

  // Register-memory forms; the load fragment is looked up by type name
  // (loadv32i8, loadv16i16, loadv8i32, loadv4i64).
  foreach VT = [v32i8, v16i16, v8i32, v4i64] in
    def : Pat<(and VR256:$src1, (!cast<PatFrag>("load" # VT) addr:$src2)),
              (VANDPSYrm VR256:$src1, addr:$src2)>;

  foreach VT = [v32i8, v16i16, v8i32, v4i64] in
    def : Pat<(or VR256:$src1, (!cast<PatFrag>("load" # VT) addr:$src2)),
              (VORPSYrm VR256:$src1, addr:$src2)>;

  foreach VT = [v32i8, v16i16, v8i32, v4i64] in
    def : Pat<(xor VR256:$src1, (!cast<PatFrag>("load" # VT) addr:$src2)),
              (VXORPSYrm VR256:$src1, addr:$src2)>;

  foreach VT = [v32i8, v16i16, v8i32, v4i64] in
    def : Pat<(X86andnp VR256:$src1, (!cast<PatFrag>("load" # VT) addr:$src2)),
              (VANDNPSYrm VR256:$src1, addr:$src2)>;
}

// 128-bit integer logic for the v16i8/v8i16/v4i32 types, selected to the
// VEX-encoded VPAND/VPOR/VPXOR/VPANDN instructions.
let Predicates = [HasAVX, NoVLX] in {
  // Register-register forms.
  foreach VT = [v16i8, v8i16, v4i32] in
    def : Pat<(VT (and VR128:$src1, VR128:$src2)),
              (VPANDrr VR128:$src1, VR128:$src2)>;

  foreach VT = [v16i8, v8i16, v4i32] in
    def : Pat<(VT (or VR128:$src1, VR128:$src2)),
              (VPORrr VR128:$src1, VR128:$src2)>;

  foreach VT = [v16i8, v8i16, v4i32] in
    def : Pat<(VT (xor VR128:$src1, VR128:$src2)),
              (VPXORrr VR128:$src1, VR128:$src2)>;

  foreach VT = [v16i8, v8i16, v4i32] in
    def : Pat<(VT (X86andnp VR128:$src1, VR128:$src2)),
              (VPANDNrr VR128:$src1, VR128:$src2)>;

  // Register-memory forms (loadv16i8, loadv8i16, loadv4i32).
  foreach VT = [v16i8, v8i16, v4i32] in
    def : Pat<(and VR128:$src1, (!cast<PatFrag>("load" # VT) addr:$src2)),
              (VPANDrm VR128:$src1, addr:$src2)>;

  foreach VT = [v16i8, v8i16, v4i32] in
    def : Pat<(or VR128:$src1, (!cast<PatFrag>("load" # VT) addr:$src2)),
              (VPORrm VR128:$src1, addr:$src2)>;

  foreach VT = [v16i8, v8i16, v4i32] in
    def : Pat<(xor VR128:$src1, (!cast<PatFrag>("load" # VT) addr:$src2)),
              (VPXORrm VR128:$src1, addr:$src2)>;

  foreach VT = [v16i8, v8i16, v4i32] in
    def : Pat<(X86andnp VR128:$src1, (!cast<PatFrag>("load" # VT) addr:$src2)),
              (VPANDNrm VR128:$src1, addr:$src2)>;
}

// The same 128-bit integer logic for the legacy SSE2 encodings. The memory
// forms use the memop fragments (memopv16i8, memopv8i16, memopv4i32).
let Predicates = [UseSSE2] in {
  // Register-register forms.
  foreach VT = [v16i8, v8i16, v4i32] in
    def : Pat<(VT (and VR128:$src1, VR128:$src2)),
              (PANDrr VR128:$src1, VR128:$src2)>;

  foreach VT = [v16i8, v8i16, v4i32] in
    def : Pat<(VT (or VR128:$src1, VR128:$src2)),
              (PORrr VR128:$src1, VR128:$src2)>;

  foreach VT = [v16i8, v8i16, v4i32] in
    def : Pat<(VT (xor VR128:$src1, VR128:$src2)),
              (PXORrr VR128:$src1, VR128:$src2)>;

  foreach VT = [v16i8, v8i16, v4i32] in
    def : Pat<(VT (X86andnp VR128:$src1, VR128:$src2)),
              (PANDNrr VR128:$src1, VR128:$src2)>;

  // Register-memory forms.
  foreach VT = [v16i8, v8i16, v4i32] in
    def : Pat<(and VR128:$src1, (!cast<PatFrag>("memop" # VT) addr:$src2)),
              (PANDrm VR128:$src1, addr:$src2)>;

  foreach VT = [v16i8, v8i16, v4i32] in
    def : Pat<(or VR128:$src1, (!cast<PatFrag>("memop" # VT) addr:$src2)),
              (PORrm VR128:$src1, addr:$src2)>;

  foreach VT = [v16i8, v8i16, v4i32] in
    def : Pat<(xor VR128:$src1, (!cast<PatFrag>("memop" # VT) addr:$src2)),
              (PXORrm VR128:$src1, addr:$src2)>;

  foreach VT = [v16i8, v8i16, v4i32] in
    def : Pat<(X86andnp VR128:$src1, (!cast<PatFrag>("memop" # VT) addr:$src2)),
              (PANDNrm VR128:$src1, addr:$src2)>;
}

// Patterns for packed operations when we don't have integer type available.
// Register-register forms.
def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
          (ANDPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)),
          (ORPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)),
          (XORPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)),
          (ANDNPSrr VR128:$src1, VR128:$src2)>;

// Register-memory forms.
def : Pat<(X86fand VR128:$src1, (memopv4f32 addr:$src2)),
          (ANDPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86for VR128:$src1, (memopv4f32 addr:$src2)),
          (ORPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86fxor VR128:$src1, (memopv4f32 addr:$src2)),
          (XORPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)),
          (ANDNPSrm VR128:$src1, addr:$src2)>;

26050b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 26060b57cec5SDimitry Andric// SSE 1 & 2 - Arithmetic Instructions 26070b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 26080b57cec5SDimitry Andric 26090b57cec5SDimitry Andric/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and 26100b57cec5SDimitry Andric/// vector forms. 26110b57cec5SDimitry Andric/// 26120b57cec5SDimitry Andric/// In addition, we also have a special variant of the scalar form here to 26130b57cec5SDimitry Andric/// represent the associated intrinsic operation. This form is unlike the 26140b57cec5SDimitry Andric/// plain scalar form, in that it takes an entire vector (instead of a scalar) 26150b57cec5SDimitry Andric/// and leaves the top elements unmodified (therefore these cannot be commuted). 26160b57cec5SDimitry Andric/// 26170b57cec5SDimitry Andric/// These three forms can each be reg+reg or reg+mem. 
///

/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those
/// classes below

/// basic_sse12_fp_binop_p - Packed single/double forms of a binary FP
/// operation. Emits VEX-encoded XMM and YMM variants under [HasAVX, NoVLX],
/// and legacy variants whose $src1 is tied to $dst. All variants read MXCSR
/// and may raise FP exceptions.
multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNode,
                                  X86SchedWriteSizes sched> {
  let Uses = [MXCSR], mayRaiseFPException = 1 in {
    // VEX-encoded forms.
    let Predicates = [HasAVX, NoVLX] in {
      defm V#NAME#PS
          : sse12_fp_packed<opc, OpcodeStr # "ps", OpNode, VR128, v4f32,
                            f128mem, loadv4f32, SSEPackedSingle,
                            sched.PS.XMM, 0>,
            PS, VEX_4V, VEX_WIG;
      defm V#NAME#PD
          : sse12_fp_packed<opc, OpcodeStr # "pd", OpNode, VR128, v2f64,
                            f128mem, loadv2f64, SSEPackedDouble,
                            sched.PD.XMM, 0>,
            PD, VEX_4V, VEX_WIG;

      defm V#NAME#PSY
          : sse12_fp_packed<opc, OpcodeStr # "ps", OpNode, VR256, v8f32,
                            f256mem, loadv8f32, SSEPackedSingle,
                            sched.PS.YMM, 0>,
            PS, VEX_4V, VEX_L, VEX_WIG;
      defm V#NAME#PDY
          : sse12_fp_packed<opc, OpcodeStr # "pd", OpNode, VR256, v4f64,
                            f256mem, loadv4f64, SSEPackedDouble,
                            sched.PD.YMM, 0>,
            PD, VEX_4V, VEX_L, VEX_WIG;
    }

    // Legacy forms: the first source is also the destination.
    let Constraints = "$src1 = $dst" in {
      defm PS
          : sse12_fp_packed<opc, OpcodeStr # "ps", OpNode, VR128, v4f32,
                            f128mem, memopv4f32, SSEPackedSingle,
                            sched.PS.XMM>,
            PS;
      defm PD
          : sse12_fp_packed<opc, OpcodeStr # "pd", OpNode, VR128, v2f64,
                            f128mem, memopv2f64, SSEPackedDouble,
                            sched.PD.XMM>,
            PD;
    }
  }
}

/// basic_sse12_fp_binop_s - Scalar single/double forms of a binary FP
/// operation on FR32/FR64. Emits VEX-encoded three-operand variants
/// (Is2Addr = 0) and legacy variants whose $src1 is tied to $dst.
multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNode,
                                  X86SchedWriteSizes sched> {
  let Uses = [MXCSR], mayRaiseFPException = 1 in {
    // VEX-encoded forms.
    defm V#NAME#SS
        : sse12_fp_scalar<opc, OpcodeStr # "ss", OpNode, FR32, f32mem,
                          SSEPackedSingle, sched.PS.Scl, 0>,
          XS, VEX_4V, VEX_LIG, VEX_WIG;
    defm V#NAME#SD
        : sse12_fp_scalar<opc, OpcodeStr # "sd", OpNode, FR64, f64mem,
                          SSEPackedDouble, sched.PD.Scl, 0>,
          XD, VEX_4V, VEX_LIG, VEX_WIG;

    // Legacy forms: the first source is also the destination.
    let Constraints = "$src1 = $dst" in {
      defm SS
          : sse12_fp_scalar<opc, OpcodeStr # "ss", OpNode, FR32, f32mem,
                            SSEPackedSingle, sched.PS.Scl>,
            XS;
      defm SD
          : sse12_fp_scalar<opc, OpcodeStr # "sd", OpNode, FR64, f64mem,
                            SSEPackedDouble, sched.PD.Scl>,
            XD;
    }
  }
}

/// basic_sse12_fp_binop_s_int - Intrinsic scalar forms of a binary FP
/// operation. These operate on whole VR128 vectors (v4f32 / v2f64) and use
/// the ssmem/sdmem operands with the sse_load_f32/sse_load_f64 fragments.
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      X86SchedWriteSizes sched> {
  let Uses = [MXCSR], mayRaiseFPException = 1 in {
    // VEX-encoded forms.
    defm V#NAME#SS
        : sse12_fp_scalar_int<opc, OpNode, VR128, v4f32, OpcodeStr # "ss",
                              ssmem, sse_load_f32, SSEPackedSingle,
                              sched.PS.Scl, 0>,
          XS, VEX_4V, VEX_LIG, VEX_WIG;
    defm V#NAME#SD
        : sse12_fp_scalar_int<opc, OpNode, VR128, v2f64, OpcodeStr # "sd",
                              sdmem, sse_load_f64, SSEPackedDouble,
                              sched.PD.Scl, 0>,
          XD, VEX_4V, VEX_LIG, VEX_WIG;

    // Legacy forms: the first source is also the destination.
    let Constraints = "$src1 = $dst" in {
      defm SS
          : sse12_fp_scalar_int<opc, OpNode, VR128, v4f32, OpcodeStr # "ss",
                                ssmem, sse_load_f32, SSEPackedSingle,
                                sched.PS.Scl>,
            XS;
      defm SD
          : sse12_fp_scalar_int<opc, OpNode, VR128, v2f64, OpcodeStr # "sd",
                                sdmem, sse_load_f64, SSEPackedDouble,
                                sched.PD.Scl>,
            XD;
    }
  }
}

// Binary Arithmetic instructions
defm ADD
    : basic_sse12_fp_binop_p<0x58, "add", any_fadd, SchedWriteFAddSizes>,
      basic_sse12_fp_binop_s<0x58, "add", any_fadd, SchedWriteFAddSizes>,
      basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAddSizes>;
defm MUL
    : basic_sse12_fp_binop_p<0x59, "mul", any_fmul, SchedWriteFMulSizes>,
      basic_sse12_fp_binop_s<0x59, "mul", any_fmul, SchedWriteFMulSizes>,
      basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMulSizes>;

// The remaining operations are explicitly marked non-commutable.
let isCommutable = 0 in {
  defm SUB
      : basic_sse12_fp_binop_p<0x5C, "sub", any_fsub, SchedWriteFAddSizes>,
        basic_sse12_fp_binop_s<0x5C, "sub", any_fsub, SchedWriteFAddSizes>,
        basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAddSizes>;
  defm DIV
      : basic_sse12_fp_binop_p<0x5E, "div", any_fdiv, SchedWriteFDivSizes>,
        basic_sse12_fp_binop_s<0x5E, "div", any_fdiv, SchedWriteFDivSizes>,
        basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDivSizes>;
  defm MAX
      : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
        basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
        basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SchedWriteFCmpSizes>;
  defm MIN
      : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
        basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
        basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SchedWriteFCmpSizes>;
}

// Code-gen-only max/min variants built on the X86fmaxc / X86fminc nodes.
let isCodeGenOnly = 1 in {
  defm MAXC
      : basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>,
        basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>;
  defm MINC
      : basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SchedWriteFCmpSizes>,
        basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SchedWriteFCmpSizes>;
}

// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// Previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
//   }
//
// Previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0

// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
/// scalar_math_patterns - Select the *_Int scalar instruction for a scalar
/// math op whose result is written back to element 0 of its own vector
/// operand through Move (X86Movss / X86Movsd).
///
///   Op            - scalar operation applied to element 0.
///   OpcPrefix     - instruction name stem, e.g. "ADDSS"; "V" is prepended
///                   for the AVX patterns.
///   Move          - node performing the low-element insert.
///   VT / EltTy    - vector type and its element type.
///   RC            - register class of the scalar right-hand operand.
///   ld_frag       - load fragment used for the folded-memory form.
///   BasePredicate - predicate guarding the non-VEX patterns.
multiclass scalar_math_patterns<SDPatternOperator Op, string OpcPrefix, SDNode Move,
                                ValueType VT, ValueType EltTy,
                                RegisterClass RC, PatFrag ld_frag,
                                Predicate BasePredicate> {
  // Legacy-encoded instructions.
  let Predicates = [BasePredicate] in {
    // Register operand: the scalar source is copied into VR128 first.
    def : Pat<
        (VT (Move (VT VR128:$dst),
                  (VT (scalar_to_vector
                       (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                           RC:$src))))),
        (!cast<Instruction>(OpcPrefix#rr_Int)
            VT:$dst, (VT (COPY_TO_REGCLASS RC:$src, VR128)))>;

    // Memory operand: the scalar load is folded into the instruction.
    def : Pat<
        (VT (Move (VT VR128:$dst),
                  (VT (scalar_to_vector
                       (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                           (ld_frag addr:$src)))))),
        (!cast<Instruction>(OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
  }

  // Same two patterns for the AVX ("V"-prefixed) instructions.
  let Predicates = [UseAVX] in {
    // Register operand.
    def : Pat<
        (VT (Move (VT VR128:$dst),
                  (VT (scalar_to_vector
                       (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                           RC:$src))))),
        (!cast<Instruction>("V"#OpcPrefix#rr_Int)
            VT:$dst, (VT (COPY_TO_REGCLASS RC:$src, VR128)))>;

    // Memory operand.
    def : Pat<
        (VT (Move (VT VR128:$dst),
                  (VT (scalar_to_vector
                       (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                           (ld_frag addr:$src)))))),
        (!cast<Instruction>("V"#OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
  }
}

// Single-precision add/sub/mul/div.
defm : scalar_math_patterns<any_fadd, "ADDSS", X86Movss, v4f32, f32, FR32,
                            loadf32, UseSSE1>;
defm : scalar_math_patterns<any_fsub, "SUBSS", X86Movss, v4f32, f32, FR32,
                            loadf32, UseSSE1>;
defm : scalar_math_patterns<any_fmul, "MULSS", X86Movss, v4f32, f32, FR32,
                            loadf32, UseSSE1>;
defm : scalar_math_patterns<any_fdiv, "DIVSS", X86Movss, v4f32, f32, FR32,
                            loadf32, UseSSE1>;

// Double-precision add/sub/mul/div.
defm : scalar_math_patterns<any_fadd, "ADDSD", X86Movsd, v2f64, f64, FR64,
                            loadf64, UseSSE2>;
defm : scalar_math_patterns<any_fsub, "SUBSD", X86Movsd, v2f64, f64, FR64,
                            loadf64, UseSSE2>;
defm : scalar_math_patterns<any_fmul, "MULSD", X86Movsd, v2f64, f64, FR64,
                            loadf64, UseSSE2>;
defm : scalar_math_patterns<any_fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64,
                            loadf64, UseSSE2>;

/// Unop Arithmetic
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation.  This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a
/// scalar) and leaves the top elements undefined.
///
/// And, we have a special variant form for a full-vector intrinsic form.

/// sse_fp_unop_s - SSE1 unops in scalar form
/// For the non-AVX defs, we need $src1 to be tied to $dst because
/// the HW instructions are 2 operand / destructive.
multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                         X86MemOperand x86memop, Operand intmemop,
                         SDPatternOperator OpNode, Domain d,
                         X86FoldableSchedWrite sched, Predicate target> {
  // Plain scalar forms on RC. The memory form additionally requires
  // OptForSize.
  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1),
              OpcodeStr # "\t{$src1, $dst|$dst, $src1}",
              [(set RC:$dst, (OpNode RC:$src1))], d>,
            Sched<[sched]>, Requires<[target]>;
    let mayLoad = 1 in {
      def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1),
                OpcodeStr # "\t{$src1, $dst|$dst, $src1}",
                [(set RC:$dst, (OpNode (load addr:$src1)))], d>,
              Sched<[sched.Folded]>, Requires<[target, OptForSize]>;
    }
  }

  // Intrinsic forms on VR128 with $src1 tied to $dst. They carry no
  // selection pattern of their own.
  let hasSideEffects = 0, Constraints = "$src1 = $dst", ExeDomain = d in {
    def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
                  (ins VR128:$src1, VR128:$src2),
                  OpcodeStr # "\t{$src2, $dst|$dst, $src2}", []>,
                Sched<[sched]>;
    let mayLoad = 1 in {
      def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins VR128:$src1, intmemop:$src2),
                    OpcodeStr # "\t{$src2, $dst|$dst, $src2}", []>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
    }
  }

}

/// sse_fp_unop_s_intr - Selection patterns mapping the intrinsic Intr onto
/// the NAME#r_Int / NAME#m_Int instructions.
multiclass sse_fp_unop_s_intr<ValueType vt, PatFrags mem_frags,
                              Intrinsic Intr, Predicate target> {
  let Predicates = [target] in {
    // These are unary operations, but they are modeled as having 2 source
    // operands because the high elements of the destination are unchanged in
    // SSE.
    def : Pat<(Intr VR128:$src),
              (!cast<Instruction>(NAME#r_Int) VR128:$src, VR128:$src)>;
  }

  // We don't want to fold scalar loads into these instructions unless
  // optimizing for size. This is because the folded instruction will have a
  // partial register update, while the unfolded sequence will not, e.g.
  // movss mem, %xmm0
  // rcpss %xmm0, %xmm0
  // which has a clobber before the rcp, vs.
  // rcpss mem, %xmm0
  let Predicates = [target, OptForSize] in {
    def : Pat<(Intr (mem_frags addr:$src2)),
              (!cast<Instruction>(NAME#m_Int) (vt (IMPLICIT_DEF)),
                                              addr:$src2)>;
  }
}

/// avx_fp_unop_s_intr - Selection patterns mapping the intrinsic Intr onto
/// the NAME#r_Int / NAME#m_Int instructions. The memory form is only
/// selected under OptForSize.
multiclass avx_fp_unop_s_intr<ValueType vt, PatFrags mem_frags,
                              Intrinsic Intr, Predicate target> {
  let Predicates = [target] in {
    def : Pat<(Intr VR128:$src),
              (!cast<Instruction>(NAME#r_Int) VR128:$src, VR128:$src)>;
  }
  let Predicates = [target, OptForSize] in {
    def : Pat<(Intr (mem_frags addr:$src2)),
              (!cast<Instruction>(NAME#m_Int) (vt (IMPLICIT_DEF)),
                                              addr:$src2)>;
  }
}

/// avx_fp_unop_s - Three-operand scalar unops. The instruction definitions
/// carry no patterns; OpNode is matched by the explicit Pat records at the
/// end of the multiclass.
multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                         ValueType ScalarVT, X86MemOperand x86memop,
                         Operand intmemop, SDPatternOperator OpNode, Domain d,
                         X86FoldableSchedWrite sched, Predicate target> {
  // Plain scalar forms on RC.
  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
              OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [], d>,
            Sched<[sched]>;
    let mayLoad = 1 in {
      def m : I<opc, MRMSrcMem, (outs RC:$dst),
                (ins RC:$src1, x86memop:$src2),
                OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [], d>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
    }
  }

  // Intrinsic forms on VR128.
  let hasSideEffects = 0, ExeDomain = d in {
    def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
                  (ins VR128:$src1, VR128:$src2),
                  OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  []>,
                Sched<[sched]>;
    let mayLoad = 1 in {
      def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins VR128:$src1, intmemop:$src2),
                    OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    []>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
    }
  }

  // We don't want to fold scalar loads into these instructions unless
  // optimizing for size. This is because the folded instruction will have a
  // partial register update, while the unfolded sequence will not, e.g.
  // vmovss mem, %xmm0
  // vrcpss %xmm0, %xmm0, %xmm0
  // which has a clobber before the rcp, vs.
  // vrcpss mem, %xmm0, %xmm0
  // TODO: In theory, we could fold the load, and avoid the stall caused by
  // the partial register store, either in BreakFalseDeps or with smarter RA.
  let Predicates = [target] in {
    def : Pat<(OpNode RC:$src),
              (!cast<Instruction>(NAME#r) (ScalarVT (IMPLICIT_DEF)),
                                          RC:$src)>;
  }
  let Predicates = [target, OptForSize] in {
    def : Pat<(ScalarVT (OpNode (load addr:$src))),
              (!cast<Instruction>(NAME#m) (ScalarVT (IMPLICIT_DEF)),
                                          addr:$src)>;
  }
}

/// sse1_fp_unop_p - SSE1 unops in packed form.
/// The VEX-encoded XMM/YMM variants are guarded by prds; the legacy XMM
/// variants are emitted without that guard.
multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                          X86SchedWriteWidths sched, list<Predicate> prds> {
  let Predicates = prds in {
    def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "v" # OpcodeStr # "ps\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>,
                     VEX, Sched<[sched.XMM]>, VEX_WIG;
    def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                         "v" # OpcodeStr # "ps\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))]>,
                     VEX, Sched<[sched.XMM.Folded]>, VEX_WIG;
    def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                          "v" # OpcodeStr # "ps\t{$src, $dst|$dst, $src}",
                          [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>,
                      VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
    def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                          "v" # OpcodeStr # "ps\t{$src, $dst|$dst, $src}",
                          [(set VR256:$dst, (OpNode (loadv8f32 addr:$src)))]>,
                      VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG;
  }

  def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                OpcodeStr # "ps\t{$src, $dst|$dst, $src}",
                [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>,
            Sched<[sched.XMM]>;
  def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                OpcodeStr # "ps\t{$src, $dst|$dst, $src}",
                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>,
            Sched<[sched.XMM.Folded]>;
}

/// sse2_fp_unop_p - SSE2 unops in vector forms.
/// sse2_fp_unop_p - SSE2 unops in vector forms.
///
/// Emits the packed-double records for one unary FP operation:
///   V<NAME>PDr / V<NAME>PDm   - VEX-encoded, VR128, register / memory source
///   V<NAME>PDYr / V<NAME>PDYm - VEX-encoded with VEX_L, VR256
///   PDr / PDm                 - legacy encoding, VR128
///
/// \param opc        opcode byte shared by every form.
/// \param OpcodeStr  mnemonic stem; "pd" is appended, and "v" is prepended for
///                   the VEX forms.
/// \param OpNode     pattern operator selected to these instructions.
/// \param sched      scheduling info; .XMM / .YMM pick the width and .Folded
///                   is used for the memory-source forms.
multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
                          SDPatternOperator OpNode, X86SchedWriteWidths sched> {
// The VEX forms are only selected when AVX is available and VLX is not.
let Predicates = [HasAVX, NoVLX] in {
  def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       !strconcat("v", OpcodeStr,
                                  "pd\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>,
                       VEX, Sched<[sched.XMM]>, VEX_WIG;
  def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       !strconcat("v", OpcodeStr,
                                  "pd\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))]>,
                       VEX, Sched<[sched.XMM.Folded]>, VEX_WIG;
  def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                        !strconcat("v", OpcodeStr,
                                   "pd\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>,
                        VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
  def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                        !strconcat("v", OpcodeStr,
                                   "pd\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (OpNode (loadv4f64 addr:$src)))]>,
                        VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG;
}

  // Legacy forms. Note that the memory form folds through memopv2f64 rather
  // than loadv2f64 (presumably because of the legacy-encoding alignment
  // requirement on memory operands - confirm against the fragment definition).
  def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>,
                Sched<[sched.XMM]>;
  def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>,
                Sched<[sched.XMM.Folded]>;
}

/// sse1_fp_unop_s_intr - Intrinsic-based scalar-single forms of a unary op.
///
/// Instantiates SS (sse_fp_unop_s_intr, under UseSSE1) and V<NAME>SS
/// (avx_fp_unop_s_intr, under AVXTarget). The intrinsic is looked up by name
/// as int_x86_sse_<OpcodeStr>_ss.
multiclass sse1_fp_unop_s_intr<string OpcodeStr, Predicate AVXTarget> {
  defm SS        :  sse_fp_unop_s_intr<v4f32, sse_load_f32,
                      !cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss),
                      UseSSE1>, XS;
  defm V#NAME#SS  : avx_fp_unop_s_intr<v4f32, sse_load_f32,
                      !cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss),
                      AVXTarget>,
                      XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable;
}

/// sse1_fp_unop_s - Scalar single-precision (FR32) forms of a unary op.
///
/// Instantiates SS (legacy, under UseSSE1) and V<NAME>SS (VEX, under
/// AVXTarget), both using the scalar scheduling class sched.Scl.
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                          X86SchedWriteWidths sched, Predicate AVXTarget> {
  defm SS        :  sse_fp_unop_s<opc, OpcodeStr#ss, FR32, f32mem,
                      ssmem, OpNode, SSEPackedSingle, sched.Scl, UseSSE1>, XS;
  defm V#NAME#SS  : avx_fp_unop_s<opc, "v"#OpcodeStr#ss, FR32, f32,
                      f32mem, ssmem, OpNode, SSEPackedSingle, sched.Scl, AVXTarget>,
                      XS, VEX_4V, VEX_LIG, VEX_WIG;
}

/// sse2_fp_unop_s - Scalar double-precision (FR64) forms of a unary op.
///
/// Instantiates SD (legacy, under UseSSE2) and V<NAME>SD (VEX, under
/// AVXTarget), both using the scalar scheduling class sched.Scl.
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                          X86SchedWriteWidths sched, Predicate AVXTarget> {
  defm SD         : sse_fp_unop_s<opc, OpcodeStr#sd, FR64, f64mem,
                         sdmem, OpNode, SSEPackedDouble, sched.Scl, UseSSE2>, XD;
  defm V#NAME#SD  : avx_fp_unop_s<opc, "v"#OpcodeStr#sd, FR64, f64,
                         f64mem, sdmem, OpNode, SSEPackedDouble, sched.Scl, AVXTarget>,
                         XD, VEX_4V, VEX_LIG, VEX_WIG;
}

// Square root.
// Scalar and packed, single and double precision; all forms carry SIMD_EXC.
defm SQRT  : sse1_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, UseAVX>,
             sse1_fp_unop_p<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>,
             sse2_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt64, UseAVX>,
             sse2_fp_unop_p<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt64>, SIMD_EXC;

// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>,
             sse1_fp_unop_s_intr<"rsqrt", HasAVX>,
             sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, [HasAVX]>;
defm RCP   : sse1_fp_unop_s<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>,
             sse1_fp_unop_s_intr<"rcp", HasAVX>,
             sse1_fp_unop_p<0x53, "rcp", X86frcp, SchedWriteFRcp, [HasAVX]>;

// There is no f64 version of the reciprocal approximation instructions.

/// scalar_unary_math_patterns - Select the *r_Int form of a scalar unary op
/// when the operation is applied to element 0 of $src and the result is
/// merged into $dst with a Move node.
///
/// \param OpNode         scalar operation applied to the extracted element.
/// \param OpcPrefix      instruction name stem; "r_Int" is appended, and "V"
///                       is prepended for the AVX variant.
/// \param Move           node that merges the scalar result into $dst.
/// \param VT             vector type of $dst / $src.
/// \param BasePredicate  predicate guarding the non-AVX pattern.
multiclass scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix, SDNode Move,
                                      ValueType VT, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    def : Pat<(VT (Move VT:$dst, (scalar_to_vector
                                  (OpNode (extractelt VT:$src, 0))))),
              (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src)>;
  }

  // Repeat for AVX versions of the instructions.
  let Predicates = [UseAVX] in {
    def : Pat<(VT (Move VT:$dst, (scalar_to_vector
                                  (OpNode (extractelt VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
  }
}

defm : scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32, UseSSE1>;
defm : scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64, UseSSE2>;

/// scalar_unary_math_intr_patterns - Same merge pattern as
/// scalar_unary_math_patterns, but matching a vector intrinsic applied to
/// $src instead of a scalar node on element 0. The AVX variant is guarded by
/// HasAVX here, not UseAVX.
multiclass scalar_unary_math_intr_patterns<Intrinsic Intr, string OpcPrefix,
                                           SDNode Move, ValueType VT,
                                           Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
              (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src)>;
  }

  // Repeat for AVX versions of the instructions.
  let Predicates = [HasAVX] in {
    def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
              (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
  }
}

defm : scalar_unary_math_intr_patterns<int_x86_sse_rcp_ss, "RCPSS", X86Movss,
                                       v4f32, UseSSE1>;
defm : scalar_unary_math_intr_patterns<int_x86_sse_rsqrt_ss, "RSQRTSS", X86Movss,
                                       v4f32, UseSSE1>;


//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Non-temporal stores
//===----------------------------------------------------------------------===//

let AddedComplexity = 400 in { // Prefer non-temporal versions
// VEX-encoded forms, selected when AVX is available and VLX is not.
let Predicates = [HasAVX, NoVLX] in {
let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
                     (ins f128mem:$dst, VR128:$src),
                     "movntps\t{$src, $dst|$dst, $src}",
                     [(alignednontemporalstore (v4f32 VR128:$src),
                                               addr:$dst)]>, VEX, VEX_WIG;
def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
                     (ins f128mem:$dst, VR128:$src),
                     "movntpd\t{$src, $dst|$dst, $src}",
                     [(alignednontemporalstore (v2f64 VR128:$src),
                                               addr:$dst)]>, VEX, VEX_WIG;
} // SchedRW

let SchedRW = [SchedWriteFMoveLSNT.YMM.MR] in {
def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
                  (ins f256mem:$dst, VR256:$src),
                  "movntps\t{$src, $dst|$dst, $src}",
                  [(alignednontemporalstore (v8f32 VR256:$src),
                                            addr:$dst)]>, VEX, VEX_L, VEX_WIG;
def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
                  (ins f256mem:$dst, VR256:$src),
                  "movntpd\t{$src, $dst|$dst, $src}",
                  [(alignednontemporalstore (v4f64 VR256:$src),
                                            addr:$dst)]>, VEX, VEX_L, VEX_WIG;
} // SchedRW

let ExeDomain = SSEPackedInt in {
def VMOVNTDQmr    : VPDI<0xE7, MRMDestMem, (outs),
                         (ins i128mem:$dst, VR128:$src),
                         "movntdq\t{$src, $dst|$dst, $src}",
                         [(alignednontemporalstore (v2i64 VR128:$src),
                                                   addr:$dst)]>, VEX, VEX_WIG,
                         Sched<[SchedWriteVecMoveLSNT.XMM.MR]>;
def VMOVNTDQYmr   : VPDI<0xE7, MRMDestMem, (outs),
                    (ins i256mem:$dst, VR256:$src),
                    "movntdq\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v4i64 VR256:$src),
                                              addr:$dst)]>, VEX, VEX_L, VEX_WIG,
                    Sched<[SchedWriteVecMoveLSNT.YMM.MR]>;
} // ExeDomain
} // Predicates

// Legacy-encoded forms.
let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movntps\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movntpd\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
} // SchedRW

// Integer-domain store of a v2i64 value: use i128mem for the destination,
// matching VMOVNTDQmr above.
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLSNT.XMM.MR] in
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                    "movntdq\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>;

let SchedRW = [WriteStoreNT] in {
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                 "movnti{l}\t{$src, $dst|$dst, $src}",
                 [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
               PS, Requires<[HasSSE2]>;
def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
                     "movnti{q}\t{$src, $dst|$dst, $src}",
                     [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
                  PS, Requires<[HasSSE2]>;
} // SchedRW = [WriteStoreNT]

// The instruction definitions above only carry one store pattern each; map the
// remaining 256-bit and 128-bit element types onto the same MOVNTDQ forms.
let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(alignednontemporalstore (v8i32 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignednontemporalstore (v16f16 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v8f16 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v8f16 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
}

} // AddedComplexity

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Prefetch and memory fence
//===----------------------------------------------------------------------===//

// Prefetch intrinsic.
// The third operand of the prefetch node distinguishes the four forms:
// 3 -> prefetcht0, 2 -> prefetcht1, 1 -> prefetcht2, 0 -> prefetchnta.
let Predicates = [HasSSEPrefetch], SchedRW = [WriteLoad] in {
def PREFETCHT0   : I<0x18, MRM1m, (outs), (ins i8mem:$src),
    "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>, TB;
def PREFETCHT1   : I<0x18, MRM2m, (outs), (ins i8mem:$src),
    "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>, TB;
def PREFETCHT2   : I<0x18, MRM3m, (outs), (ins i8mem:$src),
    "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>, TB;
def PREFETCHNTA  : I<0x18, MRM0m, (outs), (ins i8mem:$src),
    "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>, TB;
}

// FIXME: How should flush instruction be modeled?
let SchedRW = [WriteLoad] in {
// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
               "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
               PS, Requires<[HasCLFLUSH]>;
}

let SchedRW = [WriteNop] in {
// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
def PAUSE : I<0x90, RawFrm, (outs), (ins),
              "pause", [(int_x86_sse2_pause)]>, OBXS;
}

let SchedRW = [WriteFence] in {
// Load, store, and memory fence
// TODO: As with mfence, we may want to ease the availability of sfence/lfence
// to include any 64-bit target.
def SFENCE : I<0xAE, MRM7X, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
               PS, Requires<[HasSSE1]>;
def LFENCE : I<0xAE, MRM5X, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>,
               PS, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM6X, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>,
               PS, Requires<[HasMFence]>;
} // SchedRW

def : Pat<(X86MFence), (MFENCE)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Load/Store MXCSR register
//===----------------------------------------------------------------------===//

// The load forms define MXCSR and the store forms use it; all four are marked
// hasSideEffects.
let mayLoad=1, hasSideEffects=1, Defs=[MXCSR] in
def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
               "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
               VEX, Sched<[WriteLDMXCSR]>, VEX_WIG;
let mayStore=1, hasSideEffects=1, Uses=[MXCSR] in
def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
               "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
               VEX, Sched<[WriteSTMXCSR]>, VEX_WIG;

let mayLoad=1, hasSideEffects=1, Defs=[MXCSR] in
def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src),
                "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
                PS, Sched<[WriteLDMXCSR]>;
let mayStore=1, hasSideEffects=1, Uses=[MXCSR] in
def STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
                PS, Sched<[WriteSTMXCSR]>;

//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in { // SSE integer instructions

// VEX register-to-register moves (load-form opcode 0x6F); no patterns.
let hasSideEffects = 0 in {
def VMOVDQArr  : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, VEX_WIG;
def VMOVDQUrr  : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "movdqu\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, VEX_WIG;
def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, VEX_WIG;
def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      "movdqu\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, VEX_WIG;
}

// For Disassembler
// Same register moves using the store-form opcode 0x7F (MRMDestReg); each is
// tied to its 0x6F counterpart through FoldGenData.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def VMOVDQArr_REV  : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                          "movdqa\t{$src, $dst|$dst, $src}", []>,
                          Sched<[SchedWriteVecMoveLS.XMM.RR]>,
                          VEX, VEX_WIG, FoldGenData<"VMOVDQArr">;
def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                          "movdqa\t{$src, $dst|$dst, $src}", []>,
                          Sched<[SchedWriteVecMoveLS.YMM.RR]>,
                          VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQAYrr">;
def VMOVDQUrr_REV  : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                          "movdqu\t{$src, $dst|$dst, $src}", []>,
                          Sched<[SchedWriteVecMoveLS.XMM.RR]>,
                          VEX, VEX_WIG, FoldGenData<"VMOVDQUrr">;
def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                          "movdqu\t{$src, $dst|$dst, $src}", []>,
                          Sched<[SchedWriteVecMoveLS.YMM.RR]>,
                          VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQUYrr">;
}

// VEX loads. Only the 128-bit forms carry a selection pattern here.
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
    hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in {
def VMOVDQArm  : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                      "movdqa\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (alignedloadv2i64 addr:$src))]>,
                      Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                      VEX, VEX_L, VEX_WIG;
def VMOVDQUrm  : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv2i64 addr:$src))]>,
                   Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                   XS, VEX, VEX_WIG;
def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}", []>,
                   Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                   XS, VEX, VEX_L, VEX_WIG;
}

// VEX stores. Only the 128-bit forms carry a selection pattern here.
let mayStore = 1, hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in {
def VMOVDQAmr  : VPDI<0x7F, MRMDestMem, (outs),
                      (ins i128mem:$dst, VR128:$src),
                      "movdqa\t{$src, $dst|$dst, $src}",
                      [(alignedstore (v2i64 VR128:$src), addr:$dst)]>,
                      Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_WIG;
def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
                      (ins i256mem:$dst, VR256:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.YMM.MR]>, VEX, VEX_L, VEX_WIG;
def VMOVDQUmr  : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}",
                   [(store (v2i64 VR128:$src), addr:$dst)]>,
                   Sched<[SchedWriteVecMoveLS.XMM.MR]>, XS, VEX, VEX_WIG;
def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}",[]>,
                   Sched<[SchedWriteVecMoveLS.YMM.MR]>, XS, VEX, VEX_L, VEX_WIG;
}

// Legacy-encoded register-to-register moves.
let SchedRW = [SchedWriteVecMoveLS.XMM.RR] in {
let hasSideEffects = 0 in {
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "movdqa\t{$src, $dst|$dst, $src}", []>;

def MOVDQUrr :   I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "movdqu\t{$src, $dst|$dst, $src}", []>,
                   XS, Requires<[UseSSE2]>;
}

// For Disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                       "movdqa\t{$src, $dst|$dst, $src}", []>,
                       FoldGenData<"MOVDQArr">;

def MOVDQUrr_REV :   I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                       "movdqu\t{$src, $dst|$dst, $src}", []>,
                       XS, Requires<[UseSSE2]>, FoldGenData<"MOVDQUrr">;
}
} // SchedRW

// Legacy loads. The selection patterns are intentionally left commented out
// in the pattern lists, so these definitions select nothing by themselves.
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
    hasSideEffects = 0, SchedRW = [SchedWriteVecMoveLS.XMM.RM] in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "movdqa\t{$src, $dst|$dst, $src}",
                   [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
def MOVDQUrm :   I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "movdqu\t{$src, $dst|$dst, $src}",
                   [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
                 XS, Requires<[UseSSE2]>;
}

// Legacy stores; patterns likewise left commented out.
let mayStore = 1, hasSideEffects = 0,
    SchedRW = [SchedWriteVecMoveLS.XMM.MR] in {
def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                   "movdqa\t{$src, $dst|$dst, $src}",
                   [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
def MOVDQUmr :   I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                   "movdqu\t{$src, $dst|$dst, $src}",
                   [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
                 XS, Requires<[UseSSE2]>;
}

} // ExeDomain = SSEPackedInt

// Reversed version with ".s" suffix for GAS compatibility.
// The ".s" mnemonics map onto the VEX *_REV register-move encodings.
def : InstAlias<"vmovdqa.s\t{$src, $dst|$dst, $src}",
                (VMOVDQArr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovdqa.s\t{$src, $dst|$dst, $src}",
                (VMOVDQAYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovdqu.s\t{$src, $dst|$dst, $src}",
                (VMOVDQUrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovdqu.s\t{$src, $dst|$dst, $src}",
                (VMOVDQUYrr_REV VR256:$dst, VR256:$src), 0>;

// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"movdqa.s\t{$src, $dst|$dst, $src}",
                (MOVDQArr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movdqu.s\t{$src, $dst|$dst, $src}",
                (MOVDQUrr_REV VR128:$dst, VR128:$src), 0>;

let Predicates = [HasAVX, NoVLX] in {
  // Additional patterns for other integer sizes.
  // Aligned loads select VMOVDQArm, unaligned loads VMOVDQUrm; stores follow
  // the same split between VMOVDQAmr and VMOVDQUmr.
  def : Pat<(alignedloadv4i32 addr:$src),
            (VMOVDQArm addr:$src)>;
  def : Pat<(alignedloadv8i16 addr:$src),
            (VMOVDQArm addr:$src)>;
  def : Pat<(alignedloadv8f16 addr:$src),
            (VMOVDQArm addr:$src)>;
  def : Pat<(alignedloadv16i8 addr:$src),
            (VMOVDQArm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),
            (VMOVDQUrm addr:$src)>;
  def : Pat<(loadv8i16 addr:$src),
            (VMOVDQUrm addr:$src)>;
  def : Pat<(loadv8f16 addr:$src),
            (VMOVDQUrm addr:$src)>;
  def : Pat<(loadv16i8 addr:$src),
            (VMOVDQUrm addr:$src)>;

  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (VMOVDQAmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (VMOVDQAmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8f16 VR128:$src), addr:$dst),
            (VMOVDQAmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (VMOVDQAmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (VMOVDQUmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (VMOVDQUmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8f16 VR128:$src), addr:$dst),
            (VMOVDQUmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (VMOVDQUmr addr:$dst, VR128:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in { // SSE integer instructions

/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types
///
/// Emits a register-register form (rr, marked commutable) and a
/// register-memory form (rm).
///
/// \param opc         opcode byte.
/// \param OpcodeStr   full mnemonic; the operand list is appended here.
/// \param OpNode      node matched by both forms.
/// \param DstVT       result value type.
/// \param SrcVT       value type given to the first source operand.
/// \param RC          register class of $dst, $src1 and (for rr) $src2.
/// \param memop_frag  load fragment wrapped around addr:$src2 in the rm form.
/// \param x86memop    memory operand class of $src2 in the rm form.
/// \param sched       scheduling class; the rm form uses sched.Folded and
///                    sched.ReadAfterFold.
/// \param Is2Addr     1 selects the two-operand asm string, 0 the
///                    three-operand one.
multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         ValueType DstVT, ValueType SrcVT, RegisterClass RC,
                         PatFrag memop_frag, X86MemOperand x86memop,
                         X86FoldableSchedWrite sched, bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
       Sched<[sched]>;
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
                                     (memop_frag addr:$src2))))]>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // ExeDomain = SSEPackedInt

// Packed integer add, saturating add and multiply. The byte and word element
// forms are guarded by NoVLX_Or_NoBWI, the dword and qword forms by NoVLX.
defm PADDB   : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDW   : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDD   : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
                             SchedWriteVecALU, 1, NoVLX>;
defm PADDQ   : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
                             SchedWriteVecALU, 1, NoVLX>;
defm PADDSB  : PDI_binop_all<0xEC, "paddsb", saddsat, v16i8, v32i8,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDSW  : PDI_binop_all<0xED, "paddsw", saddsat, v8i16, v16i16,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDUSB : PDI_binop_all<0xDC, "paddusb", uaddsat, v16i8, v32i8,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDUSW : PDI_binop_all<0xDD, "paddusw", uaddsat, v8i16, v16i16,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMULLW  : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
                             SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
defm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16,
                             SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
defm PMULHW  : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16,
                             SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
// Packed integer subtract / min / max / average instantiations.
defm PSUBB   : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBW   : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBD   : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
                             SchedWriteVecALU, 0, NoVLX>;
defm PSUBQ   : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
                             SchedWriteVecALU, 0, NoVLX>;
defm PSUBSB  : PDI_binop_all<0xE8, "psubsb", ssubsat, v16i8, v32i8,
                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBSW  : PDI_binop_all<0xE9, "psubsw", ssubsat, v8i16, v16i16,
                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", usubsat, v16i8, v32i8,
                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", usubsat, v8i16, v16i16,
                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PMINUB  : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMINSW  : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMAXUB  : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMAXSW  : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PAVGB   : PDI_binop_all<0xE0, "pavgb", avgceilu, v16i8, v32i8,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PAVGW   : PDI_binop_all<0xE3, "pavgw", avgceilu, v8i16, v16i16,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMULUDQ : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64,
                             SchedWriteVecIMul, 1, NoVLX>;

// Multiply-add of packed words: VEX 128-bit, VEX 256-bit, then the legacy
// SSE2 form whose destination is tied to $src1.
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  defm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16,
                                VR128, load, i128mem,
                                SchedWriteVecIMul.XMM, 0>,
                  VEX_4V, VEX_WIG;
}

let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  defm VPMADDWDY : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v8i32, v16i16,
                                 VR256, load, i256mem,
                                 SchedWriteVecIMul.YMM, 0>,
                   VEX_4V, VEX_L, VEX_WIG;
}

let Constraints = "$src1 = $dst" in {
  defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16,
                               VR128, memop, i128mem,
                               SchedWriteVecIMul.XMM>;
}

// Sum of absolute byte differences, in the same three flavours.
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8,
                               VR128, load, i128mem,
                               SchedWritePSADBW.XMM, 0>,
                 VEX_4V, VEX_WIG;
}

let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  defm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8,
                                VR256, load, i256mem,
                                SchedWritePSADBW.YMM, 0>,
                  VEX_4V, VEX_L, VEX_WIG;
}

let Constraints = "$src1 = $dst" in {
  defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8,
                              VR128, memop, i128mem,
                              SchedWritePSADBW.XMM>;
}

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//

/// PDI_binop_rmi - Packed-integer operation available in three forms:
///   rr - second operand is a register (opcode opc, node OpNode),
///   rm - second operand is loaded through ld_frag (opcode opc, node OpNode),
///   ri - second operand is an 8-bit immediate (opcode opc2 with format
///        ImmForm, node OpNode2).
/// The rr and rm forms are scheduled with sched, the ri form with schedImm.
/// Is2Addr selects the two-operand assembly syntax; with Is2Addr = 0 the
/// three-operand syntax is emitted instead.
multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
                         string OpcodeStr, SDNode OpNode,
                         SDNode OpNode2, RegisterClass RC,
                         X86FoldableSchedWrite sched,
                         X86FoldableSchedWrite schedImm,
                         ValueType DstVT, ValueType SrcVT,
                         PatFrag ld_frag, bit Is2Addr = 1> {
  // Assembly string shared by all three forms.
  defvar AsmText =
      !if(Is2Addr,
          OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
          OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}");

  // src2 is always 128-bit, whatever register class RC is.
  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, VR128:$src2),
               AsmText,
               [(set RC:$dst,
                     (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))]>,
           Sched<[sched]>;

  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, i128mem:$src2),
               AsmText,
               [(set RC:$dst,
                     (DstVT (OpNode RC:$src1,
                                    (SrcVT (ld_frag addr:$src2)))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;

  def ri : PDIi8<opc2, ImmForm, (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
                 AsmText,
                 [(set RC:$dst,
                       (DstVT (OpNode2 RC:$src1, (i8 timm:$src2))))]>,
           Sched<[schedImm]>;
}

/// PDI_binop_rmi_all - Instantiates PDI_binop_rmi three times:
///   V<NAME>  - VEX 128-bit form, predicated on [HasAVX, prd],
///   V<NAME>Y - VEX 256-bit form, predicated on [HasAVX2, prd],
///   <NAME>   - legacy form with the destination tied to $src1.
/// The VEX forms prefix the mnemonic with "v", fold memory through `load` and
/// pass Is2Addr = 0; the legacy form folds through `memop`.
multiclass PDI_binop_rmi_all<bits<8> opc, bits<8> opc2, Format ImmForm,
                             string OpcodeStr, SDNode OpNode,
                             SDNode OpNode2, ValueType DstVT128,
                             ValueType DstVT256, ValueType SrcVT,
                             X86SchedWriteWidths sched,
                             X86SchedWriteWidths schedImm, Predicate prd> {
  let Predicates = [HasAVX, prd] in {
    defm V#NAME : PDI_binop_rmi<opc, opc2, ImmForm, "v" # OpcodeStr,
                                OpNode, OpNode2, VR128,
                                sched.XMM, schedImm.XMM,
                                DstVT128, SrcVT, load, 0>,
                  VEX_4V, VEX_WIG;
  }

  let Predicates = [HasAVX2, prd] in {
    defm V#NAME#Y : PDI_binop_rmi<opc, opc2, ImmForm, "v" # OpcodeStr,
                                  OpNode, OpNode2, VR256,
                                  sched.YMM, schedImm.YMM,
                                  DstVT256, SrcVT, load, 0>,
                    VEX_4V, VEX_L, VEX_WIG;
  }

  let Constraints = "$src1 = $dst" in {
    defm NAME : PDI_binop_rmi<opc, opc2, ImmForm, OpcodeStr,
                              OpNode, OpNode2, VR128,
                              sched.XMM, schedImm.XMM,
                              DstVT128, SrcVT, memop>;
  }
}

/// PDI_binop_ri - Packed-integer operation with a register source and an
/// 8-bit immediate; defines only the `ri` form. Is2Addr selects the
/// two-operand assembly syntax; with Is2Addr = 0 the three-operand syntax is
/// emitted instead.
multiclass PDI_binop_ri<bits<8> opc, Format ImmForm, string OpcodeStr,
                        SDNode OpNode, RegisterClass RC, ValueType VT,
                        X86FoldableSchedWrite sched, bit Is2Addr = 1> {
  defvar AsmText =
      !if(Is2Addr,
          OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
          OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}");

  def ri : PDIi8<opc, ImmForm, (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
                 AsmText,
                 [(set RC:$dst, (VT (OpNode RC:$src1, (i8 timm:$src2))))]>,
           Sched<[sched]>;
}

/// PDI_binop_ri_all - Instantiates PDI_binop_ri three times:
///   V<NAME>  - VEX 128-bit form (v16i8), [HasAVX, NoVLX_Or_NoBWI],
///   V<NAME>Y - VEX 256-bit form (v32i8), [HasAVX2, NoVLX_Or_NoBWI],
///   <NAME>   - legacy form (v16i8) with the destination tied to $src1.
multiclass PDI_binop_ri_all<bits<8> opc, Format ImmForm, string OpcodeStr,
                            SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
    defm V#NAME : PDI_binop_ri<opc, ImmForm, "v" # OpcodeStr, OpNode,
                               VR128, v16i8, sched.XMM, 0>,
                  VEX_4V, VEX_WIG;
  }

  let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
    defm V#NAME#Y : PDI_binop_ri<opc, ImmForm, "v" # OpcodeStr, OpNode,
                                 VR256, v32i8, sched.YMM, 0>,
                    VEX_4V, VEX_L, VEX_WIG;
  }

  let Constraints = "$src1 = $dst" in {
    defm NAME : PDI_binop_ri<opc, ImmForm, OpcodeStr, OpNode,
                             VR128, v16i8, sched.XMM>;
  }
}

let ExeDomain = SSEPackedInt in {
  // Left shifts.
  defm PSLLW : PDI_binop_rmi_all<0xF1, 0x71, MRM6r, "psllw",
                                 X86vshl, X86vshli, v8i16, v16i16, v8i16,
                                 SchedWriteVecShift, SchedWriteVecShiftImm,
                                 NoVLX_Or_NoBWI>;
  defm PSLLD : PDI_binop_rmi_all<0xF2, 0x72, MRM6r, "pslld",
                                 X86vshl, X86vshli, v4i32, v8i32, v4i32,
                                 SchedWriteVecShift, SchedWriteVecShiftImm,
                                 NoVLX>;
  defm PSLLQ : PDI_binop_rmi_all<0xF3, 0x73, MRM6r, "psllq",
                                 X86vshl, X86vshli, v2i64, v4i64, v2i64,
                                 SchedWriteVecShift, SchedWriteVecShiftImm,
                                 NoVLX>;

  // Logical right shifts.
  defm PSRLW : PDI_binop_rmi_all<0xD1, 0x71, MRM2r, "psrlw",
                                 X86vsrl, X86vsrli, v8i16, v16i16, v8i16,
                                 SchedWriteVecShift, SchedWriteVecShiftImm,
                                 NoVLX_Or_NoBWI>;
  defm PSRLD : PDI_binop_rmi_all<0xD2, 0x72, MRM2r, "psrld",
                                 X86vsrl, X86vsrli, v4i32, v8i32, v4i32,
                                 SchedWriteVecShift, SchedWriteVecShiftImm,
                                 NoVLX>;
  defm PSRLQ : PDI_binop_rmi_all<0xD3, 0x73, MRM2r, "psrlq",
                                 X86vsrl, X86vsrli, v2i64, v4i64, v2i64,
                                 SchedWriteVecShift, SchedWriteVecShiftImm,
                                 NoVLX>;

  // Arithmetic right shifts.
  defm PSRAW : PDI_binop_rmi_all<0xE1, 0x71, MRM4r, "psraw",
                                 X86vsra, X86vsrai, v8i16, v16i16, v8i16,
                                 SchedWriteVecShift, SchedWriteVecShiftImm,
                                 NoVLX_Or_NoBWI>;
  defm PSRAD : PDI_binop_rmi_all<0xE2, 0x72, MRM4r, "psrad",
                                 X86vsra, X86vsrai, v4i32, v8i32, v4i32,
                                 SchedWriteVecShift, SchedWriteVecShiftImm,
                                 NoVLX>;

  // Whole-register byte shifts (immediate form only).
  defm PSLLDQ : PDI_binop_ri_all<0x73, MRM7r, "pslldq", X86vshldq,
                                 SchedWriteShuffle>;
  defm PSRLDQ : PDI_binop_ri_all<0x73, MRM3r, "psrldq", X86vshrdq,
                                 SchedWriteShuffle>;
} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Comparison Instructions
//===---------------------------------------------------------------------===//

defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8,
                             SchedWriteVecALU, 1, TruePredicate>;
defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16,
                             SchedWriteVecALU, 1, TruePredicate>;
defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32,
                             SchedWriteVecALU, 1, TruePredicate>;
defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8,
                             SchedWriteVecALU, 0, TruePredicate>;
defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16,
                             SchedWriteVecALU, 0, TruePredicate>;
defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
                             SchedWriteVecALU, 0, TruePredicate>;

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Shuffle Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in {
/// sse2_pshuffle - Immediate-controlled shuffle on opcode 0x70. Each
/// instantiation yields six records:
///   V<NAME>ri / V<NAME>mi   - VEX 128-bit, [HasAVX, prd],
///   V<NAME>Yri / V<NAME>Ymi - VEX 256-bit, [HasAVX2, prd],
///   ri / mi                 - legacy SSE2, [UseSSE2].
/// The `mi` forms take the shuffled vector from memory: the VEX forms fold it
/// through `load`, the legacy form through `memop`.
multiclass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256,
                         SDNode OpNode, X86SchedWriteWidths sched,
                         Predicate prd> {
  // Operand string common to all six forms, and the resulting mnemonics.
  defvar Operands = "\t{$src2, $src1, $dst|$dst, $src1, $src2}";
  defvar VexAsm = "v" # OpcodeStr # Operands;
  defvar SseAsm = OpcodeStr # Operands;

  let Predicates = [HasAVX, prd] in {
    def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, u8imm:$src2),
                        VexAsm,
                        [(set VR128:$dst,
                              (vt128 (OpNode VR128:$src1,
                                             (i8 timm:$src2))))]>,
                    VEX, Sched<[sched.XMM]>, VEX_WIG;
    def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
                        (ins i128mem:$src1, u8imm:$src2),
                        VexAsm,
                        [(set VR128:$dst,
                              (vt128 (OpNode (load addr:$src1),
                                             (i8 timm:$src2))))]>,
                    VEX, Sched<[sched.XMM.Folded]>, VEX_WIG;
  }

  let Predicates = [HasAVX2, prd] in {
    def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst),
                         (ins VR256:$src1, u8imm:$src2),
                         VexAsm,
                         [(set VR256:$dst,
                               (vt256 (OpNode VR256:$src1,
                                              (i8 timm:$src2))))]>,
                     VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
    def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
                         (ins i256mem:$src1, u8imm:$src2),
                         VexAsm,
                         [(set VR256:$dst,
                               (vt256 (OpNode (load addr:$src1),
                                              (i8 timm:$src2))))]>,
                     VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG;
  }

  let Predicates = [UseSSE2] in {
    def ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src1, u8imm:$src2),
                 SseAsm,
                 [(set VR128:$dst,
                       (vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>,
             Sched<[sched.XMM]>;
    def mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
                 (ins i128mem:$src1, u8imm:$src2),
                 SseAsm,
                 [(set VR128:$dst,
                       (vt128 (OpNode (memop addr:$src1),
                                      (i8 timm:$src2))))]>,
             Sched<[sched.XMM.Folded]>;
  }
}
} // ExeDomain = SSEPackedInt

// The trailing PD / XS / XD class is what distinguishes the three shuffles
// here; all of them are built on opcode 0x70 by sse2_pshuffle.
defm PSHUFD  : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd,
                             SchedWriteShuffle, NoVLX>, PD;
defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw,
                             SchedWriteShuffle, NoVLX_Or_NoBWI>, XS;
defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw,
                             SchedWriteShuffle, NoVLX_Or_NoBWI>, XD;

//===---------------------------------------------------------------------===//
// Packed Integer Pack Instructions (SSE & AVX)
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in {
/// sse2_pack - Pack instruction built on PDI. The sources have type ArgVT and
/// the result has type OutVT. Defines a register/register form (rr) and a
/// register/memory form (rm) that folds the second operand through ld_frag.
/// Is2Addr selects the two-operand assembly syntax; with Is2Addr = 0 the
/// three-operand syntax is emitted instead.
multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
                     ValueType ArgVT, SDNode OpNode, RegisterClass RC,
                     X86MemOperand x86memop, X86FoldableSchedWrite sched,
                     PatFrag ld_frag, bit Is2Addr = 1> {
  // Assembly string shared by the rr and rm forms.
  defvar AsmText =
      !if(Is2Addr,
          OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
          OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}");

  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
               AsmText,
               [(set RC:$dst,
                     (OutVT (OpNode (ArgVT RC:$src1), RC:$src2)))]>,
           Sched<[sched]>;
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86memop:$src2),
               AsmText,
               [(set RC:$dst,
                     (OutVT (OpNode (ArgVT RC:$src1),
                                    (ld_frag addr:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

/// sse4_pack - Same shape as sse2_pack, except that both records derive from
/// SS48I rather than PDI.
multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
                     ValueType ArgVT, SDNode OpNode, RegisterClass RC,
                     X86MemOperand x86memop, X86FoldableSchedWrite sched,
                     PatFrag ld_frag, bit Is2Addr = 1> {
  // Assembly string shared by the rr and rm forms.
  defvar AsmText =
      !if(Is2Addr,
          OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
          OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}");

  def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
                 AsmText,
                 [(set RC:$dst,
                       (OutVT (OpNode (ArgVT RC:$src1), RC:$src2)))]>,
           Sched<[sched]>;
  def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
                 (ins RC:$src1, x86memop:$src2),
                 AsmText,
                 [(set RC:$dst,
                       (OutVT (OpNode (ArgVT RC:$src1),
                                      (ld_frag addr:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// VEX-encoded 128-bit packs.
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss,
                             VR128, i128mem, SchedWriteShuffle.XMM, load, 0>,
                   VEX_4V, VEX_WIG;
  defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss,
                             VR128, i128mem, SchedWriteShuffle.XMM, load, 0>,
                   VEX_4V, VEX_WIG;

  defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus,
                             VR128, i128mem, SchedWriteShuffle.XMM, load, 0>,
                   VEX_4V, VEX_WIG;
  defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus,
                             VR128, i128mem, SchedWriteShuffle.XMM, load, 0>,
                   VEX_4V, VEX_WIG;
}

// VEX-encoded 256-bit packs.
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  defm VPACKSSWBY : sse2_pack<0x63, "vpacksswb", v32i8, v16i16, X86Packss,
                              VR256, i256mem, SchedWriteShuffle.YMM, load, 0>,
                    VEX_4V, VEX_L, VEX_WIG;
  defm VPACKSSDWY : sse2_pack<0x6B, "vpackssdw", v16i16, v8i32, X86Packss,
                              VR256, i256mem, SchedWriteShuffle.YMM, load, 0>,
                    VEX_4V, VEX_L, VEX_WIG;

  defm VPACKUSWBY : sse2_pack<0x67, "vpackuswb", v32i8, v16i16, X86Packus,
                              VR256, i256mem, SchedWriteShuffle.YMM, load, 0>,
                    VEX_4V, VEX_L, VEX_WIG;
  defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus,
                              VR256, i256mem, SchedWriteShuffle.YMM, load, 0>,
                    VEX_4V, VEX_L, VEX_WIG;
}

// Legacy packs; the destination is tied to $src1.
let Constraints = "$src1 = $dst" in {
  defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss,
                            VR128, i128mem, SchedWriteShuffle.XMM, memop>;
  defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss,
                            VR128, i128mem, SchedWriteShuffle.XMM, memop>;

  defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus,
                            VR128, i128mem, SchedWriteShuffle.XMM, memop>;

  defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus,
                            VR128, i128mem, SchedWriteShuffle.XMM, memop>;
}
} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Unpack Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in {
/// sse2_unpack - Unpack (interleave) instruction built on PDI, operating on
/// vectors of type vt. Defines a register/register form (rr) and a
/// register/memory form (rm) that folds the second operand through ld_frag.
/// Is2Addr selects the two-operand assembly syntax; with Is2Addr = 0 the
/// three-operand syntax is emitted instead.
multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
                       SDNode OpNode, RegisterClass RC, X86MemOperand x86memop,
                       X86FoldableSchedWrite sched, PatFrag ld_frag,
                       bit Is2Addr = 1> {
  // Assembly string shared by the rr and rm forms.
  defvar AsmText =
      !if(Is2Addr,
          OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
          OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}");

  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
               AsmText,
               [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
           Sched<[sched]>;
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86memop:$src2),
               AsmText,
               [(set RC:$dst,
                     (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// VEX 128-bit byte/word unpacks.
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, VR128,
                                i128mem, SchedWriteShuffle.XMM, load, 0>,
                    VEX_4V, VEX_WIG;
  defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, VR128,
                                i128mem, SchedWriteShuffle.XMM, load, 0>,
                    VEX_4V, VEX_WIG;
  defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, VR128,
                                i128mem, SchedWriteShuffle.XMM, load, 0>,
                    VEX_4V, VEX_WIG;
  defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, VR128,
                                i128mem, SchedWriteShuffle.XMM, load, 0>,
                    VEX_4V, VEX_WIG;
}

// VEX 128-bit dword/qword unpacks.
let Predicates = [HasAVX, NoVLX] in {
  defm VPUNPCKLDQ  : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, VR128,
                                 i128mem, SchedWriteShuffle.XMM, load, 0>,
                     VEX_4V, VEX_WIG;
  defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, VR128,
                                 i128mem, SchedWriteShuffle.XMM, load, 0>,
                     VEX_4V, VEX_WIG;
  defm VPUNPCKHDQ  : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, VR128,
                                 i128mem, SchedWriteShuffle.XMM, load, 0>,
                     VEX_4V, VEX_WIG;
  defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, VR128,
                                 i128mem, SchedWriteShuffle.XMM, load, 0>,
                     VEX_4V, VEX_WIG;
}

// VEX 256-bit byte/word unpacks.
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  defm VPUNPCKLBWY : sse2_unpack<0x60, "vpunpcklbw", v32i8, X86Unpckl, VR256,
                                 i256mem, SchedWriteShuffle.YMM, load, 0>,
                     VEX_4V, VEX_L, VEX_WIG;
  defm VPUNPCKLWDY : sse2_unpack<0x61, "vpunpcklwd", v16i16, X86Unpckl, VR256,
                                 i256mem, SchedWriteShuffle.YMM, load, 0>,
                     VEX_4V, VEX_L, VEX_WIG;
  defm VPUNPCKHBWY : sse2_unpack<0x68, "vpunpckhbw", v32i8, X86Unpckh, VR256,
                                 i256mem, SchedWriteShuffle.YMM, load, 0>,
                     VEX_4V, VEX_L, VEX_WIG;
  defm VPUNPCKHWDY : sse2_unpack<0x69, "vpunpckhwd", v16i16, X86Unpckh, VR256,
                                 i256mem, SchedWriteShuffle.YMM, load, 0>,
                     VEX_4V, VEX_L, VEX_WIG;
}

// VEX 256-bit dword/qword unpacks.
let Predicates = [HasAVX2, NoVLX] in {
  defm VPUNPCKLDQY  : sse2_unpack<0x62, "vpunpckldq", v8i32, X86Unpckl, VR256,
                                  i256mem, SchedWriteShuffle.YMM, load, 0>,
                      VEX_4V, VEX_L, VEX_WIG;
  defm VPUNPCKLQDQY : sse2_unpack<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, VR256,
                                  i256mem, SchedWriteShuffle.YMM, load, 0>,
                      VEX_4V, VEX_L, VEX_WIG;
  defm VPUNPCKHDQY  : sse2_unpack<0x6A, "vpunpckhdq", v8i32, X86Unpckh, VR256,
                                  i256mem, SchedWriteShuffle.YMM, load, 0>,
                      VEX_4V, VEX_L, VEX_WIG;
  defm VPUNPCKHQDQY : sse2_unpack<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, VR256,
                                  i256mem, SchedWriteShuffle.YMM, load, 0>,
                      VEX_4V, VEX_L, VEX_WIG;
}

// Legacy unpacks; the destination is tied to $src1.
let Constraints = "$src1 = $dst" in {
  defm PUNPCKLBW  : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, VR128,
                                i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKLWD  : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, VR128,
                                i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKLDQ  : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, VR128,
                                i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, VR128,
                                i128mem, SchedWriteShuffle.XMM, memop>;

  defm PUNPCKHBW  : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, VR128,
                                i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKHWD  : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, VR128,
                                i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKHDQ  : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, VR128,
                                i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, VR128,
                                i128mem, SchedWriteShuffle.XMM, memop>;
}
} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Extract and Insert
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in {

/// sse2_pinsrw - Word insert into a VR128 register at an immediate index,
/// from either a GR32orGR64 register (rr) or a 16-bit memory operand (rm).
/// Is2Addr selects between the "pinsrw" two-operand assembly string and the
/// "vpinsrw" three-operand one.
multiclass sse2_pinsrw<bit Is2Addr = 1> {
  def rr : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
               (ins VR128:$src1, GR32orGR64:$src2, u8imm:$src3),
               !if(Is2Addr,
                   "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                   "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
               [(set VR128:$dst,
                     (X86pinsrw VR128:$src1, GR32orGR64:$src2, timm:$src3))]>,
           Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
  def rm : Ii8<0xC4, MRMSrcMem, (outs VR128:$dst),
               (ins VR128:$src1, i16mem:$src2, u8imm:$src3),
               !if(Is2Addr,
                   "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                   "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
               [(set VR128:$dst,
                     (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
                                timm:$src3))]>,
           Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

// Extract: word element of a v8i16 register into a GPR.
let Predicates = [HasAVX, NoBWI] in {
  def VPEXTRWrr : Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                      (ins VR128:$src1, u8imm:$src2),
                      "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set GR32orGR64:$dst,
                            (X86pextrw (v8i16 VR128:$src1), timm:$src2))]>,
                  PD, VEX, VEX_WIG, Sched<[WriteVecExtract]>;
}
def PEXTRWrr : PDIi8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                     (ins VR128:$src1, u8imm:$src2),
                     "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set GR32orGR64:$dst,
                           (X86pextrw (v8i16 VR128:$src1), timm:$src2))]>,
               Sched<[WriteVecExtract]>;

// Insert: instantiate the multiclass for the VEX and the legacy encodings.
let Predicates = [HasAVX, NoBWI] in {
  defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V, VEX_WIG;
}

let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in {
  defm PINSRW : sse2_pinsrw, PD;
}

} // ExeDomain = SSEPackedInt

// Always select FP16 instructions if available.
// f16 load/store/bitcast lowered through PINSRW/PEXTRW. AddedComplexity is
// negative so that these patterns rank below competing ones.
let Predicates = [UseSSE2], AddedComplexity = -10 in {
  def : Pat<(f16 (load addr:$src)),
            (COPY_TO_REGCLASS
              (PINSRWrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16)>;
  def : Pat<(store f16:$src, addr:$dst),
            (MOV16mr addr:$dst,
              (EXTRACT_SUBREG
                (PEXTRWrr (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0),
                sub_16bit))>;
  def : Pat<(i16 (bitconvert f16:$src)),
            (EXTRACT_SUBREG
              (PEXTRWrr (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0),
              sub_16bit)>;
  def : Pat<(f16 (bitconvert i16:$src)),
            (COPY_TO_REGCLASS
              (PINSRWrr (v8i16 (IMPLICIT_DEF)),
                        (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit),
                        0),
              FR16)>;
}

// Same lowering using the VEX-encoded instructions.
let Predicates = [HasAVX, NoBWI] in {
  def : Pat<(f16 (load addr:$src)),
            (COPY_TO_REGCLASS
              (VPINSRWrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16)>;
  def : Pat<(i16 (bitconvert f16:$src)),
            (EXTRACT_SUBREG
              (VPEXTRWrr (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0),
              sub_16bit)>;
  def : Pat<(f16 (bitconvert i16:$src)),
            (COPY_TO_REGCLASS
              (VPINSRWrr (v8i16 (IMPLICIT_DEF)),
                         (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit),
                         0),
              FR16)>;
}

//===---------------------------------------------------------------------===//
// SSE2 - Packed Mask Creation
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in {

def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg,
                       (outs GR32orGR64:$dst), (ins VR128:$src),
                       "pmovmskb\t{$src, $dst|$dst, $src}",
                       [(set GR32orGR64:$dst,
                             (X86movmsk (v16i8 VR128:$src)))]>,
                  Sched<[WriteVecMOVMSK]>, VEX, VEX_WIG;

let Predicates = [HasAVX2] in {
def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg,
                        (outs GR32orGR64:$dst), (ins VR256:$src),
                        "pmovmskb\t{$src, $dst|$dst, $src}",
                        [(set GR32orGR64:$dst,
                              (X86movmsk (v32i8 VR256:$src)))]>,
                   Sched<[WriteVecMOVMSKY]>, VEX, VEX_L, VEX_WIG;
}

def PMOVMSKBrr : PDI<0xD7, MRMSrcReg,
                     (outs GR32orGR64:$dst), (ins VR128:$src),
                     "pmovmskb\t{$src, $dst|$dst, $src}",
                     [(set GR32orGR64:$dst,
                           (X86movmsk (v16i8 VR128:$src)))]>,
                 Sched<[WriteVecMOVMSK]>;

} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// SSE2 - Conditional Store
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in {
// As VEX does not have separate instruction contexts for address size
// overrides, VMASKMOVDQU and VMASKMOVDQU64 would have a decode conflict.
// Prefer VMASKMOVDQU64; VMASKMOVDQU is therefore marked isAsmParserOnly.
let Uses = [EDI], Predicates = [HasAVX], isAsmParserOnly = 1 in {
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
                       (ins VR128:$src, VR128:$mask),
                       "maskmovdqu\t{$mask, $src|$src, $mask}",
                       [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask,
                                                  EDI)]>,
                  VEX, VEX_WIG;
}
let Uses = [RDI], Predicates = [HasAVX,In64BitMode] in {
def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
                         (ins VR128:$src, VR128:$mask),
                         "maskmovdqu\t{$mask, $src|$src, $mask}",
                         [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask,
                                                    RDI)]>,
                    VEX, VEX_WIG;
}

let Uses = [EDI], Predicates = [UseSSE2] in {
def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs),
                     (ins VR128:$src, VR128:$mask),
                     "maskmovdqu\t{$mask, $src|$src, $mask}",
                     [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask,
                                                EDI)]>;
}
let Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in {
def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs),
                       (ins VR128:$src, VR128:$mask),
                       "maskmovdqu\t{$mask, $src|$src, $mask}",
                       [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask,
                                                  RDI)]>;
}

} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// SSE2 - Move Doubleword/Quadword
//===---------------------------------------------------------------------===//

//===---------------------------------------------------------------------===//
// Move Int Doubleword/Quadword to Packed Doubleword/Quadword Int
//
let ExeDomain = SSEPackedInt in {

// VEX-encoded forms.
def VMOVDI2PDIrr : VS2I<0x6E, MRMSrcReg,
                        (outs VR128:$dst), (ins GR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                              (v4i32 (scalar_to_vector GR32:$src)))]>,
                   VEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVDI2PDIrm : VS2I<0x6E, MRMSrcMem,
                        (outs VR128:$dst), (ins i32mem:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                              (v4i32 (scalar_to_vector
                                       (loadi32 addr:$src))))]>,
                   VEX, Sched<[WriteVecLoad]>;
def VMOV64toPQIrr : VRS2I<0x6E, MRMSrcReg,
                          (outs VR128:$dst), (ins GR64:$src),
                          "movq\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst,
                                (v2i64 (scalar_to_vector GR64:$src)))]>,
                    VEX, Sched<[WriteVecMoveFromGpr]>;
// Pattern-less memory form; kept visible to the disassembler.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    mayLoad = 1 in {
def VMOV64toPQIrm : VRS2I<0x6E, MRMSrcMem,
                          (outs VR128:$dst), (ins i64mem:$src),
                          "movq\t{$src, $dst|$dst, $src}", []>,
                    VEX, Sched<[WriteVecLoad]>;
}
// GR64 -> FR64 bitcast.
let isCodeGenOnly = 1 in {
def VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg,
                         (outs FR64:$dst), (ins GR64:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                         [(set FR64:$dst, (bitconvert GR64:$src))]>,
                   VEX, Sched<[WriteVecMoveFromGpr]>;
}

// Legacy SSE2 forms.
def MOVDI2PDIrr : S2I<0x6E, MRMSrcReg,
                      (outs VR128:$dst), (ins GR32:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                            (v4i32 (scalar_to_vector GR32:$src)))]>,
                  Sched<[WriteVecMoveFromGpr]>;
def MOVDI2PDIrm : S2I<0x6E, MRMSrcMem,
                      (outs VR128:$dst), (ins i32mem:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                            (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                  Sched<[WriteVecLoad]>;
def MOV64toPQIrr : RS2I<0x6E, MRMSrcReg,
                        (outs VR128:$dst), (ins GR64:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                              (v2i64 (scalar_to_vector GR64:$src)))]>,
                   Sched<[WriteVecMoveFromGpr]>;
// Pattern-less memory form; kept visible to the disassembler.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    mayLoad = 1 in {
def MOV64toPQIrm : RS2I<0x6E, MRMSrcMem,
                        (outs VR128:$dst), (ins i64mem:$src),
                        "movq\t{$src, $dst|$dst, $src}", []>,
                   Sched<[WriteVecLoad]>;
}
// GR64 -> FR64 bitcast.
let isCodeGenOnly = 1 in {
def MOV64toSDrr : RS2I<0x6E, MRMSrcReg,
                       (outs FR64:$dst), (ins GR64:$src),
                       "movq\t{$src, $dst|$dst, $src}",
                       [(set FR64:$dst, (bitconvert GR64:$src))]>,
                  Sched<[WriteVecMoveFromGpr]>;
}
} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// Move Int Doubleword to Single Scalar
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
  // GR32 -> FR32 bitcast.
  def VMOVDI2SSrr : VS2I<0x6E, MRMSrcReg,
                         (outs FR32:$dst), (ins GR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set FR32:$dst, (bitconvert GR32:$src))]>,
                    VEX, Sched<[WriteVecMoveFromGpr]>;

  def MOVDI2SSrr : S2I<0x6E, MRMSrcReg,
                       (outs FR32:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert GR32:$src))]>,
                   Sched<[WriteVecMoveFromGpr]>;

} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int first element to Doubleword Int
//
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIrr : VS2I<0x7E, MRMDestReg,
                        (outs GR32:$dst), (ins VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set GR32:$dst,
                              (extractelt (v4i32 VR128:$src), (iPTR 0)))]>,
                   VEX, Sched<[WriteVecMoveToGpr]>;
def VMOVPDI2DImr : VS2I<0x7E, MRMDestMem,
                        (outs), (ins i32mem:$dst, VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(store (i32 (extractelt (v4i32 VR128:$src),
                                                 (iPTR 0))),
                                addr:$dst)]>,
                   VEX, Sched<[WriteVecStore]>;
def MOVPDI2DIrr : S2I<0x7E, MRMDestReg,
                      (outs GR32:$dst), (ins VR128:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst,
                            (extractelt (v4i32 VR128:$src), (iPTR 0)))]>,
                  Sched<[WriteVecMoveToGpr]>;
def MOVPDI2DImr : S2I<0x7E, MRMDestMem,
                      (outs), (ins i32mem:$dst, VR128:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(store (i32 (extractelt (v4i32 VR128:$src),
                                               (iPTR 0))),
                              addr:$dst)]>,
                  Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// Move Packed Quadword Int first element to Quadword Int
//
let ExeDomain = SSEPackedInt in {
let SchedRW = [WriteVecMoveToGpr] in {
def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg,
                          (outs GR64:$dst), (ins VR128:$src),
                          "movq\t{$src, $dst|$dst, $src}",
                          [(set GR64:$dst,
                                (extractelt (v2i64 VR128:$src), (iPTR 0)))]>,
                    VEX;

def MOVPQIto64rr : RS2I<0x7E, MRMDestReg,
                        (outs GR64:$dst), (ins VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set GR64:$dst,
                              (extractelt (v2i64 VR128:$src), (iPTR 0)))]>;
} // SchedRW

// Pattern-less store forms; kept visible to the disassembler.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    mayStore = 1 in {
def VMOVPQIto64mr : VRS2I<0x7E, MRMDestMem,
                          (outs), (ins i64mem:$dst, VR128:$src),
                          "movq\t{$src, $dst|$dst, $src}", []>,
                    VEX, Sched<[WriteVecStore]>;
def MOVPQIto64mr : RS2I<0x7E, MRMDestMem,
                        (outs), (ins i64mem:$dst, VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}", []>,
                   Sched<[WriteVecStore]>;
}
} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// Bitcast FR64 <-> GR64
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
  def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg,
                           (outs GR64:$dst), (ins FR64:$src),
                           "movq\t{$src, $dst|$dst, $src}",
                           [(set GR64:$dst, (bitconvert FR64:$src))]>,
                     VEX, Sched<[WriteVecMoveToGpr]>;

  def MOVSDto64rr : RS2I<0x7E, MRMDestReg,
                         (outs GR64:$dst), (ins FR64:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (bitconvert FR64:$src))]>,
                    Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

//===---------------------------------------------------------------------===//
// Move Scalar Single to Doubleword Int (bitcast FR32 -> GR32)
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
  def VMOVSS2DIrr : VS2I<0x7E, MRMDestReg,
                         (outs GR32:$dst), (ins FR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst, (bitconvert FR32:$src))]>,
                    VEX, Sched<[WriteVecMoveToGpr]>;
  def MOVSS2DIrr : S2I<0x7E, MRMDestReg,
                       (outs GR32:$dst), (ins FR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (bitconvert FR32:$src))]>,
                   Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

let Predicates = [UseAVX] in {
  // An any-extended GR8 is placed in an otherwise undefined GR32 and moved.
  def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
            (VMOVDI2PDIrr
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit)))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIrr GR64:$src)>;

  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  // These instructions also write zeros in the high part of a 256-bit register.
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i64 0),
                           (v4i32 (VMOVDI2PDIrm addr:$src)),
                           sub_xmm)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (MOVDI2PDIrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (MOV64toPQIrr GR64:$src)>;
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (MOVDI2PDIrm addr:$src)>;
}

// Before the MC layer of LLVM existed, clang emitted "movd" assembly instead
// of "movq" due to a MacOS parsing limitation. In order to parse old assembly,
// we add these aliases.
def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
                (MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
                (MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
// Allow "vmovd" but print "vmovq" since we don't need compatibility for AVX.
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIrr VR128:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64rr GR64:$dst, VR128:$src), 0>;

//===---------------------------------------------------------------------===//
// SSE2 - Move Quadword
//===---------------------------------------------------------------------===//

//===---------------------------------------------------------------------===//
// Move Quadword Int to Packed Quadword Int
//

let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLoad] in {
def VMOVQI2PQIrm : I<0x7E, MRMSrcMem,
                     (outs VR128:$dst), (ins i64mem:$src),
                     "vmovq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                           (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                   XS, VEX, Requires<[UseAVX]>, VEX_WIG;
// SSE2 instruction with XS Prefix
def MOVQI2PQIrm : I<0x7E, MRMSrcMem,
                    (outs VR128:$dst), (ins i64mem:$src),
                    "movq\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst,
                          (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                  XS, Requires<[UseSSE2]>;
} // ExeDomain, SchedRW

//===---------------------------------------------------------------------===//
// Move Packed Quadword Int to Quadword Int
//
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecStore] in {
def VMOVPQI2QImr : VS2I<0xD6, MRMDestMem,
                        (outs), (ins i64mem:$dst, VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(store (i64 (extractelt (v2i64 VR128:$src),
                                                 (iPTR 0))),
                                addr:$dst)]>,
                   VEX, VEX_WIG;
def MOVPQI2QImr : S2I<0xD6, MRMDestMem,
                      (outs), (ins i64mem:$dst, VR128:$src),
                      "movq\t{$src, $dst|$dst, $src}",
                      [(store (i64 (extractelt (v2i64 VR128:$src),
                                               (iPTR 0))),
                              addr:$dst)]>;
} // ExeDomain, SchedRW

// For disassembler only
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVPQI2QIrr : VS2I<0xD6, MRMDestReg,
                        (outs VR128:$dst), (ins VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}", []>,
                   VEX, VEX_WIG;
def MOVPQI2QIrr : S2I<0xD6, MRMDestReg,
                      (outs VR128:$dst), (ins VR128:$src),
                      "movq\t{$src, $dst|$dst, $src}", []>;
}

// The ".s" mnemonic suffix selects the 0xD6 register-register encodings.
def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIrr VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
                (MOVPQI2QIrr VR128:$dst, VR128:$src), 0>;

let Predicates = [UseAVX] in {
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0),
                           (v2i64 (VMOVQI2PQIrm addr:$src)),
                           sub_xmm)>;

  def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
            (VMOVPQI2QImr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (MOVQI2PQIrm addr:$src)>;

  def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
            (MOVPQI2QImr addr:$dst, VR128:$src)>;
}

//===---------------------------------------------------------------------===//
// Move from XMM to XMM and clear the upper 64 bits. Note: there is a bug in
// the IA32 document; movq xmm1, xmm2 does clear the high bits.
//
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                               (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
                       XS, VEX, Requires<[UseAVX]>, VEX_WIG;
def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                              (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
                      XS, Requires<[UseSSE2]>;
} // ExeDomain, SchedRW

// v2f64 X86vzmovl is selected to the same integer-domain instruction.
let Predicates = [UseAVX] in {
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
            (VMOVZPQILo2PQIrr VR128:$src)>;
}
let Predicates = [UseSSE2] in {
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
            (MOVZPQILo2PQIrr VR128:$src)>;
}

// 256-bit X86vzmovl: apply the 128-bit instruction to the sub_xmm part and
// wrap the result with SUBREG_TO_REG.
let Predicates = [UseAVX] in {
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
              (v2f64 (VMOVZPQILo2PQIrr
                       (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
              (v2i64 (VMOVZPQILo2PQIrr
                       (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)))),
              sub_xmm)>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

/// sse3_replicate_sfp - Register (rr) and memory (rm) forms of a one-source
/// instruction selected from OpNode on value type vt. mem_frag is the load
/// fragment matched by the rm form.
multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
                              ValueType vt, RegisterClass RC, PatFrag mem_frag,
                              X86MemOperand x86memop,
                              X86FoldableSchedWrite sched> {
  def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                [(set RC:$dst, (vt (OpNode RC:$src)))]>,
           Sched<[sched]>;
  def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>,
           Sched<[sched.Folded]>;
}

let Predicates = [HasAVX, NoVLX] in {
  defm VMOVSHDUP  : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
                                       v4f32, VR128, loadv4f32, f128mem,
                                       SchedWriteFShuffle.XMM>,
                    VEX, VEX_WIG;
  defm VMOVSLDUP  : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
                                       v4f32, VR128, loadv4f32, f128mem,
                                       SchedWriteFShuffle.XMM>,
                    VEX, VEX_WIG;
  defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
                                       v8f32, VR256, loadv8f32, f256mem,
                                       SchedWriteFShuffle.YMM>,
                    VEX, VEX_L, VEX_WIG;
  defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
                                       v8f32, VR256, loadv8f32, f256mem,
                                       SchedWriteFShuffle.YMM>,
                    VEX, VEX_L, VEX_WIG;
}
defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup",
                                   v4f32, VR128, memopv4f32, f128mem,
                                   SchedWriteFShuffle.XMM>;
defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup",
                                   v4f32, VR128, memopv4f32, f128mem,
                                   SchedWriteFShuffle.XMM>;

// Integer-typed uses of the same nodes select the same instructions.
let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v4i32 (X86Movshdup VR128:$src)),
            (VMOVSHDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movshdup (load addr:$src))),
            (VMOVSHDUPrm addr:$src)>;
  def : Pat<(v4i32 (X86Movsldup VR128:$src)),
            (VMOVSLDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movsldup (load addr:$src))),
            (VMOVSLDUPrm addr:$src)>;
  def : Pat<(v8i32 (X86Movshdup VR256:$src)),
            (VMOVSHDUPYrr VR256:$src)>;
  def : Pat<(v8i32 (X86Movshdup (load addr:$src))),
            (VMOVSHDUPYrm addr:$src)>;
  def : Pat<(v8i32 (X86Movsldup VR256:$src)),
            (VMOVSLDUPYrr VR256:$src)>;
  def : Pat<(v8i32 (X86Movsldup (load addr:$src))),
            (VMOVSLDUPYrm addr:$src)>;
}

let Predicates = [UseSSE3] in {
  def : Pat<(v4i32 (X86Movshdup VR128:$src)),
            (MOVSHDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movshdup (memop addr:$src))),
            (MOVSHDUPrm addr:$src)>;
  def : Pat<(v4i32 (X86Movsldup VR128:$src)),
            (MOVSLDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movsldup (memop addr:$src))),
            (MOVSLDUPrm addr:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Replicate Double FP - MOVDDUP
//===---------------------------------------------------------------------===//

/// sse3_replicate_dfp - 128-bit X86Movddup in register (rr) and memory (rm)
/// forms; the rm form matches a scalar f64 load wrapped in scalar_to_vector.
multiclass sse3_replicate_dfp<string OpcodeStr, X86SchedWriteWidths sched> {
  def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v2f64 (X86Movddup VR128:$src)))]>,
           Sched<[sched.XMM]>;
  def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst,
                      (v2f64 (X86Movddup
                               (scalar_to_vector (loadf64 addr:$src)))))]>,
           Sched<[sched.XMM.Folded]>;
}

// FIXME: Merge with above classes when there are patterns for the ymm version
multiclass sse3_replicate_dfp_y<string OpcodeStr, X86SchedWriteWidths sched> {
def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
44770b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 44780b57cec5SDimitry Andric [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>, 44790b57cec5SDimitry Andric Sched<[sched.YMM]>; 44800b57cec5SDimitry Andricdef rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), 44810b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 44820b57cec5SDimitry Andric [(set VR256:$dst, 44830b57cec5SDimitry Andric (v4f64 (X86Movddup (loadv4f64 addr:$src))))]>, 44840b57cec5SDimitry Andric Sched<[sched.YMM.Folded]>; 44850b57cec5SDimitry Andric} 44860b57cec5SDimitry Andric 44870b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 44880b57cec5SDimitry Andric defm VMOVDDUP : sse3_replicate_dfp<"vmovddup", SchedWriteFShuffle>, 44890b57cec5SDimitry Andric VEX, VEX_WIG; 44900b57cec5SDimitry Andric defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup", SchedWriteFShuffle>, 44910b57cec5SDimitry Andric VEX, VEX_L, VEX_WIG; 44920b57cec5SDimitry Andric} 44930b57cec5SDimitry Andric 44940b57cec5SDimitry Andricdefm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>; 44950b57cec5SDimitry Andric 44960b57cec5SDimitry Andric 44970b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 44980b57cec5SDimitry Andric def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))), 44990b57cec5SDimitry Andric (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; 45000b57cec5SDimitry Andric} 45010b57cec5SDimitry Andric 45020b57cec5SDimitry Andriclet Predicates = [UseSSE3] in { 45030b57cec5SDimitry Andric def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))), 45040b57cec5SDimitry Andric (MOVDDUPrm addr:$src)>; 45050b57cec5SDimitry Andric} 45060b57cec5SDimitry Andric 45070b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 45080b57cec5SDimitry Andric// SSE3 - Move Unaligned Integer 45090b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 

// LDDQU: memory-only forms matched from the ldu_dq intrinsics.
let Predicates = [HasAVX] in {
  def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                      "vlddqu\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
                 Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
  def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                       "vlddqu\t{$src, $dst|$dst, $src}",
                       [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>,
                  Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, VEX_L, VEX_WIG;
} // Predicates

def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "lddqu\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
              Sched<[SchedWriteVecMoveLS.XMM.RM]>;

//===---------------------------------------------------------------------===//
// SSE3 - Arithmetic
//===---------------------------------------------------------------------===//

/// sse3_addsub - Register (rr) and load-folding (rm) forms of opcode 0xD0,
/// both matching X86Addsub on type `vt`.
///   RC / x86memop - register class and memory operand of the operands.
///   sched         - scheduling class; rm uses sched.Folded plus
///                   sched.ReadAfterFold.
///   ld_frag       - load fragment matched for the memory operand.
///   Is2Addr       - selects the two-operand assembly string (default) or the
///                   three-operand one.
/// Both forms are marked as reading MXCSR and as possibly raising an FP
/// exception.
multiclass sse3_addsub<string OpcodeStr, ValueType vt, RegisterClass RC,
                       X86MemOperand x86memop, X86FoldableSchedWrite sched,
                       PatFrag ld_frag, bit Is2Addr = 1> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : I<0xD0, MRMSrcReg,
             (outs RC:$dst), (ins RC:$src1, RC:$src2),
             !if(Is2Addr,
                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
             [(set RC:$dst, (vt (X86Addsub RC:$src1, RC:$src2)))]>,
           Sched<[sched]>;
  def rm : I<0xD0, MRMSrcMem,
             (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
             !if(Is2Addr,
                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
             [(set RC:$dst,
               (vt (X86Addsub RC:$src1, (ld_frag addr:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

let Predicates = [HasAVX] in {
  let ExeDomain = SSEPackedSingle in {
    defm VADDSUBPS  : sse3_addsub<"vaddsubps", v4f32, VR128, f128mem,
                                  SchedWriteFAddSizes.PS.XMM, loadv4f32, 0>,
                      XD, VEX_4V, VEX_WIG;
    defm VADDSUBPSY : sse3_addsub<"vaddsubps", v8f32, VR256, f256mem,
                                  SchedWriteFAddSizes.PS.YMM, loadv8f32, 0>,
                      XD, VEX_4V, VEX_L, VEX_WIG;
  }
  let ExeDomain = SSEPackedDouble in {
    defm VADDSUBPD  : sse3_addsub<"vaddsubpd", v2f64, VR128, f128mem,
                                  SchedWriteFAddSizes.PD.XMM, loadv2f64, 0>,
                      PD, VEX_4V, VEX_WIG;
    defm VADDSUBPDY : sse3_addsub<"vaddsubpd", v4f64, VR256, f256mem,
                                  SchedWriteFAddSizes.PD.YMM, loadv4f64, 0>,
                      PD, VEX_4V, VEX_L, VEX_WIG;
  }
}
// Legacy encodings: two-operand form with $src1 tied to $dst.
let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
  let ExeDomain = SSEPackedSingle in
  defm ADDSUBPS : sse3_addsub<"addsubps", v4f32, VR128, f128mem,
                              SchedWriteFAddSizes.PS.XMM, memopv4f32>, XD;
  let ExeDomain = SSEPackedDouble in
  defm ADDSUBPD : sse3_addsub<"addsubpd", v2f64, VR128, f128mem,
                              SchedWriteFAddSizes.PD.XMM, memopv2f64>, PD;
}

//===---------------------------------------------------------------------===//
// SSE3 Instructions
//===---------------------------------------------------------------------===//

// Horizontal ops

/// S3D_Int - rr/rm pair deriving from S3DI with opcode `o`, matching
/// `OpNode` on type `vt`. Is2Addr selects the two- or three-operand assembly
/// string. Both forms read MXCSR and may raise an FP exception.
multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
                   X86MemOperand x86memop, SDNode OpNode,
                   X86FoldableSchedWrite sched, PatFrag ld_frag,
                   bit Is2Addr = 1> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
                !if(Is2Addr,
                    !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                    !strconcat(OpcodeStr,
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
           Sched<[sched]>;

  def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                !if(Is2Addr,
                    !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                    !strconcat(OpcodeStr,
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                [(set RC:$dst,
                  (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

/// S3_Int - Same shape as S3D_Int, except that the records derive from S3I
/// instead of S3DI.
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
                  X86MemOperand x86memop, SDNode OpNode,
                  X86FoldableSchedWrite sched, PatFrag ld_frag,
                  bit Is2Addr = 1> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
               !if(Is2Addr,
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
               [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
           Sched<[sched]>;

  def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
               !if(Is2Addr,
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
               [(set RC:$dst,
                 (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

let Predicates = [HasAVX] in {
  let ExeDomain = SSEPackedSingle in {
    defm VHADDPS  : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
                            X86fhadd, WriteFHAdd, loadv4f32, 0>,
                    VEX_4V, VEX_WIG;
    defm VHSUBPS  : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
                            X86fhsub, WriteFHAdd, loadv4f32, 0>,
                    VEX_4V, VEX_WIG;
    defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
                            X86fhadd, WriteFHAddY, loadv8f32, 0>,
                    VEX_4V, VEX_L, VEX_WIG;
    defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
                            X86fhsub, WriteFHAddY, loadv8f32, 0>,
                    VEX_4V, VEX_L, VEX_WIG;
  }
  let ExeDomain = SSEPackedDouble in {
    defm VHADDPD  : S3_Int<0x7C, "vhaddpd", v2f64, VR128, f128mem,
                           X86fhadd, WriteFHAdd, loadv2f64, 0>,
                    VEX_4V, VEX_WIG;
    defm VHSUBPD  : S3_Int<0x7D, "vhsubpd", v2f64, VR128, f128mem,
                           X86fhsub, WriteFHAdd, loadv2f64, 0>,
                    VEX_4V, VEX_WIG;
    defm VHADDPDY : S3_Int<0x7C, "vhaddpd", v4f64, VR256, f256mem,
                           X86fhadd, WriteFHAddY, loadv4f64, 0>,
                    VEX_4V, VEX_L, VEX_WIG;
    defm VHSUBPDY : S3_Int<0x7D, "vhsubpd", v4f64, VR256, f256mem,
                           X86fhsub, WriteFHAddY, loadv4f64, 0>,
                    VEX_4V, VEX_L, VEX_WIG;
  }
}

// Legacy encodings: two-operand form with $src1 tied to $dst.
let Constraints = "$src1 = $dst" in {
  let ExeDomain = SSEPackedSingle in {
    defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd,
                          WriteFHAdd, memopv4f32>;
    defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub,
                          WriteFHAdd, memopv4f32>;
  }
  let ExeDomain = SSEPackedDouble in {
    defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd,
                         WriteFHAdd, memopv2f64>;
    defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub,
                         WriteFHAdd, memopv2f64>;
  }
}

//===---------------------------------------------------------------------===//
// SSSE3 - Packed Absolute Instructions
//===---------------------------------------------------------------------===//

/// SS3I_unop_rm - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
/// Emits a 128-bit register form (rr) and a load-folding form (rm) that
/// matches `ld_frag` through an i128mem operand. `sched` supplies the XMM
/// scheduling class (XMM.Folded for rm).
multiclass SS3I_unop_rm<bits<8> opc, string OpcodeStr, ValueType vt,
                        SDNode OpNode, X86SchedWriteWidths sched,
                        PatFrag ld_frag> {
  def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst, (vt (OpNode VR128:$src)))]>,
           Sched<[sched.XMM]>;

  def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
                 (ins i128mem:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst,
                   (vt (OpNode (ld_frag addr:$src))))]>,
           Sched<[sched.XMM.Folded]>;
}

/// SS3I_unop_rm_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt,
                          SDNode OpNode, X86SchedWriteWidths sched> {
  // 256-bit register form.
  def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
                  (ins VR256:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR256:$dst, (vt (OpNode VR256:$src)))]>,
            Sched<[sched.YMM]>;

  // 256-bit load-folding form; always matches a plain `load`.
  def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
                  (ins i256mem:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR256:$dst,
                    (vt (OpNode (load addr:$src))))]>,
            Sched<[sched.YMM.Folded]>;
}

let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, SchedWriteVecALU,
                             load>, VEX, VEX_WIG;
  defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, SchedWriteVecALU,
                             load>, VEX, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX] in {
  defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, SchedWriteVecALU,
                             load>, VEX, VEX_WIG;
}
// The 256-bit records reuse the VPABS* prefixes; the multiclass supplies the
// Yrr / Yrm suffixes.
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs, SchedWriteVecALU>,
                VEX, VEX_L, VEX_WIG;
  defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs, SchedWriteVecALU>,
                VEX, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
  defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs, SchedWriteVecALU>,
                VEX, VEX_L, VEX_WIG;
}

defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, SchedWriteVecALU,
                          memop>;
defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, SchedWriteVecALU,
                          memop>;
defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, SchedWriteVecALU,
                          memop>;

//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
//===---------------------------------------------------------------------===//

/// SS3I_binop_rm - Simple SSSE3 bin op
///   OpNode       - DAG node matched by both forms.
///   DstVT / OpVT - result type and type of the first source operand.
///   RC           - register class of the operands.
///   memop_frag   - load fragment matched for the second operand of rm.
///   x86memop     - memory operand of the rm form.
///   sched        - scheduling class; rm uses sched.Folded plus
///                  sched.ReadAfterFold.
///   Is2Addr      - selects the two-operand assembly string (default) or the
///                  three-operand one.
/// The rr form is declared commutable here.
multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         ValueType DstVT, ValueType OpVT, RegisterClass RC,
                         PatFrag memop_frag, X86MemOperand x86memop,
                         X86FoldableSchedWrite sched, bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst),
                 (ins RC:$src1, RC:$src2),
                 !if(Is2Addr,
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                 [(set RC:$dst,
                   (DstVT (OpNode (OpVT RC:$src1), RC:$src2)))]>,
           Sched<[sched]>;
  def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
                 (ins RC:$src1, x86memop:$src2),
                 !if(Is2Addr,
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                 [(set RC:$dst,
                   (DstVT (OpNode (OpVT RC:$src1),
                                  (memop_frag addr:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
/// 128-bit rr/rm pair matched from the intrinsic `IntId128`; the rm form
/// matches `ld_frag` through an i128mem operand.
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
                             Intrinsic IntId128, X86FoldableSchedWrite sched,
                             PatFrag ld_frag, bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src1, VR128:$src2),
                 !if(Is2Addr,
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                 [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
           Sched<[sched]>;
  def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
                 (ins VR128:$src1, i128mem:$src2),
                 !if(Is2Addr,
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                 [(set VR128:$dst,
                   (IntId128 VR128:$src1, (ld_frag addr:$src2)))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

/// SS3I_binop_rm_int_y - 256-bit counterpart of SS3I_binop_rm_int, matched
/// from the intrinsic `IntId256`. Only the three-operand assembly string is
/// emitted, and the Yrm form always matches a plain `load`.
multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
                               Intrinsic IntId256,
                               X86FoldableSchedWrite sched> {
  let isCommutable = 1 in
  def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
                  (ins VR256:$src1, VR256:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
            Sched<[sched]>;
  def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
                  (ins VR256:$src1, i256mem:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR256:$dst,
                    (IntId256 VR256:$src1, (load addr:$src2)))]>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

let ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
  defm VPSHUFB    : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8,
                                  VR128, load, i128mem,
                                  SchedWriteVarShuffle.XMM, 0>,
                    VEX_4V, VEX_WIG;
  defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16,
                                  v16i8, VR128, load, i128mem,
                                  SchedWriteVecIMul.XMM, 0>,
                    VEX_4V, VEX_WIG;
}
// Defined outside the isCommutable = 0 scope above.
defm VPMULHRSW    : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, v8i16,
                                  VR128, load, i128mem,
                                  SchedWriteVecIMul.XMM, 0>,
                    VEX_4V, VEX_WIG;
}

let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
  defm VPHADDW  : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128,
                                load, i128mem,
                                SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
  defm VPHADDD  : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128,
                                load, i128mem,
                                SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
  defm VPHSUBW  : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128,
                                load, i128mem,
                                SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
  defm VPHSUBD  : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
                                load, i128mem,
                                SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
  defm VPSIGNB  : SS3I_binop_rm_int<0x08, "vpsignb",
                                    int_x86_ssse3_psign_b_128,
                                    SchedWriteVecALU.XMM, load, 0>,
                  VEX_4V, VEX_WIG;
  defm VPSIGNW  : SS3I_binop_rm_int<0x09, "vpsignw",
                                    int_x86_ssse3_psign_w_128,
                                    SchedWriteVecALU.XMM, load, 0>,
                  VEX_4V, VEX_WIG;
  defm VPSIGND  : SS3I_binop_rm_int<0x0A, "vpsignd",
                                    int_x86_ssse3_psign_d_128,
                                    SchedWriteVecALU.XMM, load, 0>,
                  VEX_4V, VEX_WIG;
  defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
                                    int_x86_ssse3_phadd_sw_128,
                                    SchedWritePHAdd.XMM, load, 0>,
                  VEX_4V, VEX_WIG;
  defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
                                    int_x86_ssse3_phsub_sw_128,
                                    SchedWritePHAdd.XMM, load, 0>,
                  VEX_4V, VEX_WIG;
}
}

let ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
  defm VPSHUFBY    : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8,
                                   VR256, load, i256mem,
                                   SchedWriteVarShuffle.YMM, 0>,
                     VEX_4V, VEX_L, VEX_WIG;
  defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16,
                                   v32i8, VR256, load, i256mem,
                                   SchedWriteVecIMul.YMM, 0>,
                     VEX_4V, VEX_L, VEX_WIG;
}
// Defined outside the isCommutable = 0 scope above.
defm VPMULHRSWY    : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16,
                                   VR256, load, i256mem,
                                   SchedWriteVecIMul.YMM, 0>,
                     VEX_4V, VEX_L, VEX_WIG;
}

let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
  defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16,
                                VR256, load, i256mem,
                                SchedWritePHAdd.YMM, 0>,
                  VEX_4V, VEX_L, VEX_WIG;
  defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256,
                                load, i256mem,
                                SchedWritePHAdd.YMM, 0>,
                  VEX_4V, VEX_L, VEX_WIG;
  defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16,
                                VR256, load, i256mem,
                                SchedWritePHAdd.YMM, 0>,
                  VEX_4V, VEX_L, VEX_WIG;
  defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
                                load, i256mem,
                                SchedWritePHAdd.YMM, 0>,
                  VEX_4V, VEX_L, VEX_WIG;
  defm VPSIGNB  : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
                                      SchedWriteVecALU.YMM>,
                  VEX_4V, VEX_L, VEX_WIG;
  defm VPSIGNW  : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w,
                                      SchedWriteVecALU.YMM>,
                  VEX_4V, VEX_L, VEX_WIG;
  defm VPSIGND  : SS3I_binop_rm_int_y<0x0A, "vpsignd", int_x86_avx2_psign_d,
                                      SchedWriteVecALU.YMM>,
                  VEX_4V, VEX_L, VEX_WIG;
  defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
                                      int_x86_avx2_phadd_sw,
                                      SchedWritePHAdd.YMM>,
                  VEX_4V, VEX_L, VEX_WIG;
  defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
                                      int_x86_avx2_phsub_sw,
                                      SchedWritePHAdd.YMM>,
                  VEX_4V, VEX_L, VEX_WIG;
}
}

// None of these have i8 immediate fields.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
  defm PHADDW    : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, v8i16, VR128,
                                 memop, i128mem, SchedWritePHAdd.XMM>;
  defm PHADDD    : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, v4i32, VR128,
                                 memop, i128mem, SchedWritePHAdd.XMM>;
  defm PHSUBW    : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, v8i16, VR128,
                                 memop, i128mem, SchedWritePHAdd.XMM>;
  defm PHSUBD    : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, v4i32, VR128,
                                 memop, i128mem, SchedWritePHAdd.XMM>;
  defm PSIGNB    : SS3I_binop_rm_int<0x08, "psignb", int_x86_ssse3_psign_b_128,
                                     SchedWriteVecALU.XMM, memop>;
  defm PSIGNW    : SS3I_binop_rm_int<0x09, "psignw", int_x86_ssse3_psign_w_128,
                                     SchedWriteVecALU.XMM, memop>;
  defm PSIGND    : SS3I_binop_rm_int<0x0A, "psignd", int_x86_ssse3_psign_d_128,
                                     SchedWriteVecALU.XMM, memop>;
  defm PSHUFB    : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, v16i8, VR128,
                                 memop, i128mem, SchedWriteVarShuffle.XMM>;
  defm PHADDSW   : SS3I_binop_rm_int<0x03, "phaddsw",
                                     int_x86_ssse3_phadd_sw_128,
                                     SchedWritePHAdd.XMM, memop>;
  defm PHSUBSW   : SS3I_binop_rm_int<0x07, "phsubsw",
                                     int_x86_ssse3_phsub_sw_128,
                                     SchedWritePHAdd.XMM, memop>;
  defm PMADDUBSW : SS3I_binop_rm<0x04, "pmaddubsw", X86vpmaddubsw, v8i16,
                                 v16i8, VR128, memop, i128mem,
                                 SchedWriteVecIMul.XMM>;
}
// Defined outside the isCommutable = 0 scope above.
defm PMULHRSW    : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, v8i16,
                                 VR128, memop, i128mem, SchedWriteVecIMul.XMM>;
}

//===---------------------------------------------------------------------===//
// SSSE3 - Packed Align Instruction Patterns
//===---------------------------------------------------------------------===//

/// ssse3_palignr - Register (rri) and load-folding (rmi) forms of opcode 0x0F
/// (SS3AI), matching X86PAlignr on type `VT` with an i8 target-immediate
/// third operand.
///   RC / x86memop - register class and memory operand of the sources.
///   memop_frag    - load fragment matched for the second source of rmi.
///   sched         - scheduling class; rmi uses sched.Folded plus
///                   sched.ReadAfterFold.
///   Is2Addr       - selects the two-register assembly string (default) or
///                   the three-register one.
multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
                         PatFrag memop_frag, X86MemOperand x86memop,
                         X86FoldableSchedWrite sched, bit Is2Addr = 1> {
  let hasSideEffects = 0 in {
  def rri : SS3AI<0x0F, MRMSrcReg, (outs RC:$dst),
                  (ins RC:$src1, RC:$src2, u8imm:$src3),
                  !if(Is2Addr,
                      !strconcat(asm,
                                 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                      !strconcat(asm,
                                 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                  [(set RC:$dst,
                    (VT (X86PAlignr RC:$src1, RC:$src2, (i8 timm:$src3))))]>,
            Sched<[sched]>;
  let mayLoad = 1 in
  def rmi : SS3AI<0x0F, MRMSrcMem, (outs RC:$dst),
                  (ins RC:$src1, x86memop:$src2, u8imm:$src3),
                  !if(Is2Addr,
                      !strconcat(asm,
                                 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                      !strconcat(asm,
                                 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                  [(set RC:$dst,
                    (VT (X86PAlignr RC:$src1,
                                    (memop_frag addr:$src2),
                                    (i8 timm:$src3))))]>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
  defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, load, i128mem,
                                SchedWriteShuffle.XMM, 0>, VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
  defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, load, i256mem,
                                 SchedWriteShuffle.YMM, 0>,
                   VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
  defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memop, i128mem,
                               SchedWriteShuffle.XMM>;

//===---------------------------------------------------------------------===//
// SSE3 - Thread synchronization
//===---------------------------------------------------------------------===//

// MONITOR takes no explicit operands; its inputs are modelled as implicit
// register uses. The 32-bit and 64-bit mode records differ only in using EAX
// versus RAX. Both are gated on HasSSE3.
let SchedRW = [WriteSystem] in {
let Uses = [EAX, ECX, EDX] in
def MONITOR32rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
                   TB, Requires<[HasSSE3, Not64BitMode]>;
let Uses = [RAX, ECX, EDX] in
def MONITOR64rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
                   TB, Requires<[HasSSE3, In64BitMode]>;

let Uses = [ECX, EAX] in
def MWAITrr   : I<0x01, MRM_C9, (outs), (ins),
(X86PAlignr RC:$src1, 49410b57cec5SDimitry Andric (memop_frag addr:$src2), 49428bcb0991SDimitry Andric (i8 timm:$src3))))]>, 49430b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 49440b57cec5SDimitry Andric } 49450b57cec5SDimitry Andric} 49460b57cec5SDimitry Andric 49470b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in 49480b57cec5SDimitry Andric defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, load, i128mem, 49490b57cec5SDimitry Andric SchedWriteShuffle.XMM, 0>, VEX_4V, VEX_WIG; 49500b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in 49510b57cec5SDimitry Andric defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, load, i256mem, 49520b57cec5SDimitry Andric SchedWriteShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; 49530b57cec5SDimitry Andriclet Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in 49540b57cec5SDimitry Andric defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memop, i128mem, 49550b57cec5SDimitry Andric SchedWriteShuffle.XMM>; 49560b57cec5SDimitry Andric 49570b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 49580b57cec5SDimitry Andric// SSSE3 - Thread synchronization 49590b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 49600b57cec5SDimitry Andric 49610b57cec5SDimitry Andriclet SchedRW = [WriteSystem] in { 49620b57cec5SDimitry Andriclet Uses = [EAX, ECX, EDX] in 49630b57cec5SDimitry Andricdef MONITOR32rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, 49640b57cec5SDimitry Andric TB, Requires<[HasSSE3, Not64BitMode]>; 49650b57cec5SDimitry Andriclet Uses = [RAX, ECX, EDX] in 49660b57cec5SDimitry Andricdef MONITOR64rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, 49670b57cec5SDimitry Andric TB, Requires<[HasSSE3, In64BitMode]>; 49680b57cec5SDimitry Andric 49690b57cec5SDimitry Andriclet Uses = [ECX, EAX] in 49700b57cec5SDimitry Andricdef MWAITrr : I<0x01, MRM_C9, (outs), (ins), 
"mwait", 49710b57cec5SDimitry Andric [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>; 49720b57cec5SDimitry Andric} // SchedRW 49730b57cec5SDimitry Andric 49740b57cec5SDimitry Andricdef : InstAlias<"mwait\t{%eax, %ecx|ecx, eax}", (MWAITrr)>, Requires<[Not64BitMode]>; 49750b57cec5SDimitry Andricdef : InstAlias<"mwait\t{%rax, %rcx|rcx, rax}", (MWAITrr)>, Requires<[In64BitMode]>; 49760b57cec5SDimitry Andric 49770b57cec5SDimitry Andricdef : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITOR32rrr)>, 49780b57cec5SDimitry Andric Requires<[Not64BitMode]>; 49790b57cec5SDimitry Andricdef : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITOR64rrr)>, 49800b57cec5SDimitry Andric Requires<[In64BitMode]>; 49810b57cec5SDimitry Andric 49820b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 49830b57cec5SDimitry Andric// SSE4.1 - Packed Move with Sign/Zero Extend 49840b57cec5SDimitry Andric// NOTE: Any Extend is promoted to Zero Extend in X86ISelDAGToDAG.cpp 49850b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 49860b57cec5SDimitry Andric 49870b57cec5SDimitry Andricmulticlass SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp, 49880b57cec5SDimitry Andric RegisterClass OutRC, RegisterClass InRC, 49890b57cec5SDimitry Andric X86FoldableSchedWrite sched> { 49900b57cec5SDimitry Andric def rr : SS48I<opc, MRMSrcReg, (outs OutRC:$dst), (ins InRC:$src), 49910b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 49920b57cec5SDimitry Andric Sched<[sched]>; 49930b57cec5SDimitry Andric 49940b57cec5SDimitry Andric def rm : SS48I<opc, MRMSrcMem, (outs OutRC:$dst), (ins MemOp:$src), 49950b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 49960b57cec5SDimitry Andric Sched<[sched.Folded]>; 49970b57cec5SDimitry Andric} 49980b57cec5SDimitry Andric 49990b57cec5SDimitry Andricmulticlass 
SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr, 50000b57cec5SDimitry Andric X86MemOperand MemOp, X86MemOperand MemYOp, 50010b57cec5SDimitry Andric Predicate prd> { 50020b57cec5SDimitry Andric defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128, 50030b57cec5SDimitry Andric SchedWriteShuffle.XMM>; 50040b57cec5SDimitry Andric let Predicates = [HasAVX, prd] in 50050b57cec5SDimitry Andric defm V#NAME : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp, 5006*bdd1243dSDimitry Andric VR128, VR128, SchedWriteVecExtend.XMM>, 50070b57cec5SDimitry Andric VEX, VEX_WIG; 50080b57cec5SDimitry Andric let Predicates = [HasAVX2, prd] in 50090b57cec5SDimitry Andric defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp, 5010*bdd1243dSDimitry Andric VR256, VR128, SchedWriteVecExtend.YMM>, 50110b57cec5SDimitry Andric VEX, VEX_L, VEX_WIG; 50120b57cec5SDimitry Andric} 50130b57cec5SDimitry Andric 50140b57cec5SDimitry Andricmulticlass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp, 50150b57cec5SDimitry Andric X86MemOperand MemYOp, Predicate prd> { 50160b57cec5SDimitry Andric defm PMOVSX#NAME : SS41I_pmovx_rm_all<opc, !strconcat("pmovsx", OpcodeStr), 50170b57cec5SDimitry Andric MemOp, MemYOp, prd>; 50180b57cec5SDimitry Andric defm PMOVZX#NAME : SS41I_pmovx_rm_all<!add(opc, 0x10), 50190b57cec5SDimitry Andric !strconcat("pmovzx", OpcodeStr), 50200b57cec5SDimitry Andric MemOp, MemYOp, prd>; 50210b57cec5SDimitry Andric} 50220b57cec5SDimitry Andric 50230b57cec5SDimitry Andricdefm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem, NoVLX_Or_NoBWI>; 50240b57cec5SDimitry Andricdefm WD : SS41I_pmovx_rm<0x23, "wd", i64mem, i128mem, NoVLX>; 50250b57cec5SDimitry Andricdefm DQ : SS41I_pmovx_rm<0x25, "dq", i64mem, i128mem, NoVLX>; 50260b57cec5SDimitry Andric 50270b57cec5SDimitry Andricdefm BD : SS41I_pmovx_rm<0x21, "bd", i32mem, i64mem, NoVLX>; 50280b57cec5SDimitry Andricdefm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem, NoVLX>; 50290b57cec5SDimitry 
// Variant reading i16mem (128-bit forms) / i32mem (256-bit forms).
defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem, NoVLX>;

// AVX2 Patterns
/// SS41I_pmovx_avx2_patterns - Selection patterns for the 256-bit extending
/// moves named OpcPrefix#<suffix>Y{rr,rm}.
///   OpcPrefix - instruction name prefix ("VPMOVSX" / "VPMOVZX").
///   ExtTy     - prefix of the extending-load fragments (ExtTy#"extloadvi8",
///               ExtTy#"extloadvi16", ExtTy#"extloadvi32").
///   ExtOp     - node matched when the whole 128-bit source is extended.
///   InVecOp   - node matched when only part of the source is consumed.
multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
                                     SDNode ExtOp, SDNode InVecOp> {
  // Register-Register patterns
  let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
  def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;

  let Predicates = [HasAVX2, NoVLX] in {
    def : Pat<(v8i32 (InVecOp (v16i8 VR128:$src))),
              (!cast<I>(OpcPrefix#BDYrr) VR128:$src)>;
    def : Pat<(v4i64 (InVecOp (v16i8 VR128:$src))),
              (!cast<I>(OpcPrefix#BQYrr) VR128:$src)>;

    def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
              (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
    def : Pat<(v4i64 (InVecOp (v8i16 VR128:$src))),
              (!cast<I>(OpcPrefix#WQYrr) VR128:$src)>;

    def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
              (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
  }

  // Simple Register-Memory patterns
  let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
    def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
              (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;

    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
  }

  // The extending-load patterns and the folded-load patterns share the same
  // predicates, so both groups live in a single let scope; definition order
  // is unchanged.
  let Predicates = [HasAVX2, NoVLX] in {
    def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
              (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
    def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
              (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;

    def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
              (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
    def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
              (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;

    def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
              (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;

    // AVX2 Register-Memory patterns
    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;

    def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
    def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
    def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;

    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;

    def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
    def : Pat<(v4i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload32 addr:$src))))),
              (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;

    def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
    def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
    def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
  }
}

defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", sext, sext_invec>;
defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", zext, zext_invec>;

// SSE4.1/AVX patterns.
/// SS41I_pmovx_patterns - Selection patterns for the 128-bit extending moves
/// named OpcPrefix#<suffix>{rr,rm}.
///   OpcPrefix - instruction name prefix ("VPMOVSX", "PMOVZX", ...).
///   ExtTy     - prefix of the extending-load fragments (ExtTy#"extloadvi8",
///               ExtTy#"extloadvi16", ExtTy#"extloadvi32").
///   ExtOp     - extension node matched on the 128-bit source.
multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
                                SDNode ExtOp> {
  // Register-register forms.
  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
  def : Pat<(v8i16 (ExtOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BWrr) VR128:$src)>;

  let Predicates = [HasAVX, NoVLX] in {
    def : Pat<(v4i32 (ExtOp (v16i8 VR128:$src))),
              (!cast<I>(OpcPrefix#BDrr) VR128:$src)>;
    def : Pat<(v2i64 (ExtOp (v16i8 VR128:$src))),
              (!cast<I>(OpcPrefix#BQrr) VR128:$src)>;

    def : Pat<(v4i32 (ExtOp (v8i16 VR128:$src))),
              (!cast<I>(OpcPrefix#WDrr) VR128:$src)>;
    def : Pat<(v2i64 (ExtOp (v8i16 VR128:$src))),
              (!cast<I>(OpcPrefix#WQrr) VR128:$src)>;

    def : Pat<(v2i64 (ExtOp (v4i32 VR128:$src))),
              (!cast<I>(OpcPrefix#DQrr) VR128:$src)>;
  }

  // Extending vector loads.
  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
  def : Pat<(v8i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;

  let Predicates = [HasAVX, NoVLX] in {
    def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
              (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
    def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
              (!cast<I>(OpcPrefix#BQrm) addr:$src)>;

    def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
              (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
    def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
              (!cast<I>(OpcPrefix#WQrm) addr:$src)>;

    def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
              (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  }

  // Extension of a value built from a scalar, zero-extending or full vector
  // load: fold the load into the memory form.
  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
    def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
    def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
    def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
    def : Pat<(v8i16 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  }

  let Predicates = [HasAVX, NoVLX] in {
    def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
    def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
              (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
    def : Pat<(v4i32 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BDrm) addr:$src)>;

    def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
    def : Pat<(v2i64 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BQrm) addr:$src)>;

    def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
    def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
    def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
    def : Pat<(v4i32 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDrm) addr:$src)>;

    def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
    def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
              (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
    def : Pat<(v2i64 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WQrm) addr:$src)>;

    def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
    def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
    def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
    def : Pat<(v2i64 (ExtOp (loadv4i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  }
}

defm : SS41I_pmovx_patterns<"VPMOVSX", "s", sext_invec>;
defm : SS41I_pmovx_patterns<"VPMOVZX", "z", zext_invec>;

// NOTE(review): the multiclass body sets Predicates itself; this enclosing
// let is presumably what replaces them with UseSSE41 for the legacy
// instructions - confirm against TableGen's let/defm ordering rules.
let Predicates = [UseSSE41] in {
  defm : SS41I_pmovx_patterns<"PMOVSX", "s", sext_invec>;
  defm : SS41I_pmovx_patterns<"PMOVZX", "z", zext_invec>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Extract Instructions
//===----------------------------------------------------------------------===//

/// SS41I_extract8 - SSE 4.1 extract 8 bits to a GR32orGR64 register or to
/// 8-bit memory.
multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set GR32orGR64:$dst,
                      (X86pextrb (v16i8 VR128:$src1), timm:$src2))]>,
           Sched<[WriteVecExtract]>;

  // Store form: the extracted value is truncated to i8 and written to $dst.
  let mayStore = 1, hasSideEffects = 0 in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins i8mem:$dst, VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (i8 (trunc (X86pextrb (v16i8 VR128:$src1),
                                                 timm:$src2))),
                           addr:$dst)]>,
           Sched<[WriteVecExtractSt]>;
}

let Predicates = [HasAVX, NoBWI] in
defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX, VEX_WIG;

defm PEXTRB : SS41I_extract8<0x14, "pextrb">;


/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination. Also
/// defines rr_REV, a pattern-less register-destination form that is
/// codegen-only, force-disassembled and linked to NAME#rr via FoldGenData.
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
  def rr_REV : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                       (ins VR128:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       []>,
               Sched<[WriteVecExtract]>, FoldGenData<NAME#rr>;

  // Store form: the extracted value is truncated to i16 and written to $dst.
  let mayStore = 1, hasSideEffects = 0 in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins i16mem:$dst, VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (i16 (trunc (X86pextrw (v8i16 VR128:$src1),
                                                  timm:$src2))),
                           addr:$dst)]>,
           Sched<[WriteVecExtractSt]>;
}

let Predicates = [HasAVX, NoBWI] in
defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX, VEX_WIG;

defm PEXTRW : SS41I_extract16<0x15, "pextrw">;

// Store an f16 by copying it into VR128 and extracting word 0 straight to
// memory with (V)PEXTRWmr.
let Predicates = [UseSSE41] in
def : Pat<(store f16:$src, addr:$dst),
          (PEXTRWmr addr:$dst,
                    (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;

let Predicates = [HasAVX, NoBWI] in
def : Pat<(store f16:$src, addr:$dst),
          (VPEXTRWmr addr:$dst,
                     (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;


/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
                   (ins VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set GR32:$dst,
                      (extractelt (v4i32 VR128:$src1), imm:$src2))]>,
           Sched<[WriteVecExtract]>;

  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins i32mem:$dst, VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
                           addr:$dst)]>,
           Sched<[WriteVecExtractSt]>;
}

let Predicates = [HasAVX, NoDQI] in
defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;

defm PEXTRD : SS41I_extract32<0x16, "pextrd">;

/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
  // Register form: element $src2 of the v2i64 source into a GR64.
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
                   (ins VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set GR64:$dst,
                      (extractelt (v2i64 VR128:$src1), imm:$src2))]>,
           Sched<[WriteVecExtract]>;

  // Memory form: the selected element is stored to $dst.
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins i64mem:$dst, VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
                           addr:$dst)]>,
           Sched<[WriteVecExtractSt]>;
}

let Predicates = [HasAVX, NoDQI] in
defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;

defm PEXTRQ : SS41I_extract64<0x16, "pextrq">, REX_W;

/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
/// destination
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
  // The v4f32 source is bitcast to v4i32 so the element is extracted as an
  // integer.
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set GR32orGR64:$dst,
                      (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
                                  imm:$src2))]>,
           Sched<[WriteVecExtract]>;

  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins f32mem:$dst, VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
                                       imm:$src2),
                           addr:$dst)]>,
           Sched<[WriteVecExtractSt]>;
}

let ExeDomain = SSEPackedSingle, Predicates = [UseAVX] in
defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX, VEX_WIG;
let ExeDomain = SSEPackedSingle in
defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;

//===----------------------------------------------------------------------===//
// SSE4.1 - Insert Instructions
//===----------------------------------------------------------------------===//

/// SS41I_insert8 - Insert a byte taken from a GR32orGR64 register (rr) or
/// from 8-bit memory (rm) into a VR128, selecting X86pinsrb.
/// Is2Addr = 1 prints the two-operand syntax, 0 the three-operand one.
multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
                   (ins VR128:$src1, GR32orGR64:$src2, u8imm:$src3),
                   !strconcat(asm,
                     !if(Is2Addr,
                       "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                       "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                   [(set VR128:$dst,
                      (X86pinsrb VR128:$src1, GR32orGR64:$src2,
                                 timm:$src3))]>,
           Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;

  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
                   (ins VR128:$src1, i8mem:$src2, u8imm:$src3),
                   !strconcat(asm,
                     !if(Is2Addr,
                       "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                       "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                   [(set VR128:$dst,
                      (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
                                 timm:$src3))]>,
           Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

let Predicates = [HasAVX, NoBWI] in {
  defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V, VEX_WIG;

  // An any-extended GR8 source is placed into an undefined 32-bit register
  // through sub_8bit and then inserted with the register form.
  def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
            (VPINSRBrr VR128:$src1,
                       (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                      GR8:$src2, sub_8bit),
                       timm:$src3)>;
}

let Constraints = "$src1 = $dst" in
defm PINSRB : SS41I_insert8<0x20, "pinsrb">;

/// SS41I_insert32 - Insert a 32-bit value from GR32 (rr) or from 32-bit
/// memory (rm) into a v4i32 held in VR128.
/// Is2Addr = 1 prints the two-operand syntax, 0 the three-operand one.
multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
                   (ins VR128:$src1, GR32:$src2, u8imm:$src3),
                   !strconcat(asm,
                     !if(Is2Addr,
                       "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                       "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                   [(set VR128:$dst,
                      (v4i32 (insertelt VR128:$src1, GR32:$src2,
                                        imm:$src3)))]>,
           Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;

  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
                   (ins VR128:$src1, i32mem:$src2, u8imm:$src3),
                   !strconcat(asm,
                     !if(Is2Addr,
                       "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                       "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                   [(set VR128:$dst,
                      (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
                                        imm:$src3)))]>,
           Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

let Predicates = [HasAVX, NoDQI] in
defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
defm PINSRD : SS41I_insert32<0x22, "pinsrd">;

/// SS41I_insert64 - Insert a 64-bit value from GR64 (rr) or from 64-bit
/// memory (rm) into a v2i64 held in VR128.
/// Is2Addr = 1 prints the two-operand syntax, 0 the three-operand one.
multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
                   (ins VR128:$src1, GR64:$src2, u8imm:$src3),
                   !strconcat(asm,
                     !if(Is2Addr,
                       "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                       "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                   [(set VR128:$dst,
                      (v2i64 (insertelt VR128:$src1, GR64:$src2,
                                        imm:$src3)))]>,
           Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;

  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
                   (ins VR128:$src1, i64mem:$src2, u8imm:$src3),
                   !strconcat(asm,
                     !if(Is2Addr,
                       "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                       "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                   [(set VR128:$dst,
                      (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
                                        imm:$src3)))]>,
           Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

let Predicates = [HasAVX, NoDQI] in
defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
let Constraints = "$src1 = $dst" in
defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;

// insertps has a few different modes, there's the first two here below which
// are optimized inserts that won't zero arbitrary elements in the destination
// vector. The next one matches the intrinsic and could zero arbitrary elements
// in the target vector.
// SS41I_insertf32 - register (rr) and memory (rm) encodings of an
// insertps-style instruction. Both forms select from X86insertps with a
// target-constant immediate. Is2Addr chooses the two-operand assembly string
// over the three-operand one.
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
  // Register source form; marked commutable.
  let isCommutable = 1 in
  def rr : SS4AIi8<opc, MRMSrcReg,
             (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2, u8imm:$src3),
             !if(Is2Addr,
                 asm # "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                 asm # "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
             [(set VR128:$dst,
                   (X86insertps VR128:$src1, VR128:$src2, timm:$src3))]>,
           Sched<[SchedWriteFShuffle.XMM]>;

  // Memory source form; the pattern matches a scalar f32 load wrapped in
  // scalar_to_vector as the second operand.
  def rm : SS4AIi8<opc, MRMSrcMem,
             (outs VR128:$dst),
             (ins VR128:$src1, f32mem:$src2, u8imm:$src3),
             !if(Is2Addr,
                 asm # "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                 asm # "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
             [(set VR128:$dst,
                   (X86insertps VR128:$src1,
                                (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                                timm:$src3))]>,
           Sched<[SchedWriteFShuffle.XMM.Folded,
                  SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

// VEX form (three-operand syntax, UseAVX) and legacy form (two-operand syntax,
// $src1 tied to $dst), both in the packed-single execution domain.
let ExeDomain = SSEPackedSingle in {
  let Predicates = [UseAVX] in
    defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>,
                     VEX_4V, VEX_WIG;

  let Constraints = "$src1 = $dst" in
    defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1>;
}
//===----------------------------------------------------------------------===//
// SSE4.1 - Round Instructions
//===----------------------------------------------------------------------===//

// sse41_fp_unop_p - packed unary FP operation taking an immediate.
// Defines a register form (r) and a memory form (m); both read MXCSR and are
// marked as possibly raising an FP exception.
multiclass sse41_fp_unop_p<bits<8> opc, string OpcodeStr,
                           X86MemOperand x86memop, RegisterClass RC,
                           ValueType VT, PatFrag mem_frag,
                           SDPatternOperator OpNode,
                           X86FoldableSchedWrite sched> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Vector intrinsic operation, reg
  def r : SS4AIi8<opc, MRMSrcReg,
            (outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
            OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
            [(set RC:$dst, (VT (OpNode RC:$src1, timm:$src2)))]>,
          Sched<[sched]>;

  // Vector intrinsic operation, mem
  def m : SS4AIi8<opc, MRMSrcMem,
            (outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
            OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
            [(set RC:$dst,
                  (VT (OpNode (mem_frag addr:$src1), timm:$src2)))]>,
          Sched<[sched.Folded]>;
} // Uses = [MXCSR], mayRaiseFPException = 1
}

// avx_fp_unop_rm - scalar FR32/FR64 forms with an additional $src1 register
// operand (three-operand assembly syntax). The defs carry no selection
// pattern and are codegen-only.
multiclass avx_fp_unop_rm<bits<8> opcss, bits<8> opcsd,
                          string OpcodeStr, X86FoldableSchedWrite sched> {
let hasSideEffects = 0, isCodeGenOnly = 1 in {
  let ExeDomain = SSEPackedSingle in {
    def SSr : SS4AIi8<opcss, MRMSrcReg,
                (outs FR32:$dst),
                (ins FR32:$src1, FR32:$src2, i32u8imm:$src3),
                OpcodeStr #
                  "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>, Sched<[sched]>;

    let mayLoad = 1 in
    def SSm : SS4AIi8<opcss, MRMSrcMem,
                (outs FR32:$dst),
                (ins FR32:$src1, f32mem:$src2, i32u8imm:$src3),
                OpcodeStr #
                  "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // ExeDomain = SSEPackedSingle

  let ExeDomain = SSEPackedDouble in {
    def SDr : SS4AIi8<opcsd, MRMSrcReg,
                (outs FR64:$dst),
                (ins FR64:$src1, FR64:$src2, i32u8imm:$src3),
                OpcodeStr #
                  "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>, Sched<[sched]>;

    let mayLoad = 1 in
    def SDm : SS4AIi8<opcsd, MRMSrcMem,
                (outs FR64:$dst),
                (ins FR64:$src1, f64mem:$src2, i32u8imm:$src3),
                OpcodeStr #
                  "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // ExeDomain = SSEPackedDouble
} // hasSideEffects = 0, isCodeGenOnly = 1
}

// sse41_fp_unop_s - scalar FR32/FR64 forms with a single source operand
// (two-operand assembly syntax). Pattern-less and codegen-only; all four
// defs read MXCSR and may raise an FP exception.
multiclass sse41_fp_unop_s<bits<8> opcss, bits<8> opcsd,
                           string OpcodeStr, X86FoldableSchedWrite sched> {
let Uses = [MXCSR], mayRaiseFPException = 1,
    hasSideEffects = 0, isCodeGenOnly = 1 in {
  let ExeDomain = SSEPackedSingle in {
    def SSr : SS4AIi8<opcss, MRMSrcReg,
                (outs FR32:$dst), (ins FR32:$src1, i32u8imm:$src2),
                OpcodeStr # "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                []>, Sched<[sched]>;

    let mayLoad = 1 in
    def SSm : SS4AIi8<opcss, MRMSrcMem,
                (outs FR32:$dst), (ins f32mem:$src1, i32u8imm:$src2),
                OpcodeStr # "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // ExeDomain = SSEPackedSingle

  let ExeDomain = SSEPackedDouble in {
    def SDr : SS4AIi8<opcsd, MRMSrcReg,
                (outs FR64:$dst), (ins FR64:$src1, i32u8imm:$src2),
                OpcodeStr # "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                []>, Sched<[sched]>;

    let mayLoad = 1 in
    def SDm : SS4AIi8<opcsd, MRMSrcMem,
                (outs FR64:$dst), (ins f64mem:$src1, i32u8imm:$src2),
                OpcodeStr # "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // ExeDomain = SSEPackedDouble
} // Uses = [MXCSR], mayRaiseFPException = 1, hasSideEffects = 0,
  // isCodeGenOnly = 1
}

// sse41_fp_binop_s - scalar "_Int" forms operating on whole VR128 registers.
// Is2Addr chooses the two-operand assembly string over the three-operand one.
// The memory forms match the sse_load_f32 / sse_load_f64 fragments.
multiclass sse41_fp_binop_s<bits<8> opcss, bits<8> opcsd,
                            string OpcodeStr, X86FoldableSchedWrite sched,
                            ValueType VT32, ValueType VT64,
                            SDNode OpNode, bit Is2Addr = 1> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  let ExeDomain = SSEPackedSingle in {
    def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
          (outs VR128:$dst),
          (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
          !if(Is2Addr,
              OpcodeStr # "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}",
              OpcodeStr #
                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
          [(set VR128:$dst,
                (VT32 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>,
          Sched<[sched]>;

    def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
          (outs VR128:$dst),
          (ins VR128:$src1, ssmem:$src2, i32u8imm:$src3),
          !if(Is2Addr,
              OpcodeStr # "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}",
              OpcodeStr #
                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
          [(set VR128:$dst,
                (OpNode VR128:$src1, (sse_load_f32 addr:$src2), timm:$src3))]>,
          Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // ExeDomain = SSEPackedSingle

  let ExeDomain = SSEPackedDouble in {
    def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
          (outs VR128:$dst),
          (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
          !if(Is2Addr,
              OpcodeStr # "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
              OpcodeStr #
                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
          [(set VR128:$dst,
                (VT64 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>,
          Sched<[sched]>;

    def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
          (outs VR128:$dst),
          (ins VR128:$src1, sdmem:$src2, i32u8imm:$src3),
          !if(Is2Addr,
              OpcodeStr # "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
              OpcodeStr #
                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
          [(set VR128:$dst,
                (OpNode VR128:$src1, (sse_load_f64 addr:$src2), timm:$src3))]>,
          Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // ExeDomain = SSEPackedDouble
} // Uses = [MXCSR], mayRaiseFPException = 1
}

// FP round - roundss, roundps, roundsd, roundpd
let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
  let ExeDomain = SSEPackedSingle in {
    // Intrinsic form
    defm VROUNDPS  : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
                                     loadv4f32, X86any_VRndScale,
                                     SchedWriteFRnd.XMM>,
                     VEX, VEX_WIG;
    defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32,
                                     loadv8f32, X86any_VRndScale,
                                     SchedWriteFRnd.YMM>,
                     VEX, VEX_L, VEX_WIG;
  }

  let ExeDomain = SSEPackedDouble in {
    defm VROUNDPD  : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
                                     loadv2f64, X86any_VRndScale,
                                     SchedWriteFRnd.XMM>,
                     VEX, VEX_WIG;
    defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64,
                                     loadv4f64, X86any_VRndScale,
                                     SchedWriteFRnd.YMM>,
                     VEX, VEX_L, VEX_WIG;
  }
}

let Predicates = [UseAVX] in {
  // Scalar VEX forms: the VR128 "_Int" variants followed by the FR32/FR64
  // codegen-only variants.
  defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl,
                                 v4f32, v2f64, X86RndScales, 0>,
                VEX_4V, VEX_LIG, VEX_WIG, SIMD_EXC;
  defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>,
                VEX_4V, VEX_LIG, VEX_WIG, SIMD_EXC;

  // Scalar register selection; the extra first source operand of the VEX
  // form is fed an IMPLICIT_DEF.
  def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2),
            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>;
  def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2),
            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, timm:$src2)>;
}

// The load-folding scalar VEX patterns are enabled only under OptForSize.
let Predicates = [UseAVX, OptForSize] in {
  def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2),
            (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
  def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2),
            (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
}

// Legacy (non-VEX) packed forms.
let ExeDomain = SSEPackedSingle in
defm ROUNDPS : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32,
                               memopv4f32, X86any_VRndScale,
                               SchedWriteFRnd.XMM>;
let ExeDomain = SSEPackedDouble in
defm ROUNDPD : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64,
                               memopv2f64, X86any_VRndScale,
                               SchedWriteFRnd.XMM>;

// Legacy scalar forms: FR32/FR64 codegen-only variants first, then the VR128
// "_Int" variants with $src1 tied to $dst.
defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl>;

let Constraints = "$src1 = $dst" in
defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl,
                              v4f32, v2f64, X86RndScales>;

let Predicates = [UseSSE41] in {
  def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2),
            (ROUNDSSr FR32:$src1, timm:$src2)>;
  def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2),
            (ROUNDSDr FR64:$src1, timm:$src2)>;
}

// The load-folding legacy scalar patterns are enabled only under OptForSize.
let Predicates = [UseSSE41, OptForSize] in {
  def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2),
            (ROUNDSSm addr:$src1, timm:$src2)>;
  def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2),
            (ROUNDSDm addr:$src1, timm:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Bit Test
//===----------------------------------------------------------------------===//

// ptest is selected from the X86ptest node, which X86ISelLowering produces
// primarily from the corresponding Intel intrinsic. Every form below
// defines EFLAGS.
let Defs = [EFLAGS] in {
  let Predicates = [HasAVX] in {
    def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs),
                     (ins VR128:$src1, VR128:$src2),
                     "vptest\t{$src2, $src1|$src1, $src2}",
                     [(set EFLAGS,
                           (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
                   Sched<[SchedWriteVecTest.XMM]>, VEX, VEX_WIG;
    def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs),
                     (ins VR128:$src1, f128mem:$src2),
                     "vptest\t{$src2, $src1|$src1, $src2}",
                     [(set EFLAGS,
                           (X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
                   Sched<[SchedWriteVecTest.XMM.Folded,
                          SchedWriteVecTest.XMM.ReadAfterFold]>,
                   VEX, VEX_WIG;

    def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs),
                      (ins VR256:$src1, VR256:$src2),
                      "vptest\t{$src2, $src1|$src1, $src2}",
                      [(set EFLAGS,
                            (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
                    Sched<[SchedWriteVecTest.YMM]>, VEX, VEX_L, VEX_WIG;
    def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs),
                      (ins VR256:$src1, i256mem:$src2),
                      "vptest\t{$src2, $src1|$src1, $src2}",
                      [(set EFLAGS,
                            (X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
                    Sched<[SchedWriteVecTest.YMM.Folded,
                           SchedWriteVecTest.YMM.ReadAfterFold]>,
                    VEX, VEX_L, VEX_WIG;
  } // Predicates = [HasAVX]

  def PTESTrr : SS48I<0x17, MRMSrcReg, (outs),
                  (ins VR128:$src1, VR128:$src2),
                  "ptest\t{$src2, $src1|$src1, $src2}",
                  [(set EFLAGS,
                        (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
                Sched<[SchedWriteVecTest.XMM]>;
  def PTESTrm : SS48I<0x17, MRMSrcMem, (outs),
                  (ins VR128:$src1, f128mem:$src2),
                  "ptest\t{$src2, $src1|$src1, $src2}",
                  [(set EFLAGS,
                        (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
                Sched<[SchedWriteVecTest.XMM.Folded,
                       SchedWriteVecTest.XMM.ReadAfterFold]>;
} // Defs = [EFLAGS]

// The bit test instructions below are AVX only
// avx_bittest - register and memory forms selecting from X86testp; neither
// form has an output operand, the result is EFLAGS.
multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop, PatFrag mem_frag, ValueType vt,
                       X86FoldableSchedWrite sched> {
  def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
             OpcodeStr # "\t{$src2, $src1|$src1, $src2}",
             [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>,
           Sched<[sched]>, VEX;
  def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
             OpcodeStr # "\t{$src2, $src1|$src1, $src2}",
             [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>, VEX;
}

let Defs = [EFLAGS], Predicates = [HasAVX] in {
  let ExeDomain = SSEPackedSingle in {
    defm VTESTPS  : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32,
                                v4f32, SchedWriteFTest.XMM>;
    defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32,
                                v8f32, SchedWriteFTest.YMM>, VEX_L;
  }
  let ExeDomain = SSEPackedDouble in {
    defm VTESTPD  : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64,
                                v2f64, SchedWriteFTest.XMM>;
    defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64,
                                v4f64, SchedWriteFTest.YMM>, VEX_L;
  }
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Misc Instructions
//===----------------------------------------------------------------------===//

// popcnt for 16/32/64-bit GPRs, selected from ctpop. Each form has a register
// and a load-folding variant, and all of them define EFLAGS.
let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
  def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                     "popcnt{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (ctpop GR16:$src)),
                      (implicit EFLAGS)]>,
                   Sched<[WritePOPCNT]>, OpSize16, XS;
  def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                     "popcnt{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (ctpop (loadi16 addr:$src))),
                      (implicit EFLAGS)]>,
                   Sched<[WritePOPCNT.Folded]>, OpSize16, XS;

  def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                     "popcnt{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (ctpop GR32:$src)),
                      (implicit EFLAGS)]>,
                   Sched<[WritePOPCNT]>, OpSize32, XS;

  def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                     "popcnt{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (ctpop (loadi32 addr:$src))),
                      (implicit EFLAGS)]>,
                   Sched<[WritePOPCNT.Folded]>, OpSize32, XS;

  def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                      "popcnt{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (ctpop GR64:$src)),
                       (implicit EFLAGS)]>,
                   Sched<[WritePOPCNT]>, XS;
  def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                      "popcnt{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (ctpop (loadi64 addr:$src))),
                       (implicit EFLAGS)]>,
                   Sched<[WritePOPCNT.Folded]>, XS;
}

// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
                                 SDNode OpNode, PatFrag ld_frag,
                                 X86FoldableSchedWrite Sched> {
  // Register form: v8i16 in, v8i16 out.
  // Note: inside Sched<[...]> the name `Sched` is this multiclass's template
  // argument, not the Sched class.
  def rr : SS48I<opc, MRMSrcReg,
             (outs VR128:$dst), (ins VR128:$src),
             OpcodeStr # "\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst, (v8i16 (OpNode (v8i16 VR128:$src))))]>,
           Sched<[Sched]>;

  // Memory form: the source is matched through ld_frag.
  def rm : SS48I<opc, MRMSrcMem,
             (outs VR128:$dst), (ins i128mem:$src),
             OpcodeStr # "\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst,
                   (v8i16 (OpNode (ld_frag addr:$src))))]>,
           Sched<[Sched.Folded]>;
}

// PHMIN has the same profile as PSAD, thus we use the same scheduling
// model, although the naming is misleading.
// NOTE(review): both instantiations below pass WritePHMINPOS, so the remark
// above may be out of date - confirm against the scheduling definitions.
let Predicates = [HasAVX] in
defm VPHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "vphminposuw",
                                         X86phminpos, load,
                                         WritePHMINPOS>,
                   VEX, VEX_WIG;
defm PHMINPOSUW  : SS41I_unop_rm_int_v16<0x41, "phminposuw",
                                         X86phminpos, memop,
                                         WritePHMINPOS>;

/// SS48I_binop_rm - Simple SSE41 binary operator.
58100b57cec5SDimitry Andricmulticlass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 58110b57cec5SDimitry Andric ValueType OpVT, RegisterClass RC, PatFrag memop_frag, 58120b57cec5SDimitry Andric X86MemOperand x86memop, X86FoldableSchedWrite sched, 58130b57cec5SDimitry Andric bit Is2Addr = 1> { 58140b57cec5SDimitry Andric let isCommutable = 1 in 58150b57cec5SDimitry Andric def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst), 58160b57cec5SDimitry Andric (ins RC:$src1, RC:$src2), 58170b57cec5SDimitry Andric !if(Is2Addr, 58180b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 58190b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 58200b57cec5SDimitry Andric [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, 58210b57cec5SDimitry Andric Sched<[sched]>; 58220b57cec5SDimitry Andric def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst), 58230b57cec5SDimitry Andric (ins RC:$src1, x86memop:$src2), 58240b57cec5SDimitry Andric !if(Is2Addr, 58250b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 58260b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 58270b57cec5SDimitry Andric [(set RC:$dst, 58280b57cec5SDimitry Andric (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, 58290b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 58300b57cec5SDimitry Andric} 58310b57cec5SDimitry Andric 58320b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 58330b57cec5SDimitry Andric defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128, 58340b57cec5SDimitry Andric load, i128mem, SchedWriteVecALU.XMM, 0>, 58350b57cec5SDimitry Andric VEX_4V, VEX_WIG; 58360b57cec5SDimitry Andric defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128, 58370b57cec5SDimitry Andric load, i128mem, SchedWriteVecALU.XMM, 0>, 58380b57cec5SDimitry Andric VEX_4V, VEX_WIG; 58390b57cec5SDimitry Andric defm VPMAXSD : SS48I_binop_rm<0x3D, 
"vpmaxsd", smax, v4i32, VR128, 58400b57cec5SDimitry Andric load, i128mem, SchedWriteVecALU.XMM, 0>, 58410b57cec5SDimitry Andric VEX_4V, VEX_WIG; 58420b57cec5SDimitry Andric defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128, 58430b57cec5SDimitry Andric load, i128mem, SchedWriteVecALU.XMM, 0>, 58440b57cec5SDimitry Andric VEX_4V, VEX_WIG; 58450b57cec5SDimitry Andric defm VPMULDQ : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v2i64, VR128, 58460b57cec5SDimitry Andric load, i128mem, SchedWriteVecIMul.XMM, 0>, 58470b57cec5SDimitry Andric VEX_4V, VEX_WIG; 58480b57cec5SDimitry Andric} 58490b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in { 58500b57cec5SDimitry Andric defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128, 58510b57cec5SDimitry Andric load, i128mem, SchedWriteVecALU.XMM, 0>, 58520b57cec5SDimitry Andric VEX_4V, VEX_WIG; 58530b57cec5SDimitry Andric defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128, 58540b57cec5SDimitry Andric load, i128mem, SchedWriteVecALU.XMM, 0>, 58550b57cec5SDimitry Andric VEX_4V, VEX_WIG; 58560b57cec5SDimitry Andric defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128, 58570b57cec5SDimitry Andric load, i128mem, SchedWriteVecALU.XMM, 0>, 58580b57cec5SDimitry Andric VEX_4V, VEX_WIG; 58590b57cec5SDimitry Andric defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128, 58600b57cec5SDimitry Andric load, i128mem, SchedWriteVecALU.XMM, 0>, 58610b57cec5SDimitry Andric VEX_4V, VEX_WIG; 58620b57cec5SDimitry Andric} 58630b57cec5SDimitry Andric 58640b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in { 58650b57cec5SDimitry Andric defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256, 58660b57cec5SDimitry Andric load, i256mem, SchedWriteVecALU.YMM, 0>, 58670b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 58680b57cec5SDimitry Andric defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256, 58690b57cec5SDimitry Andric load, i256mem, 
SchedWriteVecALU.YMM, 0>, 58700b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 58710b57cec5SDimitry Andric defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256, 58720b57cec5SDimitry Andric load, i256mem, SchedWriteVecALU.YMM, 0>, 58730b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 58740b57cec5SDimitry Andric defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256, 58750b57cec5SDimitry Andric load, i256mem, SchedWriteVecALU.YMM, 0>, 58760b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 58770b57cec5SDimitry Andric defm VPMULDQY : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v4i64, VR256, 58780b57cec5SDimitry Andric load, i256mem, SchedWriteVecIMul.YMM, 0>, 58790b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 58800b57cec5SDimitry Andric} 58810b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { 58820b57cec5SDimitry Andric defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256, 58830b57cec5SDimitry Andric load, i256mem, SchedWriteVecALU.YMM, 0>, 58840b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 58850b57cec5SDimitry Andric defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256, 58860b57cec5SDimitry Andric load, i256mem, SchedWriteVecALU.YMM, 0>, 58870b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 58880b57cec5SDimitry Andric defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256, 58890b57cec5SDimitry Andric load, i256mem, SchedWriteVecALU.YMM, 0>, 58900b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 58910b57cec5SDimitry Andric defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256, 58920b57cec5SDimitry Andric load, i256mem, SchedWriteVecALU.YMM, 0>, 58930b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 58940b57cec5SDimitry Andric} 58950b57cec5SDimitry Andric 58960b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { 58970b57cec5SDimitry Andric defm PMINSB : SS48I_binop_rm<0x38, "pminsb", smin, v16i8, VR128, 58980b57cec5SDimitry Andric memop, i128mem, SchedWriteVecALU.XMM, 1>; 
58990b57cec5SDimitry Andric defm PMINSD : SS48I_binop_rm<0x39, "pminsd", smin, v4i32, VR128, 59000b57cec5SDimitry Andric memop, i128mem, SchedWriteVecALU.XMM, 1>; 59010b57cec5SDimitry Andric defm PMINUD : SS48I_binop_rm<0x3B, "pminud", umin, v4i32, VR128, 59020b57cec5SDimitry Andric memop, i128mem, SchedWriteVecALU.XMM, 1>; 59030b57cec5SDimitry Andric defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", umin, v8i16, VR128, 59040b57cec5SDimitry Andric memop, i128mem, SchedWriteVecALU.XMM, 1>; 59050b57cec5SDimitry Andric defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", smax, v16i8, VR128, 59060b57cec5SDimitry Andric memop, i128mem, SchedWriteVecALU.XMM, 1>; 59070b57cec5SDimitry Andric defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", smax, v4i32, VR128, 59080b57cec5SDimitry Andric memop, i128mem, SchedWriteVecALU.XMM, 1>; 59090b57cec5SDimitry Andric defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", umax, v4i32, VR128, 59100b57cec5SDimitry Andric memop, i128mem, SchedWriteVecALU.XMM, 1>; 59110b57cec5SDimitry Andric defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", umax, v8i16, VR128, 59120b57cec5SDimitry Andric memop, i128mem, SchedWriteVecALU.XMM, 1>; 59130b57cec5SDimitry Andric defm PMULDQ : SS48I_binop_rm<0x28, "pmuldq", X86pmuldq, v2i64, VR128, 59140b57cec5SDimitry Andric memop, i128mem, SchedWriteVecIMul.XMM, 1>; 59150b57cec5SDimitry Andric} 59160b57cec5SDimitry Andric 59170b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in 59180b57cec5SDimitry Andric defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128, 59190b57cec5SDimitry Andric load, i128mem, SchedWritePMULLD.XMM, 0>, 59200b57cec5SDimitry Andric VEX_4V, VEX_WIG; 59210b57cec5SDimitry Andriclet Predicates = [HasAVX] in 59220b57cec5SDimitry Andric defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128, 59230b57cec5SDimitry Andric load, i128mem, SchedWriteVecALU.XMM, 0>, 59240b57cec5SDimitry Andric VEX_4V, VEX_WIG; 59250b57cec5SDimitry Andric 59260b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] 
in 59270b57cec5SDimitry Andric defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256, 59280b57cec5SDimitry Andric load, i256mem, SchedWritePMULLD.YMM, 0>, 59290b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 59300b57cec5SDimitry Andriclet Predicates = [HasAVX2] in 59310b57cec5SDimitry Andric defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256, 59320b57cec5SDimitry Andric load, i256mem, SchedWriteVecALU.YMM, 0>, 59330b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 59340b57cec5SDimitry Andric 59350b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { 59360b57cec5SDimitry Andric defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128, 59370b57cec5SDimitry Andric memop, i128mem, SchedWritePMULLD.XMM, 1>; 59380b57cec5SDimitry Andric defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128, 59390b57cec5SDimitry Andric memop, i128mem, SchedWriteVecALU.XMM, 1>; 59400b57cec5SDimitry Andric} 59410b57cec5SDimitry Andric 59420b57cec5SDimitry Andric/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate 59430b57cec5SDimitry Andricmulticlass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, 59440b57cec5SDimitry Andric Intrinsic IntId, RegisterClass RC, PatFrag memop_frag, 59450b57cec5SDimitry Andric X86MemOperand x86memop, bit Is2Addr, 59460b57cec5SDimitry Andric X86FoldableSchedWrite sched> { 59470b57cec5SDimitry Andric let isCommutable = 1 in 59480b57cec5SDimitry Andric def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst), 59490b57cec5SDimitry Andric (ins RC:$src1, RC:$src2, u8imm:$src3), 59500b57cec5SDimitry Andric !if(Is2Addr, 59510b57cec5SDimitry Andric !strconcat(OpcodeStr, 59520b57cec5SDimitry Andric "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 59530b57cec5SDimitry Andric !strconcat(OpcodeStr, 59540b57cec5SDimitry Andric "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 59558bcb0991SDimitry Andric [(set RC:$dst, (IntId RC:$src1, RC:$src2, timm:$src3))]>, 59560b57cec5SDimitry Andric 
Sched<[sched]>; 59570b57cec5SDimitry Andric def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst), 59580b57cec5SDimitry Andric (ins RC:$src1, x86memop:$src2, u8imm:$src3), 59590b57cec5SDimitry Andric !if(Is2Addr, 59600b57cec5SDimitry Andric !strconcat(OpcodeStr, 59610b57cec5SDimitry Andric "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 59620b57cec5SDimitry Andric !strconcat(OpcodeStr, 59630b57cec5SDimitry Andric "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 59640b57cec5SDimitry Andric [(set RC:$dst, 59658bcb0991SDimitry Andric (IntId RC:$src1, (memop_frag addr:$src2), timm:$src3))]>, 59660b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 59670b57cec5SDimitry Andric} 59680b57cec5SDimitry Andric 59690b57cec5SDimitry Andric/// SS41I_binop_rmi - SSE 4.1 binary operator with 8-bit immediate 59700b57cec5SDimitry Andricmulticlass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode, 59710b57cec5SDimitry Andric ValueType OpVT, RegisterClass RC, PatFrag memop_frag, 59720b57cec5SDimitry Andric X86MemOperand x86memop, bit Is2Addr, 59730b57cec5SDimitry Andric X86FoldableSchedWrite sched> { 59740b57cec5SDimitry Andric let isCommutable = 1 in 59750b57cec5SDimitry Andric def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst), 59760b57cec5SDimitry Andric (ins RC:$src1, RC:$src2, u8imm:$src3), 59770b57cec5SDimitry Andric !if(Is2Addr, 59780b57cec5SDimitry Andric !strconcat(OpcodeStr, 59790b57cec5SDimitry Andric "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 59800b57cec5SDimitry Andric !strconcat(OpcodeStr, 59810b57cec5SDimitry Andric "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 59828bcb0991SDimitry Andric [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>, 59830b57cec5SDimitry Andric Sched<[sched]>; 59840b57cec5SDimitry Andric def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst), 59850b57cec5SDimitry Andric (ins RC:$src1, x86memop:$src2, u8imm:$src3), 59860b57cec5SDimitry Andric !if(Is2Addr, 59870b57cec5SDimitry Andric 
!strconcat(OpcodeStr, 59880b57cec5SDimitry Andric "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 59890b57cec5SDimitry Andric !strconcat(OpcodeStr, 59900b57cec5SDimitry Andric "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 59910b57cec5SDimitry Andric [(set RC:$dst, 59928bcb0991SDimitry Andric (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>, 59930b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 59940b57cec5SDimitry Andric} 59950b57cec5SDimitry Andric 59968bcb0991SDimitry Andricdef BlendCommuteImm2 : SDNodeXForm<timm, [{ 59970b57cec5SDimitry Andric uint8_t Imm = N->getZExtValue() & 0x03; 59980b57cec5SDimitry Andric return getI8Imm(Imm ^ 0x03, SDLoc(N)); 59990b57cec5SDimitry Andric}]>; 60000b57cec5SDimitry Andric 60018bcb0991SDimitry Andricdef BlendCommuteImm4 : SDNodeXForm<timm, [{ 60020b57cec5SDimitry Andric uint8_t Imm = N->getZExtValue() & 0x0f; 60030b57cec5SDimitry Andric return getI8Imm(Imm ^ 0x0f, SDLoc(N)); 60040b57cec5SDimitry Andric}]>; 60050b57cec5SDimitry Andric 60068bcb0991SDimitry Andricdef BlendCommuteImm8 : SDNodeXForm<timm, [{ 60070b57cec5SDimitry Andric uint8_t Imm = N->getZExtValue() & 0xff; 60080b57cec5SDimitry Andric return getI8Imm(Imm ^ 0xff, SDLoc(N)); 60090b57cec5SDimitry Andric}]>; 60100b57cec5SDimitry Andric 60110b57cec5SDimitry Andric// Turn a 4-bit blendi immediate to 8-bit for use with pblendw. 60128bcb0991SDimitry Andricdef BlendScaleImm4 : SDNodeXForm<timm, [{ 60130b57cec5SDimitry Andric uint8_t Imm = N->getZExtValue(); 60140b57cec5SDimitry Andric uint8_t NewImm = 0; 60150b57cec5SDimitry Andric for (unsigned i = 0; i != 4; ++i) { 60160b57cec5SDimitry Andric if (Imm & (1 << i)) 60170b57cec5SDimitry Andric NewImm |= 0x3 << (i * 2); 60180b57cec5SDimitry Andric } 60190b57cec5SDimitry Andric return getI8Imm(NewImm, SDLoc(N)); 60200b57cec5SDimitry Andric}]>; 60210b57cec5SDimitry Andric 60220b57cec5SDimitry Andric// Turn a 2-bit blendi immediate to 8-bit for use with pblendw. 
def BlendScaleImm2 : SDNodeXForm<timm, [{
  // Widen a 2-bit mask to 8 bits: input bit i (i = 0, 1) becomes the nibble
  // 0xf placed at bit position 4*i.
  uint8_t Imm = N->getZExtValue();
  uint8_t NewImm = 0;
  for (unsigned i = 0; i != 2; ++i) {
    if (Imm & (1 << i))
      NewImm |= 0xf << (i * 4);
  }
  return getI8Imm(NewImm, SDLoc(N));
}]>;

// Turn a 2-bit blendi immediate to 4-bit for use with pblendd.
def BlendScaleImm2to4 : SDNodeXForm<timm, [{
  // Widen a 2-bit mask to 4 bits: input bit i (i = 0, 1) becomes the bit pair
  // 0x3 placed at bit position 2*i.
  uint8_t Imm = N->getZExtValue();
  uint8_t NewImm = 0;
  for (unsigned i = 0; i != 2; ++i) {
    if (Imm & (1 << i))
      NewImm |= 0x3 << (i * 2);
  }
  return getI8Imm(NewImm, SDLoc(N));
}]>;

// Turn a 4-bit blendi immediate to 8-bit for use with pblendw and invert it.
def BlendScaleCommuteImm4 : SDNodeXForm<timm, [{
  // Widen a 4-bit mask to 8 bits (input bit i becomes the bit pair 0x3 at bit
  // position 2*i), then flip all 8 bits so the result is the mask for the
  // blend with its two sources swapped.
  uint8_t Imm = N->getZExtValue();
  uint8_t NewImm = 0;
  for (unsigned i = 0; i != 4; ++i) {
    if (Imm & (1 << i))
      NewImm |= 0x3 << (i * 2);
  }
  return getI8Imm(NewImm ^ 0xff, SDLoc(N));
}]>;

// Turn a 2-bit blendi immediate to 8-bit for use with pblendw and invert it.
def BlendScaleCommuteImm2 : SDNodeXForm<timm, [{
  // Widen a 2-bit mask to 8 bits (input bit i becomes the nibble 0xf at bit
  // position 4*i), then flip all 8 bits.
  uint8_t Imm = N->getZExtValue();
  uint8_t NewImm = 0;
  for (unsigned i = 0; i != 2; ++i) {
    if (Imm & (1 << i))
      NewImm |= 0xf << (i * 4);
  }
  return getI8Imm(NewImm ^ 0xff, SDLoc(N));
}]>;

// Turn a 2-bit blendi immediate to 4-bit for use with pblendd and invert it.
def BlendScaleCommuteImm2to4 : SDNodeXForm<timm, [{
  // Widen a 2-bit mask to 4 bits (input bit i becomes the bit pair 0x3 at bit
  // position 2*i), then flip the low 4 bits.
  uint8_t Imm = N->getZExtValue();
  uint8_t NewImm = 0;
  for (unsigned i = 0; i != 2; ++i) {
    if (Imm & (1 << i))
      NewImm |= 0x3 << (i * 2);
  }
  return getI8Imm(NewImm ^ 0xf, SDLoc(N));
}]>;

// VEX-encoded MPSADBW and dot-product instructions. isCommutable is forced to
// 0 for MPSADBW, overriding the isCommutable = 1 that SS41I_binop_rmi_int puts
// on its rri form.
let Predicates = [HasAVX] in {
  let isCommutable = 0 in {
  defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
                                      VR128, load, i128mem, 0,
                                      SchedWriteMPSAD.XMM>, VEX_4V, VEX_WIG;
  }

let Uses = [MXCSR], mayRaiseFPException = 1 in {
  let ExeDomain = SSEPackedSingle in
  defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
                                   VR128, load, f128mem, 0,
                                   SchedWriteDPPS.XMM>, VEX_4V, VEX_WIG;
  let ExeDomain = SSEPackedDouble in
  defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
                                   VR128, load, f128mem, 0,
                                   SchedWriteDPPD.XMM>, VEX_4V, VEX_WIG;
  let ExeDomain = SSEPackedSingle in
  defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
                                    VR256, load, i256mem, 0,
                                    SchedWriteDPPS.YMM>, VEX_4V, VEX_L, VEX_WIG;
}
}

let Predicates = [HasAVX2] in {
  let isCommutable = 0 in {
  defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
                                       VR256, load, i256mem, 0,
                                       SchedWriteMPSAD.YMM>, VEX_4V, VEX_L, VEX_WIG;
  }
}

// Legacy (non-VEX) two-address forms: $src1 is tied to $dst.
let Constraints = "$src1 = $dst" in {
  let isCommutable = 0 in {
  defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
                                     VR128, memop, i128mem, 1,
                                     SchedWriteMPSAD.XMM>;
  }

  let ExeDomain = SSEPackedSingle in
  defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
                                  VR128, memop, f128mem, 1,
                                  SchedWriteDPPS.XMM>, SIMD_EXC;
  let ExeDomain = SSEPackedDouble in
  defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
                                  VR128, memop, f128mem, 1,
                                  SchedWriteDPPD.XMM>, SIMD_EXC;
}

/// SS41I_blend_rmi - SSE 4.1 blend with 8-bit immediate
///
/// Defines a register form (rri) and a load-folding form (rmi) of a blend,
/// plus an anonymous pattern that handles a load in the first source.
///
///   opc          - opcode byte.
///   OpcodeStr    - assembly mnemonic.
///   OpNode       - DAG node matched by the patterns.
///   OpVT         - vector type of the result.
///   RC           - register class of the destination and register sources.
///   memop_frag   - load fragment used for the folded memory operand.
///   x86memop     - memory operand type of the rmi form.
///   Is2Addr      - when set, uses the two-operand assembly string and ties
///                  $src1 to $dst; otherwise the three-operand string is used
///                  and no constraint is added.
///   d            - execution domain assigned to both instructions.
///   sched        - scheduling class; the rmi form uses its Folded and
///                  ReadAfterFold members.
///   commuteXForm - transform applied to the immediate when the sources are
///                  swapped to fold a load that appears as the first source.
multiclass SS41I_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                           X86MemOperand x86memop, bit Is2Addr, Domain d,
                           X86FoldableSchedWrite sched, SDNodeXForm commuteXForm> {
let ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in {
  let isCommutable = 1 in
  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
        Sched<[sched]>;
  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst,
          (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
}

  // Pattern to commute if load is in first source.
  def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, timm:$src3)),
            (!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
                                            (commuteXForm timm:$src3))>;
}

// The commute transform passed to each instantiation matches the element
// count of its vector type (2, 4 or 8 mask bits).
let Predicates = [HasAVX] in {
  defm VBLENDPS : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v4f32,
                                  VR128, load, f128mem, 0, SSEPackedSingle,
                                  SchedWriteFBlend.XMM, BlendCommuteImm4>,
                                  VEX_4V, VEX_WIG;
  defm VBLENDPSY : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v8f32,
                                   VR256, load, f256mem, 0, SSEPackedSingle,
                                   SchedWriteFBlend.YMM, BlendCommuteImm8>,
                                   VEX_4V, VEX_L, VEX_WIG;
  defm VBLENDPD : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v2f64,
                                  VR128, load, f128mem, 0, SSEPackedDouble,
                                  SchedWriteFBlend.XMM, BlendCommuteImm2>,
                                  VEX_4V, VEX_WIG;
  defm VBLENDPDY : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v4f64,
                                   VR256, load, f256mem, 0, SSEPackedDouble,
                                   SchedWriteFBlend.YMM, BlendCommuteImm4>,
                                   VEX_4V, VEX_L, VEX_WIG;
  defm VPBLENDW : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v8i16,
                                  VR128, load, i128mem, 0, SSEPackedInt,
                                  SchedWriteBlend.XMM, BlendCommuteImm8>,
                                  VEX_4V, VEX_WIG;
}

let Predicates = [HasAVX2] in {
  defm VPBLENDWY : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v16i16,
                                   VR256, load, i256mem, 0, SSEPackedInt,
                                   SchedWriteBlend.YMM, BlendCommuteImm8>,
                                   VEX_4V, VEX_L, VEX_WIG;
}

// Emulate vXi32/vXi64 blends with vXf32/vXf64 or pblendw.
// ExecutionDomainFixPass will cleanup domains later on.
let Predicates = [HasAVX1Only] in {
def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3),
          (VBLENDPDYrri VR256:$src1, VR256:$src2, timm:$src3)>;
def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3),
          (VBLENDPDYrmi VR256:$src1, addr:$src2, timm:$src3)>;
def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3),
          (VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 timm:$src3))>;

// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
// it from becoming movsd via commuting under optsize.
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
          (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 timm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3),
          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 timm:$src3))>;
def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3),
          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 timm:$src3))>;

def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), timm:$src3),
          (VBLENDPSYrri VR256:$src1, VR256:$src2, timm:$src3)>;
def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), timm:$src3),
          (VBLENDPSYrmi VR256:$src1, addr:$src2, timm:$src3)>;
def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, timm:$src3),
          (VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 timm:$src3))>;

// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
// it from becoming movss via commuting under optsize.
def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3),
          (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 timm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), timm:$src3),
          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>;
def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, timm:$src3),
          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>;
}

// Legacy (non-VEX) blends; Is2Addr = 1 ties $src1 to $dst inside the
// multiclass.
defm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32,
                               VR128, memop, f128mem, 1, SSEPackedSingle,
                               SchedWriteFBlend.XMM, BlendCommuteImm4>;
defm BLENDPD : SS41I_blend_rmi<0x0D, "blendpd", X86Blendi, v2f64,
                               VR128, memop, f128mem, 1, SSEPackedDouble,
                               SchedWriteFBlend.XMM, BlendCommuteImm2>;
defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16,
                               VR128, memop, i128mem, 1, SSEPackedInt,
                               SchedWriteBlend.XMM, BlendCommuteImm8>;

let Predicates = [UseSSE41] in {
// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
// it from becoming movsd/movss via commuting under optsize.
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
          (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 timm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), timm:$src3),
          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 timm:$src3))>;
def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, timm:$src3),
          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 timm:$src3))>;

def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3),
          (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 timm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), timm:$src3),
          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>;
def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, timm:$src3),
          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>;
}

// For insertion into the zero index (low half) of a 256-bit vector, it is
// more efficient to generate a blend with immediate instead of an insert*128.
let Predicates = [HasAVX] in {
// Register forms: the 128-bit value is widened with INSERT_SUBREG into an
// undefined 256-bit register and passed as the second blend source; the
// immediate has one bit set per low-half element (0x3 for 2 x f64, 0xf for
// 4 x f32).
def : Pat<(insert_subvector (v4f64 VR256:$src1), (v2f64 VR128:$src2), (iPTR 0)),
          (VBLENDPDYrri VR256:$src1,
                        (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0x3)>;
def : Pat<(insert_subvector (v8f32 VR256:$src1), (v4f32 VR128:$src2), (iPTR 0)),
          (VBLENDPSYrri VR256:$src1,
                        (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0xf)>;

// Load forms: the 256-bit base vector is the folded memory operand, so the
// widened 128-bit value becomes the first source and the immediate sets the
// high-half bits instead (0xc, 0xf0).
def : Pat<(insert_subvector (loadv4f64 addr:$src2), (v2f64 VR128:$src1), (iPTR 0)),
          (VBLENDPDYrmi (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm), addr:$src2, 0xc)>;
def : Pat<(insert_subvector (loadv8f32 addr:$src2), (v4f32 VR128:$src1), (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
}

/// SS41I_quaternary_avx - AVX SSE 4.1 with 4 operands
///
/// Defines a register form (rr) and a load-folding form (rm) taking three
/// sources. The selection patterns hand the sources to OpNode in reverse
/// order: ($src3, $src2, $src1).
///
///   opc       - opcode byte.
///   OpcodeStr - assembly mnemonic.
///   RC        - register class of the destination and register sources.
///   x86memop  - memory operand type of the rm form.
///   VT        - vector type applied to the rr pattern result.
///   mem_frag  - load fragment used for the folded $src2 operand.
///   OpNode    - DAG node matched by the patterns.
///   sched     - scheduling class; the rm form uses its Folded and
///               ReadAfterFold members.
multiclass SS41I_quaternary_avx<bits<8> opc, string OpcodeStr, RegisterClass RC,
                                X86MemOperand x86memop, ValueType VT,
                                PatFrag mem_frag, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  def rr : Ii8Reg<opc, MRMSrcReg, (outs RC:$dst),
                  (ins RC:$src1, RC:$src2, RC:$src3),
                  !strconcat(OpcodeStr,
                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                  [(set RC:$dst, (VT (OpNode RC:$src3, RC:$src2, RC:$src1)))],
                  SSEPackedInt>, TAPD, VEX_4V,
                Sched<[sched]>;

  def rm : Ii8Reg<opc, MRMSrcMem, (outs RC:$dst),
                  (ins RC:$src1, x86memop:$src2, RC:$src3),
                  !strconcat(OpcodeStr,
                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                  [(set RC:$dst,
                        (OpNode RC:$src3, (mem_frag addr:$src2),
                                RC:$src1))], SSEPackedInt>, TAPD, VEX_4V,
                Sched<[sched.Folded, sched.ReadAfterFold,
                       // x86memop:$src2
                       ReadDefault, ReadDefault, ReadDefault, ReadDefault,
                       ReadDefault,
                       // RC:$src3
                       sched.ReadAfterFold]>;
}

let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD  : SS41I_quaternary_avx<0x4B, "vblendvpd", VR128, f128mem,
                                       v2f64, loadv2f64, X86Blendv,
                                       SchedWriteFVarBlend.XMM>;
defm VBLENDVPDY : SS41I_quaternary_avx<0x4B, "vblendvpd", VR256, f256mem,
                                       v4f64, loadv4f64, X86Blendv,
                                       SchedWriteFVarBlend.YMM>, VEX_L;
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
defm VBLENDVPS  : SS41I_quaternary_avx<0x4A, "vblendvps", VR128, f128mem,
                                       v4f32, loadv4f32, X86Blendv,
                                       SchedWriteFVarBlend.XMM>;
defm VBLENDVPSY : SS41I_quaternary_avx<0x4A, "vblendvps", VR256, f256mem,
                                       v8f32, loadv8f32, X86Blendv,
                                       SchedWriteFVarBlend.YMM>, VEX_L;
} // ExeDomain = SSEPackedSingle
defm VPBLENDVB  : SS41I_quaternary_avx<0x4C, "vpblendvb", VR128, i128mem,
                                       v16i8, loadv16i8, X86Blendv,
                                       SchedWriteVarBlend.XMM>;
}

let Predicates = [HasAVX2] in {
defm VPBLENDVBY : SS41I_quaternary_avx<0x4C, "vpblendvb", VR256, i256mem,
                                       v32i8, loadv32i8, X86Blendv,
                                       SchedWriteVarBlend.YMM>, VEX_L;
}

// Variable blends of 32-bit and 64-bit integer vectors are selected to the
// floating-point VBLENDVPS/VBLENDVPD register forms. The instruction operands
// are ($src2, $src1, $mask), the reverse of the X86Blendv node's
// (mask, src1, src2).
let Predicates = [HasAVX] in {
  def : Pat<(v4i32 (X86Blendv (v4i32 VR128:$mask), (v4i32 VR128:$src1),
                              (v4i32 VR128:$src2))),
            (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v2i64 (X86Blendv (v2i64 VR128:$mask), (v2i64 VR128:$src1),
                              (v2i64 VR128:$src2))),
            (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v8i32 (X86Blendv (v8i32 VR256:$mask), (v8i32 VR256:$src1),
                              (v8i32 VR256:$src2))),
            (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v4i64 (X86Blendv (v4i64 VR256:$mask), (v4i64 VR256:$src1),
                              (v4i64 VR256:$src2))),
            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
}

// Prefer a movss or movsd over a blendps when optimizing for size.
// these were
// changed to use blends because blends have better throughput on sandybridge
// and haswell, but movs[s/d] are 1-2 byte shorter instructions.
// NOTE: the patterns in this block are predicated on OptForSpeed, i.e. they
// select the blend forms when optimizing for speed.
let Predicates = [HasAVX, OptForSpeed] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (VBLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (VPBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;

  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
            (VBLENDPSrri VR128:$src1, VR128:$src2, (i8 1))>;
  def : Pat<(v4f32 (X86Movss VR128:$src1, (loadv4f32 addr:$src2))),
            (VBLENDPSrmi VR128:$src1, addr:$src2, (i8 1))>;
  // Load in the first operand: operands are swapped and the mask is inverted.
  def : Pat<(v4f32 (X86Movss (loadv4f32 addr:$src2), VR128:$src1)),
            (VBLENDPSrmi VR128:$src1, addr:$src2, (i8 0xe))>;

  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
            (VBLENDPDrri VR128:$src1, VR128:$src2, (i8 1))>;
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (loadv2f64 addr:$src2))),
            (VBLENDPDrmi VR128:$src1, addr:$src2, (i8 1))>;
  // Load in the first operand: operands are swapped and the mask is inverted.
  def : Pat<(v2f64 (X86Movsd (loadv2f64 addr:$src2), VR128:$src1)),
            (VBLENDPDrmi VR128:$src1, addr:$src2, (i8 2))>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                          (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)),
                          (i8 1))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                          (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)),
                          (i8 3))), sub_xmm)>;
}

// Prefer a movss or movsd over a blendps when optimizing for size. these were
// changed to use blends because blends have better throughput on sandybridge
// and haswell, but movs[s/d] are 1-2 byte shorter instructions.
// NOTE: the patterns in this block are predicated on OptForSpeed, i.e. they
// select the blend forms when optimizing for speed.
let Predicates = [UseSSE41, OptForSpeed] in {
  // With SSE41 we can use blends for these patterns.
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (BLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (PBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;

  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
            (BLENDPSrri VR128:$src1, VR128:$src2, (i8 1))>;
  def : Pat<(v4f32 (X86Movss VR128:$src1, (memopv4f32 addr:$src2))),
            (BLENDPSrmi VR128:$src1, addr:$src2, (i8 1))>;
  // Load in the first operand: operands are swapped and the mask is inverted.
  def : Pat<(v4f32 (X86Movss (memopv4f32 addr:$src2), VR128:$src1)),
            (BLENDPSrmi VR128:$src1, addr:$src2, (i8 0xe))>;

  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
            (BLENDPDrri VR128:$src1, VR128:$src2, (i8 1))>;
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (memopv2f64 addr:$src2))),
            (BLENDPDrmi VR128:$src1, addr:$src2, (i8 1))>;
  // Load in the first operand: operands are swapped and the mask is inverted.
  def : Pat<(v2f64 (X86Movsd (memopv2f64 addr:$src2), VR128:$src1)),
            (BLENDPDrmi VR128:$src1, addr:$src2, (i8 2))>;
}


/// SS41I_ternary - SSE 4.1 ternary operator
/// Defines a register form (rr0) and a memory form (rm0). XMM0 is an implicit
/// use and is passed as the first operand of OpNode; $src1 is tied to $dst.
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
  multiclass SS41I_ternary<bits<8> opc, string OpcodeStr, ValueType VT,
                           PatFrag mem_frag, X86MemOperand x86memop,
                           SDNode OpNode, X86FoldableSchedWrite sched> {
    def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2),
                    !strconcat(OpcodeStr,
                     "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
                    [(set VR128:$dst,
                      (VT (OpNode XMM0, VR128:$src2, VR128:$src1)))]>,
                    Sched<[sched]>;

    def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins VR128:$src1, x86memop:$src2),
                    !strconcat(OpcodeStr,
                     "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
                    [(set VR128:$dst,
                      (OpNode XMM0, (mem_frag addr:$src2), VR128:$src1))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

let ExeDomain = SSEPackedDouble in
defm BLENDVPD : SS41I_ternary<0x15, "blendvpd", v2f64, memopv2f64, f128mem,
                              X86Blendv, SchedWriteFVarBlend.XMM>;
let ExeDomain = SSEPackedSingle in
defm BLENDVPS : SS41I_ternary<0x14, "blendvps", v4f32, memopv4f32, f128mem,
                              X86Blendv, SchedWriteFVarBlend.XMM>;
defm PBLENDVB : SS41I_ternary<0x10, "pblendvb", v16i8, memopv16i8, i128mem,
                              X86Blendv, SchedWriteVarBlend.XMM>;

// Aliases with the implicit xmm0 argument
def : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}",
                (BLENDVPDrr0 VR128:$dst, VR128:$src2), 0>;
def : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}",
                (BLENDVPDrm0 VR128:$dst, f128mem:$src2), 0>;
def : InstAlias<"blendvps\t{$src2, $dst|$dst, $src2}",
                (BLENDVPSrr0 VR128:$dst, VR128:$src2), 0>;
def : InstAlias<"blendvps\t{$src2, $dst|$dst, $src2}",
                (BLENDVPSrm0 VR128:$dst, f128mem:$src2), 0>;
def : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}",
                (PBLENDVBrr0 VR128:$dst, VR128:$src2), 0>;
def : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}",
                (PBLENDVBrm0 VR128:$dst, i128mem:$src2), 0>;

// Integer-typed X86Blendv nodes are selected to the floating point blendv
// register forms.
let Predicates = [UseSSE41] in {
  def : Pat<(v4i32 (X86Blendv (v4i32 XMM0), (v4i32 VR128:$src1),
                              (v4i32 VR128:$src2))),
            (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v2i64 (X86Blendv (v2i64 XMM0), (v2i64 VR128:$src1),
                              (v2i64 VR128:$src2))),
            (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
}

let AddedComplexity = 400 in { // Prefer non-temporal versions

let Predicates = [HasAVX, NoVLX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                        "vmovntdqa\t{$src, $dst|$dst, $src}", []>,
                        Sched<[SchedWriteVecMoveLSNT.XMM.RM]>, VEX, VEX_WIG;
let Predicates = [HasAVX2, NoVLX] in
def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                         "vmovntdqa\t{$src, $dst|$dst, $src}", []>,
                         Sched<[SchedWriteVecMoveLSNT.YMM.RM]>, VEX, VEX_L, VEX_WIG;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                       "movntdqa\t{$src, $dst|$dst, $src}", []>,
                       Sched<[SchedWriteVecMoveLSNT.XMM.RM]>;

// 256-bit aligned non-temporal loads of every vector type use VMOVNTDQAYrm.
let Predicates = [HasAVX2, NoVLX] in {
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
  def : Pat<(v16f16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
}

// 128-bit aligned non-temporal loads of every vector type use VMOVNTDQArm.
let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
  def : Pat<(v8f16 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
}

// Same 128-bit patterns for the non-VEX MOVNTDQArm.
let Predicates = [UseSSE41] in {
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
  def : Pat<(v8f16 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
}

} // AddedComplexity

//===----------------------------------------------------------------------===//
// SSE4.2 - Compare Instructions
//===----------------------------------------------------------------------===//

/// SS42I_binop_rm - Simple SSE 4.2 binary operator
/// Defines a register-register (rr) and a register-memory (rm) form. Is2Addr
/// selects the two-operand asm string; otherwise the three-operand string is
/// used.
multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                          X86MemOperand x86memop, X86FoldableSchedWrite sched,
                          bit Is2Addr = 1> {
  def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
       Sched<[sched]>;
  def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst,
         (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}

let Predicates = [HasAVX] in
  defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
                                 load, i128mem, SchedWriteVecALU.XMM, 0>,
                                 VEX_4V, VEX_WIG;

let Predicates = [HasAVX2] in
  defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;

let Constraints = "$src1 = $dst" in
  defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
                                memop, i128mem, SchedWriteVecALU.XMM>;

//===----------------------------------------------------------------------===//
// SSE4.2 - String/text Processing Instructions
//===----------------------------------------------------------------------===//

// The string multiclasses below declare no explicit outputs and no selection
// patterns; implicit register defs/uses come from the enclosing let blocks.

multiclass pcmpistrm_SS42AI<string asm> {
  def rr : SS42AI<0x62, MRMSrcReg, (outs),
    (ins VR128:$src1, VR128:$src2, u8imm:$src3),
    !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
    []>, Sched<[WritePCmpIStrM]>;
  let mayLoad = 1 in
  def rm : SS42AI<0x62, MRMSrcMem, (outs),
    (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
    !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
    []>, Sched<[WritePCmpIStrM.Folded, WritePCmpIStrM.ReadAfterFold]>;
}

let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in {
  let Predicates = [HasAVX] in
  defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX, VEX_WIG;
  defm PCMPISTRM  : pcmpistrm_SS42AI<"pcmpistrm">;
}

multiclass SS42AI_pcmpestrm<string asm> {
  def rr : SS42AI<0x60, MRMSrcReg, (outs),
    (ins VR128:$src1, VR128:$src3, u8imm:$src5),
    !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
    []>, Sched<[WritePCmpEStrM]>;
  let mayLoad = 1 in
  def rm : SS42AI<0x60, MRMSrcMem, (outs),
    (ins VR128:$src1, i128mem:$src3, u8imm:$src5),
    !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
    []>, Sched<[WritePCmpEStrM.Folded, WritePCmpEStrM.ReadAfterFold]>;
}

let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
  let Predicates = [HasAVX] in
  defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX, VEX_WIG;
  defm PCMPESTRM :  SS42AI_pcmpestrm<"pcmpestrm">;
}

multiclass SS42AI_pcmpistri<string asm> {
  def rr : SS42AI<0x63, MRMSrcReg, (outs),
    (ins VR128:$src1, VR128:$src2, u8imm:$src3),
    !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
    []>, Sched<[WritePCmpIStrI]>;
  let mayLoad = 1 in
  def rm : SS42AI<0x63, MRMSrcMem, (outs),
    (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
    !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
    []>, Sched<[WritePCmpIStrI.Folded, WritePCmpIStrI.ReadAfterFold]>;
}

let Defs = [ECX, EFLAGS], hasSideEffects = 0 in {
  let Predicates = [HasAVX] in
  defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX, VEX_WIG;
  defm PCMPISTRI  : SS42AI_pcmpistri<"pcmpistri">;
}

multiclass SS42AI_pcmpestri<string asm> {
  def rr : SS42AI<0x61, MRMSrcReg, (outs),
    (ins VR128:$src1, VR128:$src3, u8imm:$src5),
    !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
    []>, Sched<[WritePCmpEStrI]>;
  let mayLoad = 1 in
  def rm : SS42AI<0x61, MRMSrcMem, (outs),
    (ins VR128:$src1, i128mem:$src3, u8imm:$src5),
    !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
    []>, Sched<[WritePCmpEStrI.Folded, WritePCmpEStrI.ReadAfterFold]>;
}

let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
  let Predicates = [HasAVX] in
  defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX, VEX_WIG;
  defm PCMPESTRI  : SS42AI_pcmpestri<"pcmpestri">;
}

//===----------------------------------------------------------------------===//
// SSE4.2 - CRC Instructions
//===----------------------------------------------------------------------===//

// No CRC instructions have AVX equivalents

// crc intrinsic instruction
// These instructions come only in r/r and r/m forms; the only difference
// between them is the size of r and m.
// Register-source CRC32 form: RCOut is both the accumulator input ($src1) and
// the result ($dst); RCIn is the register class of the data operand ($src2).
class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut,
                   RegisterClass RCIn, SDPatternOperator Int> :
  CRC32I<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
         !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
         [(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))]>,
         Sched<[WriteCRC32]>;

// Memory-source CRC32 form: same as above, with the data operand loaded from
// x86memop.
class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
                   X86MemOperand x86memop, SDPatternOperator Int> :
  CRC32I<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
         !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
         [(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))]>,
         Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>;

let Constraints = "$src1 = $dst" in {
  def CRC32r32m8  : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem,
                                 int_x86_sse42_crc32_32_8>;
  def CRC32r32r8  : SS42I_crc32r<0xF0, "crc32{b}", GR32, GR8,
                                 int_x86_sse42_crc32_32_8>;
  def CRC32r32m16 : SS42I_crc32m<0xF1, "crc32{w}", GR32, i16mem,
                                 int_x86_sse42_crc32_32_16>, OpSize16;
  def CRC32r32r16 : SS42I_crc32r<0xF1, "crc32{w}", GR32, GR16,
                                 int_x86_sse42_crc32_32_16>, OpSize16;
  def CRC32r32m32 : SS42I_crc32m<0xF1, "crc32{l}", GR32, i32mem,
                                 int_x86_sse42_crc32_32_32>, OpSize32;
  def CRC32r32r32 : SS42I_crc32r<0xF1, "crc32{l}", GR32, GR32,
                                 int_x86_sse42_crc32_32_32>, OpSize32;
  def CRC32r64m64 : SS42I_crc32m<0xF1, "crc32{q}", GR64, i64mem,
                                 int_x86_sse42_crc32_64_64>, REX_W;
  def CRC32r64r64 : SS42I_crc32r<0xF1, "crc32{q}", GR64, GR64,
                                 int_x86_sse42_crc32_64_64>, REX_W;
  // The 64-bit destination / 8-bit source forms use null_frag, so they carry
  // no selection pattern; the side-effect and load flags are set explicitly.
  let hasSideEffects = 0 in {
    let mayLoad = 1 in
    def CRC32r64m8 : SS42I_crc32m<0xF0, "crc32{b}", GR64, i8mem,
                                  null_frag>, REX_W;
    def CRC32r64r8 : SS42I_crc32r<0xF0, "crc32{b}", GR64, GR8,
                                  null_frag>, REX_W;
  }
}

//===----------------------------------------------------------------------===//
// SHA-NI Instructions
//===----------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for SHA than WriteVecIMul?
// SHAI_binop - SHA binary operator with register (rr) and memory (rm) forms.
// When UsesXMM0 is set, the asm string spells out %xmm0 and XMM0 is passed as
// the third operand of the intrinsic; otherwise the intrinsic takes only
// $src1 and $src2.
multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
                      X86FoldableSchedWrite sched, bit UsesXMM0 = 0> {
  def rr : I<Opc, MRMSrcReg, (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2),
             !if(UsesXMM0,
                 !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
             [!if(UsesXMM0,
                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>,
             T8PS, Sched<[sched]>;

  def rm : I<Opc, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, i128mem:$src2),
             !if(UsesXMM0,
                 !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
             [!if(UsesXMM0,
                  (set VR128:$dst, (IntId VR128:$src1,
                    (memop addr:$src2), XMM0)),
                  (set VR128:$dst, (IntId VR128:$src1,
                    (memop addr:$src2))))]>, T8PS,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
  def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst),
                         (ins VR128:$src1, VR128:$src2, u8imm:$src3),
                         "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                         [(set VR128:$dst,
                           (int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
                            (i8 timm:$src3)))]>, TAPS,
                         Sched<[SchedWriteVecIMul.XMM]>;
  def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst),
                         (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
                         "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                         [(set VR128:$dst,
                           (int_x86_sha1rnds4 VR128:$src1,
                            (memop addr:$src2),
                            (i8 timm:$src3)))]>, TAPS,
                         Sched<[SchedWriteVecIMul.XMM.Folded,
                                SchedWriteVecIMul.XMM.ReadAfterFold]>;

  defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte,
                              SchedWriteVecIMul.XMM>;
  defm SHA1MSG1  : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1,
                              SchedWriteVecIMul.XMM>;
  defm SHA1MSG2  : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2,
                              SchedWriteVecIMul.XMM>;

  let Uses=[XMM0] in
  defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2,
                                SchedWriteVecIMul.XMM, 1>;

  defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1,
                               SchedWriteVecIMul.XMM>;
  defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2,
                               SchedWriteVecIMul.XMM>;
}

// Aliases with the implicit xmm0 argument (the %xmm0 operand is omitted from
// the asm string).
def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
                (SHA256RNDS2rr VR128:$dst, VR128:$src2), 0>;
def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
                (SHA256RNDS2rm VR128:$dst, i128mem:$src2), 0>;

//===----------------------------------------------------------------------===//
// AES-NI Instructions
//===----------------------------------------------------------------------===//

// AESI_binop_rm_int - AES binary intrinsic with register (rr) and memory (rm)
// forms. The asm string is assigned through the enclosing AsmString let, so
// both defs pass an empty string; Is2Addr selects the two-operand spelling.
multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
                             Intrinsic IntId, PatFrag ld_frag,
                             bit Is2Addr = 0, RegisterClass RC = VR128,
                             X86MemOperand MemOp = i128mem> {
  let AsmString = OpcodeStr#
                  !if(Is2Addr, "\t{$src2, $dst|$dst, $src2}",
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}") in {
    def rr : AES8I<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2), "",
                   [(set RC:$dst, (IntId RC:$src1, RC:$src2))]>,
                   Sched<[WriteAESDecEnc]>;
    def rm : AES8I<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, MemOp:$src2), "",
                   [(set RC:$dst, (IntId RC:$src1, (ld_frag addr:$src2)))]>,
                   Sched<[WriteAESDecEnc.Folded, WriteAESDecEnc.ReadAfterFold]>;
  }
}

// Perform One Round of an AES Encryption/Decryption Flow
let Predicates = [HasAVX, NoVLX_Or_NoVAES, HasAES] in {
  defm VAESENC          : AESI_binop_rm_int<0xDC, "vaesenc",
                         int_x86_aesni_aesenc, load>, VEX_4V, VEX_WIG;
  defm VAESENCLAST      : AESI_binop_rm_int<0xDD, "vaesenclast",
                         int_x86_aesni_aesenclast, load>, VEX_4V, VEX_WIG;
  defm VAESDEC          : AESI_binop_rm_int<0xDE, "vaesdec",
                         int_x86_aesni_aesdec, load>, VEX_4V, VEX_WIG;
  defm VAESDECLAST      : AESI_binop_rm_int<0xDF, "vaesdeclast",
                         int_x86_aesni_aesdeclast, load>, VEX_4V, VEX_WIG;
}

let Predicates = [NoVLX, HasVAES] in {
  defm VAESENCY         : AESI_binop_rm_int<0xDC, "vaesenc",
                         int_x86_aesni_aesenc_256, load, 0, VR256,
                         i256mem>, VEX_4V, VEX_L, VEX_WIG;
  defm VAESENCLASTY     : AESI_binop_rm_int<0xDD, "vaesenclast",
                         int_x86_aesni_aesenclast_256, load, 0, VR256,
                         i256mem>, VEX_4V, VEX_L, VEX_WIG;
  defm VAESDECY         : AESI_binop_rm_int<0xDE, "vaesdec",
                         int_x86_aesni_aesdec_256, load, 0, VR256,
                         i256mem>, VEX_4V, VEX_L, VEX_WIG;
  defm VAESDECLASTY     : AESI_binop_rm_int<0xDF, "vaesdeclast",
                         int_x86_aesni_aesdeclast_256, load, 0, VR256,
                         i256mem>, VEX_4V, VEX_L, VEX_WIG;
}

let Constraints = "$src1 = $dst" in {
  defm AESENC          : AESI_binop_rm_int<0xDC, "aesenc",
                         int_x86_aesni_aesenc, memop, 1>;
  defm AESENCLAST      : AESI_binop_rm_int<0xDD, "aesenclast",
                         int_x86_aesni_aesenclast, memop, 1>;
  defm AESDEC          : AESI_binop_rm_int<0xDE, "aesdec",
                         int_x86_aesni_aesdec, memop, 1>;
  defm AESDECLAST      : AESI_binop_rm_int<0xDF, "aesdeclast",
                         int_x86_aesni_aesdeclast, memop, 1>;
}

// Perform the AES InvMixColumn Transformation
let Predicates = [HasAVX, HasAES] in {
  def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1),
      "vaesimc\t{$src1, $dst|$dst, $src1}",
      [(set VR128:$dst,
        (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>,
      VEX, VEX_WIG;
  def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
      (ins i128mem:$src1),
      "vaesimc\t{$src1, $dst|$dst, $src1}",
      [(set VR128:$dst, (int_x86_aesni_aesimc (load addr:$src1)))]>,
      Sched<[WriteAESIMC.Folded]>, VEX, VEX_WIG;
}
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
  (ins VR128:$src1),
  "aesimc\t{$src1, $dst|$dst, $src1}",
  [(set VR128:$dst,
    (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>;
def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
  (ins i128mem:$src1),
  "aesimc\t{$src1, $dst|$dst, $src1}",
  [(set VR128:$dst, (int_x86_aesni_aesimc (memop addr:$src1)))]>,
  Sched<[WriteAESIMC.Folded]>;

// AES Round Key Generation Assist
let Predicates = [HasAVX, HasAES] in {
  def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, u8imm:$src2),
      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
        (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
      Sched<[WriteAESKeyGen]>, VEX, VEX_WIG;
  def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
      (ins i128mem:$src1, u8imm:$src2),
      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
        (int_x86_aesni_aeskeygenassist (load addr:$src1), timm:$src2))]>,
      Sched<[WriteAESKeyGen.Folded]>, VEX, VEX_WIG;
}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
  (ins VR128:$src1, u8imm:$src2),
  "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
  [(set VR128:$dst,
    (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
  Sched<[WriteAESKeyGen]>;
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
  (ins i128mem:$src1, u8imm:$src2),
  "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
  [(set VR128:$dst,
    (int_x86_aesni_aeskeygenassist (memop addr:$src1), timm:$src2))]>,
  Sched<[WriteAESKeyGen.Folded]>;

//===----------------------------------------------------------------------===//
// PCLMUL
// Instructions
//===----------------------------------------------------------------------===//

// Swaps the two nibbles of the 8-bit selector immediate; used when the two
// sources of a carry-less multiply are exchanged.
def PCLMULCommuteImm : SDNodeXForm<timm, [{
  uint8_t Sel = N->getZExtValue();
  return getI8Imm((uint8_t)((Sel >> 4) | (Sel << 4)), SDLoc(N));
}]>;

// Legacy-encoded (two-address) carry-less multiply.
let Predicates = [NoAVX, HasPCLMUL] in {
  let Constraints = "$src1 = $dst" in {
    let isCommutable = 1 in
    def PCLMULQDQrr
        : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2, u8imm:$src3),
                    "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                    [(set VR128:$dst,
                          (int_x86_pclmulqdq VR128:$src1, VR128:$src2,
                                             timm:$src3))]>,
          Sched<[WriteCLMul]>;

    def PCLMULQDQrm
        : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
                    (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
                    "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                    [(set VR128:$dst,
                          (int_x86_pclmulqdq VR128:$src1, (memop addr:$src2),
                                             timm:$src3))]>,
          Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"

  // A load in the first operand is folded by exchanging the sources and
  // transforming the immediate with PCLMULCommuteImm.
  def : Pat<(int_x86_pclmulqdq (memop addr:$src2), VR128:$src1,
                               (i8 timm:$src3)),
            (PCLMULQDQrm VR128:$src1, addr:$src2,
                         (PCLMULCommuteImm timm:$src3))>;
} // Predicates = [NoAVX, HasPCLMUL]

// Assembler-only mnemonics pclmul{hq,lq}{hq,lq}dq for the legacy encoding.
// The implied immediate has bit 4 set when LO is "hq" and bit 0 set when HI
// is "hq".
foreach HI = ["hq","lq"] in
foreach LO = ["hq","lq"] in {
  def : InstAlias<"pclmul" # HI # LO # "dq\t{$src, $dst|$dst, $src}",
                  (PCLMULQDQrr VR128:$dst, VR128:$src,
                               !add(!shl(!eq(LO,"hq"),4),!eq(HI,"hq"))),
                  0>;
  def : InstAlias<"pclmul" # HI # LO # "dq\t{$src, $dst|$dst, $src}",
                  (PCLMULQDQrm VR128:$dst, i128mem:$src,
                               !add(!shl(!eq(LO,"hq"),4),!eq(HI,"hq"))),
                  0>;
}

// Three-operand carry-less multiply, parameterized over register class,
// memory operand, load fragment and intrinsic.
multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
                      PatFrag LdFrag, Intrinsic IntId> {
  let isCommutable = 1 in
  def rr
      : PCLMULIi8<0x44, MRMSrcReg, (outs RC:$dst),
                  (ins RC:$src1, RC:$src2, u8imm:$src3),
                  "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                  [(set RC:$dst, (IntId RC:$src1, RC:$src2, timm:$src3))]>,
        Sched<[WriteCLMul]>;

  def rm
      : PCLMULIi8<0x44, MRMSrcMem, (outs RC:$dst),
                  (ins RC:$src1, MemOp:$src2, u8imm:$src3),
                  "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                  [(set RC:$dst,
                        (IntId RC:$src1, (LdFrag addr:$src2), timm:$src3))]>,
        Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;

  // Fold a load that appears as the first operand: exchange the sources and
  // transform the immediate with PCLMULCommuteImm.
  def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 timm:$src3)),
            (!cast<Instruction>(NAME#"rm") RC:$src1, addr:$src2,
                                           (PCLMULCommuteImm timm:$src3))>;
}

let Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in
defm VPCLMULQDQ
    : vpclmulqdq<VR128, i128mem, load, int_x86_pclmulqdq>, VEX_4V, VEX_WIG;

let Predicates = [NoVLX, HasVPCLMULQDQ] in
defm VPCLMULQDQY
    : vpclmulqdq<VR256, i256mem, load, int_x86_pclmulqdq_256>,
      VEX_4V, VEX_L, VEX_WIG;

// One rr and one rm alias for a single (Hi, Lo) mnemonic combination.
multiclass vpclmulqdq_aliases_impl<string InstStr, RegisterClass RC,
                                   X86MemOperand MemOp, string Hi, string Lo> {
  def : InstAlias<"vpclmul"#Hi#Lo#"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(InstStr # "rr")
                       RC:$dst, RC:$src1, RC:$src2,
                       !add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))),
                  0>;
  def : InstAlias<"vpclmul"#Hi#Lo#"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(InstStr # "rm")
                       RC:$dst, RC:$src1, MemOp:$src2,
                       !add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))),
                  0>;
}

// All four hq/lq combinations for one instruction family.
multiclass vpclmulqdq_aliases<string InstStr, RegisterClass RC,
                              X86MemOperand MemOp> {
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "hq", "hq">;
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "hq", "lq">;
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "lq", "hq">;
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "lq", "lq">;
}

// AVX aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQ", VR128, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQY", VR256, i256mem>;

//===----------------------------------------------------------------------===//
// SSE4A Instructions
//===----------------------------------------------------------------------===//

let Predicates = [HasSSE4A] in {

// Bit-field extract/insert; the destination is tied to $src.
let ExeDomain = SSEPackedInt, Constraints = "$src = $dst" in {
def EXTRQI
    : Ii8<0x78, MRMXr, (outs VR128:$dst),
          (ins VR128:$src, u8imm:$len, u8imm:$idx),
          "extrq\t{$idx, $len, $src|$src, $len, $idx}",
          [(set VR128:$dst, (X86extrqi VR128:$src, timm:$len, timm:$idx))]>,
      PD, Sched<[SchedWriteVecALU.XMM]>;
def EXTRQ
    : I<0x79, MRMSrcReg, (outs VR128:$dst),
        (ins VR128:$src, VR128:$mask),
        "extrq\t{$mask, $src|$src, $mask}",
        [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src, VR128:$mask))]>,
      PD, Sched<[SchedWriteVecALU.XMM]>;

def INSERTQI
    : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
          (ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx),
          "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
          [(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2,
                                         timm:$len, timm:$idx))]>,
      XD, Sched<[SchedWriteVecALU.XMM]>;
def INSERTQ
    : I<0x79, MRMSrcReg, (outs VR128:$dst),
        (ins VR128:$src, VR128:$mask),
        "insertq\t{$mask, $src|$src, $mask}",
        [(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src, VR128:$mask))]>,
      XD, Sched<[SchedWriteVecALU.XMM]>;
} // ExeDomain = SSEPackedInt, Constraints = "$src = $dst"

// Non-temporal (unaligned) scalar stores.
let AddedComplexity = 400 in { // Prefer non-temporal versions
let hasSideEffects = 0, mayStore = 1,
    SchedRW = [SchedWriteFMoveLSNT.Scl.MR] in {
def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
                "movntss\t{$src, $dst|$dst, $src}", []>, XS;

def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                "movntsd\t{$src, $dst|$dst, $src}", []>, XD;
} // SchedRW

// The scalar source is copied into VR128 because the instructions take an
// XMM register operand.
def : Pat<(nontemporalstore FR32:$src, addr:$dst),
          (MOVNTSS addr:$dst, (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;

def : Pat<(nontemporalstore FR64:$src, addr:$dst),
          (MOVNTSD addr:$dst, (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;

} // AddedComplexity
} // HasSSE4A

//===----------------------------------------------------------------------===//
// AVX Instructions
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VBROADCAST - Load from memory and broadcast to all elements of the
//              destination operand
//
class avx_broadcast_rm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop, ValueType VT,
                       PatFrag bcast_frag, SchedWrite Sched>
    : AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
            !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
            [(set RC:$dst, (VT (bcast_frag addr:$src)))]>,
      Sched<[Sched]>, VEX;

// AVX2 adds register forms; the source is always a VR128 register.
class avx2_broadcast_rr<bits<8> opc, string OpcodeStr, RegisterClass RC,
                        ValueType ResVT, ValueType OpVT, SchedWrite Sched>
    : AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set RC:$dst, (ResVT (X86VBroadcast (OpVT VR128:$src))))]>,
      Sched<[Sched]>, VEX;

let ExeDomain = SSEPackedSingle, Predicates = [HasAVX, NoVLX] in {
  def VBROADCASTSSrm
      : avx_broadcast_rm<0x18, "vbroadcastss", VR128, f32mem, v4f32,
                         X86VBroadcastld32, SchedWriteFShuffle.XMM.Folded>;
  def VBROADCASTSSYrm
      : avx_broadcast_rm<0x18, "vbroadcastss", VR256, f32mem, v8f32,
                         X86VBroadcastld32, SchedWriteFShuffle.XMM.Folded>,
        VEX_L;
}
let ExeDomain = SSEPackedDouble, Predicates = [HasAVX, NoVLX] in
def VBROADCASTSDYrm
    : avx_broadcast_rm<0x19, "vbroadcastsd", VR256, f64mem, v4f64,
                       X86VBroadcastld64, SchedWriteFShuffle.XMM.Folded>,
      VEX_L;

let ExeDomain = SSEPackedSingle, Predicates = [HasAVX2, NoVLX] in {
  def VBROADCASTSSrr
      : avx2_broadcast_rr<0x18, "vbroadcastss", VR128, v4f32, v4f32,
                          SchedWriteFShuffle.XMM>;
  def VBROADCASTSSYrr
      : avx2_broadcast_rr<0x18, "vbroadcastss", VR256, v8f32, v4f32,
                          WriteFShuffle256>,
        VEX_L;
}
let ExeDomain = SSEPackedDouble, Predicates = [HasAVX2, NoVLX] in
def VBROADCASTSDYrr
    : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256, v4f64, v2f64,
                        WriteFShuffle256>,
      VEX_L;

//===----------------------------------------------------------------------===//
// VBROADCAST*128 - Load from memory and broadcast 128-bit vector to both
//                  halves of a 256-bit vector.
//
// Both instructions are defined without a selection pattern; VBROADCASTF128
// is selected through the explicit patterns that follow.
let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX2] in
def VBROADCASTI128
    : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
            "vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
      Sched<[WriteShuffleLd]>, VEX, VEX_L;

let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX],
    ExeDomain = SSEPackedSingle in
def VBROADCASTF128
    : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
            "vbroadcastf128\t{$src, $dst|$dst, $src}", []>,
      Sched<[SchedWriteFShuffle.XMM.Folded]>, VEX, VEX_L;

// Every 256-bit result type of a 128-bit subvector broadcast load maps to
// VBROADCASTF128.
// NOTE: We're using FP instructions here, but execution domain fixing can
// convert to integer when profitable.
let Predicates = [HasAVX, NoVLX] in {
  foreach VT = [v4f64, v8f32, v4i64, v8i32, v16i16, v16f16, v32i8] in
  def : Pat<(VT (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTF128 addr:$src)>;
}

//===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
//

// Defined without selection patterns; the lowering patterns are attached
// separately.
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VPERM2F128rr
    : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
              (ins VR256:$src1, VR256:$src2, u8imm:$src3),
              "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
              []>,
      VEX_4V, VEX_L, Sched<[WriteFShuffle256]>;
def VPERM2F128rm
    : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
              (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
              "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
              []>,
      VEX_4V, VEX_L,
      Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
}

// Immediate transform to help with commuting.
// Flips bits 1 and 5 of the immediate (XOR with 0x22) so that the two
// sources of a 2x128 permute can be exchanged.
def Perm2XCommuteImm : SDNodeXForm<timm, [{
  uint64_t Imm = N->getZExtValue();
  return getI8Imm(Imm ^ 0x22, SDLoc(N));
}]>;

// Selection patterns that map X86VPerm2x128 of type VT onto the rr/rm
// instructions named by InstrStr.
multiclass vperm2x128_lowering<string InstrStr, ValueType VT,
                               PatFrag memop_frag> {
  // Register-register form.
  def : Pat<(VT (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))),
            (!cast<Instruction>(InstrStr#"rr")
                 VR256:$src1, VR256:$src2, timm:$imm)>;
  // Load folded into the second source.
  def : Pat<(VT (X86VPerm2x128 VR256:$src1, (memop_frag addr:$src2),
                               (i8 timm:$imm))),
            (!cast<Instruction>(InstrStr#"rm")
                 VR256:$src1, addr:$src2, timm:$imm)>;
  // Load in the first source: exchange the operands and adjust the
  // immediate with Perm2XCommuteImm.
  def : Pat<(VT (X86VPerm2x128 (memop_frag addr:$src2), VR256:$src1,
                               (i8 timm:$imm))),
            (!cast<Instruction>(InstrStr#"rm")
                 VR256:$src1, addr:$src2, (Perm2XCommuteImm timm:$imm))>;
}

// Floating-point element types.
let Predicates = [HasAVX] in {
  defm : vperm2x128_lowering<"VPERM2F128", v4f64, loadv4f64>;
  defm : vperm2x128_lowering<"VPERM2F128", v8f32, loadv8f32>;
}

// Remaining element types, AVX1-only targets.
let Predicates = [HasAVX1Only] in {
  defm : vperm2x128_lowering<"VPERM2F128", v4i64,  loadv4i64>;
  defm : vperm2x128_lowering<"VPERM2F128", v8i32,  loadv8i32>;
  defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>;
  defm : vperm2x128_lowering<"VPERM2F128", v16f16, loadv16f16>;
  defm : vperm2x128_lowering<"VPERM2F128", v32i8,  loadv32i8>;
}

Andric//===----------------------------------------------------------------------===// 71630b57cec5SDimitry Andric// VINSERTF128 - Insert packed floating-point values 71640b57cec5SDimitry Andric// 71650b57cec5SDimitry Andriclet hasSideEffects = 0, ExeDomain = SSEPackedSingle in { 71660b57cec5SDimitry Andricdef VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst), 71670b57cec5SDimitry Andric (ins VR256:$src1, VR128:$src2, u8imm:$src3), 71680b57cec5SDimitry Andric "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 71690b57cec5SDimitry Andric []>, Sched<[WriteFShuffle256]>, VEX_4V, VEX_L; 71700b57cec5SDimitry Andriclet mayLoad = 1 in 71710b57cec5SDimitry Andricdef VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst), 71720b57cec5SDimitry Andric (ins VR256:$src1, f128mem:$src2, u8imm:$src3), 71730b57cec5SDimitry Andric "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 71740b57cec5SDimitry Andric []>, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>, VEX_4V, VEX_L; 71750b57cec5SDimitry Andric} 71760b57cec5SDimitry Andric 71770b57cec5SDimitry Andric// To create a 256-bit all ones value, we should produce VCMPTRUEPS 71780b57cec5SDimitry Andric// with YMM register containing zero. 71790b57cec5SDimitry Andric// FIXME: Avoid producing vxorps to clear the fake inputs. 
// 256-bit all-ones on AVX1-only targets: compare two zeroed registers with
// predicate 0xf.
let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 immAllOnesV), (VCMPPSYrri (AVX_SET0), (AVX_SET0), 0xf)>;
}

// Selection patterns for inserting a 128-bit vector of type From into a
// 256-bit vector of type To.
//   InstrStr - name prefix of the insert instruction (rr/rm forms).
//   PermStr  - name prefix of the 2x128 permute used when the 256-bit
//              operand is the one being loaded.
multiclass vinsert_lowering<string InstrStr, string PermStr,
                            ValueType From, ValueType To,
                            PatFrag frommemop_frag, PatFrag tomemop_frag> {
  def : Pat<(vinsert128_insert:$ins (To VR256:$src1), (From VR128:$src2),
                                   (iPTR imm)),
            (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR128:$src2,
                                       (INSERT_get_vinsert128_imm VR256:$ins))>;
  def : Pat<(vinsert128_insert:$ins (To VR256:$src1),
                                    (From (frommemop_frag addr:$src2)),
                                    (iPTR imm)),
            (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
                                       (INSERT_get_vinsert128_imm VR256:$ins))>;
  // Folding "To" vector - convert to perm2x128 and commute inputs.
  def : Pat<(vinsert128_insert:$ins (To (tomemop_frag addr:$src1)),
                                    (From VR128:$src2),
                                    (iPTR imm)),
            (!cast<Instruction>(PermStr#rm)
              (INSERT_SUBREG (To (IMPLICIT_DEF)), VR128:$src2, sub_xmm),
              addr:$src1, (INSERT_get_vperm2x128_commutedimm VR256:$ins))>;
}

let Predicates = [HasAVX, NoVLX] in {
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v4f32, v8f32, loadv4f32, loadv8f32>;
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v2f64, v4f64, loadv2f64, loadv4f64>;
}

// One instantiation per element type. The v16i8/v32i8 entry used to be
// listed twice, which only produced redundant patterns.
let Predicates = [HasAVX1Only] in {
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v2i64, v4i64,  loadv2i64, loadv4i64>;
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v4i32, v8i32,  loadv4i32, loadv8i32>;
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v8i16, v16i16, loadv8i16, loadv16i16>;
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v8f16, v16f16, loadv8f16, loadv16f16>;
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v16i8, v32i8,  loadv16i8, loadv32i8>;
}

//===----------------------------------------------------------------------===//
// VEXTRACTF128 - Extract packed floating-point values
//
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
          (ins VR256:$src1, u8imm:$src2),
          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, Sched<[WriteFShuffle256]>, VEX, VEX_L;
let mayStore = 1 in
def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
          (ins f128mem:$dst, VR256:$src1, u8imm:$src2),
          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, Sched<[WriteFStoreX]>, VEX, VEX_L;
}

// Selection patterns for extracting a 128-bit vector of type To from a
// 256-bit vector of type From, either into a register (rr) or directly to
// memory (mr).
multiclass vextract_lowering<string InstrStr, ValueType From, ValueType To> {
  def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
            (To (!cast<Instruction>(InstrStr#rr)
                                    (From VR256:$src1),
                                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;
  def : Pat<(store (To (vextract128_extract:$ext (From VR256:$src1),
                                                 (iPTR imm))), addr:$dst),
            (!cast<Instruction>(InstrStr#mr) addr:$dst, VR256:$src1,
             (EXTRACT_get_vextract128_imm VR128:$ext))>;
}

// AVX1 patterns
let Predicates = [HasAVX, NoVLX] in {
  defm : vextract_lowering<"VEXTRACTF128", v8f32, v4f32>;
  defm : vextract_lowering<"VEXTRACTF128", v4f64, v2f64>;
}

// One instantiation per element type. The v32i8/v16i8 entry used to be
// listed twice, which only produced redundant patterns.
let Predicates = [HasAVX1Only] in {
  defm : vextract_lowering<"VEXTRACTF128", v4i64,  v2i64>;
  defm : vextract_lowering<"VEXTRACTF128", v8i32,  v4i32>;
  defm : vextract_lowering<"VEXTRACTF128", v16i16, v8i16>;
  defm : vextract_lowering<"VEXTRACTF128", v16f16, v8f16>;
  defm : vextract_lowering<"VEXTRACTF128", v32i8,  v16i8>;
}

//===----------------------------------------------------------------------===//
// VMASKMOV - Conditional SIMD Packed Loads and Stores
//
// Defines the 128-bit (rm/mr) and 256-bit (Yrm/Ymr) masked load and store
// forms. $src1 is the mask operand of the intrinsic in every form.
multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
                          Intrinsic IntLd, Intrinsic IntLd256,
                          Intrinsic IntSt, Intrinsic IntSt256,
                          X86SchedWriteMaskMove schedX,
                          X86SchedWriteMaskMove schedY> {
  def rm  : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, f128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
             VEX_4V, Sched<[schedX.RM]>;
  def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, f256mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
             VEX_4V, VEX_L, Sched<[schedY.RM]>;
  def mr  : AVX8I<opc_mr, MRMDestMem, (outs),
             (ins f128mem:$dst, VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>,
             VEX_4V, Sched<[schedX.MR]>;
  def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
             (ins f256mem:$dst, VR256:$src1, VR256:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
             VEX_4V, VEX_L, Sched<[schedY.MR]>;
}

let ExeDomain = SSEPackedSingle in
defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
                                 int_x86_avx_maskload_ps,
                                 int_x86_avx_maskload_ps_256,
                                 int_x86_avx_maskstore_ps,
                                 int_x86_avx_maskstore_ps_256,
                                 WriteFMaskMove32, WriteFMaskMove32Y>;
let ExeDomain = SSEPackedDouble in
defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
                                 int_x86_avx_maskload_pd,
                                 int_x86_avx_maskload_pd_256,
                                 int_x86_avx_maskstore_pd,
                                 int_x86_avx_maskstore_pd_256,
                                 WriteFMaskMove64, WriteFMaskMove64Y>;

//===----------------------------------------------------------------------===//
// AVX_VNNI
//===----------------------------------------------------------------------===//
// VEX-encoded VNNI dot-product instructions. $src1 is the accumulator and is
// tied to $dst. Scheduling: the 256-bit forms use the YMM write class, and
// the load forms use the folded write plus one ReadAfterFold per register
// source ($src1, $src2).
let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI], Constraints = "$src1 = $dst",
    ExplicitVEXPrefix = 1, checkVEXPredicate = 1 in
multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                       bit IsCommutable> {
  let isCommutable = IsCommutable in
  def rr  : AVX8I<opc, MRMSrcReg, (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2, VR128:$src3),
             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
             [(set VR128:$dst, (v4i32 (OpNode VR128:$src1,
                                       VR128:$src2, VR128:$src3)))]>,
             VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;

  def rm  : AVX8I<opc, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2, i128mem:$src3),
             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
             [(set VR128:$dst, (v4i32 (OpNode VR128:$src1, VR128:$src2,
                                      (loadv4i32 addr:$src3))))]>,
             VEX_4V, Sched<[SchedWriteVecIMul.XMM.Folded,
                            SchedWriteVecIMul.XMM.ReadAfterFold,
                            SchedWriteVecIMul.XMM.ReadAfterFold]>;

  let isCommutable = IsCommutable in
  def Yrr : AVX8I<opc, MRMSrcReg, (outs VR256:$dst),
             (ins VR256:$src1, VR256:$src2, VR256:$src3),
             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
             [(set VR256:$dst, (v8i32 (OpNode VR256:$src1,
                                       VR256:$src2, VR256:$src3)))]>,
             VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;

  def Yrm : AVX8I<opc, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, VR256:$src2, i256mem:$src3),
             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
             [(set VR256:$dst, (v8i32 (OpNode VR256:$src1, VR256:$src2,
                                      (loadv8i32 addr:$src3))))]>,
             VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM.Folded,
                                   SchedWriteVecIMul.YMM.ReadAfterFold,
                                   SchedWriteVecIMul.YMM.ReadAfterFold]>;
}

defm VPDPBUSD   : avx_vnni_rm<0x50, "vpdpbusd", X86Vpdpbusd, 0>;
defm VPDPBUSDS  : avx_vnni_rm<0x51, "vpdpbusds", X86Vpdpbusds, 0>;
defm VPDPWSSD   : avx_vnni_rm<0x52, "vpdpwssd",  X86Vpdpwssd, 1>;
defm VPDPWSSDS  : avx_vnni_rm<0x53, "vpdpwssds", X86Vpdpwssds, 1>;

// X86vpmaddwd whose result has exactly one use.
def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
                             (X86vpmaddwd node:$lhs, node:$rhs), [{
  return N->hasOneUse();
}]>;

// Select add(acc, vpmaddwd(a, b)) as a single VPDPWSSD when the multiply has
// no other user.
let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI] in {
  def : Pat<(v8i32 (add VR256:$src1,
                        (X86vpmaddwd_su VR256:$src2, VR256:$src3))),
            (VPDPWSSDYrr VR256:$src1, VR256:$src2, VR256:$src3)>;
  def : Pat<(v8i32 (add VR256:$src1,
                        (X86vpmaddwd_su VR256:$src2, (load addr:$src3)))),
            (VPDPWSSDYrm VR256:$src1, VR256:$src2, addr:$src3)>;
  def : Pat<(v4i32 (add VR128:$src1,
                        (X86vpmaddwd_su VR128:$src2, VR128:$src3))),
            (VPDPWSSDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
  def : Pat<(v4i32 (add VR128:$src1,
                        (X86vpmaddwd_su VR128:$src2, (load addr:$src3)))),
            (VPDPWSSDrm VR128:$src1, VR128:$src2, addr:$src3)>;
}

//===----------------------------------------------------------------------===//
// VPERMIL - Permute Single and
Double Floating-Point Values 73700b57cec5SDimitry Andric// 73710b57cec5SDimitry Andric 73720b57cec5SDimitry Andricmulticlass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr, 73730b57cec5SDimitry Andric RegisterClass RC, X86MemOperand x86memop_f, 73740b57cec5SDimitry Andric X86MemOperand x86memop_i, 73750b57cec5SDimitry Andric ValueType f_vt, ValueType i_vt, 73760b57cec5SDimitry Andric X86FoldableSchedWrite sched, 73770b57cec5SDimitry Andric X86FoldableSchedWrite varsched> { 73780b57cec5SDimitry Andric let Predicates = [HasAVX, NoVLX] in { 73790b57cec5SDimitry Andric def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst), 73800b57cec5SDimitry Andric (ins RC:$src1, RC:$src2), 73810b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 73820b57cec5SDimitry Andric [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt RC:$src2))))]>, VEX_4V, 73830b57cec5SDimitry Andric Sched<[varsched]>; 73840b57cec5SDimitry Andric def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst), 73850b57cec5SDimitry Andric (ins RC:$src1, x86memop_i:$src2), 73860b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 73870b57cec5SDimitry Andric [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, 73880b57cec5SDimitry Andric (i_vt (load addr:$src2)))))]>, VEX_4V, 73890b57cec5SDimitry Andric Sched<[varsched.Folded, sched.ReadAfterFold]>; 73900b57cec5SDimitry Andric 73910b57cec5SDimitry Andric def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst), 73920b57cec5SDimitry Andric (ins RC:$src1, u8imm:$src2), 73930b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 73948bcb0991SDimitry Andric [(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 timm:$src2))))]>, VEX, 73950b57cec5SDimitry Andric Sched<[sched]>; 73960b57cec5SDimitry Andric def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst), 73970b57cec5SDimitry Andric (ins x86memop_f:$src1, u8imm:$src2), 73980b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, 
$src1, $dst|$dst, $src1, $src2}"), 73990b57cec5SDimitry Andric [(set RC:$dst, 74008bcb0991SDimitry Andric (f_vt (X86VPermilpi (load addr:$src1), (i8 timm:$src2))))]>, VEX, 74010b57cec5SDimitry Andric Sched<[sched.Folded]>; 74020b57cec5SDimitry Andric }// Predicates = [HasAVX, NoVLX] 74030b57cec5SDimitry Andric} 74040b57cec5SDimitry Andric 74050b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in { 74060b57cec5SDimitry Andric defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem, 74070b57cec5SDimitry Andric v4f32, v4i32, SchedWriteFShuffle.XMM, 74080b57cec5SDimitry Andric SchedWriteFVarShuffle.XMM>; 74090b57cec5SDimitry Andric defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem, 74100b57cec5SDimitry Andric v8f32, v8i32, SchedWriteFShuffle.YMM, 74110b57cec5SDimitry Andric SchedWriteFVarShuffle.YMM>, VEX_L; 74120b57cec5SDimitry Andric} 74130b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in { 74140b57cec5SDimitry Andric defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem, 74150b57cec5SDimitry Andric v2f64, v2i64, SchedWriteFShuffle.XMM, 74160b57cec5SDimitry Andric SchedWriteFVarShuffle.XMM>; 74170b57cec5SDimitry Andric defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem, 74180b57cec5SDimitry Andric v4f64, v4i64, SchedWriteFShuffle.YMM, 74190b57cec5SDimitry Andric SchedWriteFVarShuffle.YMM>, VEX_L; 74200b57cec5SDimitry Andric} 74210b57cec5SDimitry Andric 74220b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 74230b57cec5SDimitry Andric// VZERO - Zero YMM registers 74240b57cec5SDimitry Andric// Note: These instruction do not affect the YMM16-YMM31. 
//

// Both instructions are scheduled as WriteSystem and are modelled as defining
// YMM0-YMM15. They share opcode 0x77; VZEROALL additionally carries VEX_L.
let SchedRW = [WriteSystem] in {
let Defs = [YMM0, YMM1, YMM2,  YMM3,  YMM4,  YMM5,  YMM6,  YMM7,
            YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
  // Zero All YMM registers
  def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
                   [(int_x86_avx_vzeroall)]>,
                 PS, VEX, VEX_L, Requires<[HasAVX]>, VEX_WIG;

  // Zero Upper bits of YMM registers
  def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
                     [(int_x86_avx_vzeroupper)]>,
                   PS, VEX, Requires<[HasAVX]>, VEX_WIG;
} // Defs
} // SchedRW

//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//

/// f16c_ph2ps - vcvtph2ps with a VR128 source (rr) or an x86memop source (rm),
/// producing RC. Only rr carries a selection pattern (X86any_cvtph2ps); rm has
/// an empty pattern list and is explicitly marked as a side-effect-free load.
multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop,
                      X86FoldableSchedWrite sched> {
  def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
             "vcvtph2ps\t{$src, $dst|$dst, $src}",
             [(set RC:$dst, (X86any_cvtph2ps VR128:$src))]>,
           T8PD, VEX, Sched<[sched]>;
  let hasSideEffects = 0, mayLoad = 1 in
  def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
             "vcvtph2ps\t{$src, $dst|$dst, $src}",
             []>,
           T8PD, VEX, Sched<[sched.Folded]>;
}

/// f16c_ps2ph - vcvtps2ph from RC plus an immediate, written either to a VR128
/// register (rr, scheduled with RR) or to memory through x86memop (mr,
/// scheduled with MR). Only rr carries a selection pattern (X86any_cvtps2ph);
/// mr has an empty pattern list and is explicitly marked as a side-effect-free
/// store.
multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
                      SchedWrite RR, SchedWrite MR> {
  def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
               (ins RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set VR128:$dst, (X86any_cvtps2ph RC:$src1, timm:$src2))]>,
           TAPD, VEX, Sched<[RR]>;
  let hasSideEffects = 0, mayStore = 1 in
  def mr : Ii8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
           TAPD, VEX, Sched<[MR]>;
}

let Predicates = [HasF16C, NoVLX] in {
  // 128-bit forms use f64mem and 256-bit forms use f128mem for the
  // half-precision memory operand; the 256-bit forms add VEX_L.
  defm VCVTPH2PS  : f16c_ph2ps<VR128, f64mem, WriteCvtPH2PS>, SIMD_EXC;
  defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, WriteCvtPH2PSY>, VEX_L, SIMD_EXC;
  defm VCVTPS2PH  : f16c_ps2ph<VR128, f64mem, WriteCvtPS2PH,
                               WriteCvtPS2PHSt>, SIMD_EXC;
  defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, WriteCvtPS2PHY,
                               WriteCvtPS2PHYSt>, VEX_L, SIMD_EXC;

  // Pattern match vcvtph2ps of a scalar i64 load.
74815ffd83dbSDimitry Andric def : Pat<(v4f32 (X86any_cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))), 74820b57cec5SDimitry Andric (VCVTPH2PSrm addr:$src)>; 74835ffd83dbSDimitry Andric def : Pat<(v4f32 (X86any_cvtph2ps (bc_v8i16 74840b57cec5SDimitry Andric (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), 74850b57cec5SDimitry Andric (VCVTPH2PSrm addr:$src)>; 74865ffd83dbSDimitry Andric def : Pat<(v8f32 (X86any_cvtph2ps (loadv8i16 addr:$src))), 74875ffd83dbSDimitry Andric (VCVTPH2PSYrm addr:$src)>; 74880b57cec5SDimitry Andric 74890b57cec5SDimitry Andric def : Pat<(store (f64 (extractelt 74905ffd83dbSDimitry Andric (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128:$src1, timm:$src2))), 74910b57cec5SDimitry Andric (iPTR 0))), addr:$dst), 74928bcb0991SDimitry Andric (VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>; 74930b57cec5SDimitry Andric def : Pat<(store (i64 (extractelt 74945ffd83dbSDimitry Andric (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128:$src1, timm:$src2))), 74950b57cec5SDimitry Andric (iPTR 0))), addr:$dst), 74968bcb0991SDimitry Andric (VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>; 74975ffd83dbSDimitry Andric def : Pat<(store (v8i16 (X86any_cvtps2ph VR256:$src1, timm:$src2)), addr:$dst), 74988bcb0991SDimitry Andric (VCVTPS2PHYmr addr:$dst, VR256:$src1, timm:$src2)>; 74990b57cec5SDimitry Andric} 75000b57cec5SDimitry Andric 75010b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 75020b57cec5SDimitry Andric// AVX2 Instructions 75030b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 75040b57cec5SDimitry Andric 75050b57cec5SDimitry Andric/// AVX2_blend_rmi - AVX2 blend with 8-bit immediate 75060b57cec5SDimitry Andricmulticlass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode, 75070b57cec5SDimitry Andric ValueType OpVT, X86FoldableSchedWrite sched, 75080b57cec5SDimitry Andric RegisterClass RC, 75090b57cec5SDimitry Andric X86MemOperand x86memop, 
SDNodeXForm commuteXForm> { 75100b57cec5SDimitry Andric let isCommutable = 1 in 75110b57cec5SDimitry Andric def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst), 75120b57cec5SDimitry Andric (ins RC:$src1, RC:$src2, u8imm:$src3), 75130b57cec5SDimitry Andric !strconcat(OpcodeStr, 75140b57cec5SDimitry Andric "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 75158bcb0991SDimitry Andric [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>, 75160b57cec5SDimitry Andric Sched<[sched]>, VEX_4V; 75170b57cec5SDimitry Andric def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst), 75180b57cec5SDimitry Andric (ins RC:$src1, x86memop:$src2, u8imm:$src3), 75190b57cec5SDimitry Andric !strconcat(OpcodeStr, 75200b57cec5SDimitry Andric "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 75210b57cec5SDimitry Andric [(set RC:$dst, 75228bcb0991SDimitry Andric (OpVT (OpNode RC:$src1, (load addr:$src2), timm:$src3)))]>, 75230b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V; 75240b57cec5SDimitry Andric 75250b57cec5SDimitry Andric // Pattern to commute if load is in first source. 
75268bcb0991SDimitry Andric def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, timm:$src3)), 75270b57cec5SDimitry Andric (!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2, 75288bcb0991SDimitry Andric (commuteXForm timm:$src3))>; 75290b57cec5SDimitry Andric} 75300b57cec5SDimitry Andric 75310b57cec5SDimitry Andriclet Predicates = [HasAVX2] in { 75320b57cec5SDimitry Andricdefm VPBLENDD : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v4i32, 75330b57cec5SDimitry Andric SchedWriteBlend.XMM, VR128, i128mem, 75340b57cec5SDimitry Andric BlendCommuteImm4>; 75350b57cec5SDimitry Andricdefm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32, 75360b57cec5SDimitry Andric SchedWriteBlend.YMM, VR256, i256mem, 75370b57cec5SDimitry Andric BlendCommuteImm8>, VEX_L; 75380b57cec5SDimitry Andric 75398bcb0991SDimitry Andricdef : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3), 75408bcb0991SDimitry Andric (VPBLENDDYrri VR256:$src1, VR256:$src2, (BlendScaleImm4 timm:$src3))>; 75418bcb0991SDimitry Andricdef : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3), 75428bcb0991SDimitry Andric (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>; 75438bcb0991SDimitry Andricdef : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3), 75448bcb0991SDimitry Andric (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>; 75450b57cec5SDimitry Andric 75468bcb0991SDimitry Andricdef : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3), 75478bcb0991SDimitry Andric (VPBLENDDrri VR128:$src1, VR128:$src2, (BlendScaleImm2to4 timm:$src3))>; 75488bcb0991SDimitry Andricdef : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3), 75498bcb0991SDimitry Andric (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleImm2to4 timm:$src3))>; 75508bcb0991SDimitry Andricdef : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3), 75518bcb0991SDimitry Andric (VPBLENDDrmi VR128:$src1, addr:$src2, 
(BlendScaleCommuteImm2to4 timm:$src3))>; 75520b57cec5SDimitry Andric} 75530b57cec5SDimitry Andric 75540b57cec5SDimitry Andric// For insertion into the zero index (low half) of a 256-bit vector, it is 75550b57cec5SDimitry Andric// more efficient to generate a blend with immediate instead of an insert*128. 75565ffd83dbSDimitry Andric// NOTE: We're using FP instructions here, but execution domain fixing should 75570b57cec5SDimitry Andric// take care of using integer instructions when profitable. 75580b57cec5SDimitry Andriclet Predicates = [HasAVX] in { 75590b57cec5SDimitry Andricdef : Pat<(insert_subvector (v8i32 VR256:$src1), (v4i32 VR128:$src2), (iPTR 0)), 75600b57cec5SDimitry Andric (VBLENDPSYrri VR256:$src1, 75610b57cec5SDimitry Andric (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), 75620b57cec5SDimitry Andric VR128:$src2, sub_xmm), 0xf)>; 75630b57cec5SDimitry Andricdef : Pat<(insert_subvector (v4i64 VR256:$src1), (v2i64 VR128:$src2), (iPTR 0)), 75640b57cec5SDimitry Andric (VBLENDPSYrri VR256:$src1, 75650b57cec5SDimitry Andric (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), 75660b57cec5SDimitry Andric VR128:$src2, sub_xmm), 0xf)>; 75670b57cec5SDimitry Andricdef : Pat<(insert_subvector (v16i16 VR256:$src1), (v8i16 VR128:$src2), (iPTR 0)), 75680b57cec5SDimitry Andric (VBLENDPSYrri VR256:$src1, 75690b57cec5SDimitry Andric (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), 75700b57cec5SDimitry Andric VR128:$src2, sub_xmm), 0xf)>; 7571fcaf7f86SDimitry Andricdef : Pat<(insert_subvector (v16f16 VR256:$src1), (v8f16 VR128:$src2), (iPTR 0)), 7572fcaf7f86SDimitry Andric (VBLENDPSYrri VR256:$src1, 7573fcaf7f86SDimitry Andric (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), 7574fcaf7f86SDimitry Andric VR128:$src2, sub_xmm), 0xf)>; 75750b57cec5SDimitry Andricdef : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)), 75760b57cec5SDimitry Andric (VBLENDPSYrri VR256:$src1, 75770b57cec5SDimitry Andric (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), 75780b57cec5SDimitry Andric VR128:$src2, sub_xmm), 0xf)>; 

// Load forms: the 256-bit vector comes from memory, so the widened 128-bit
// register is the first blend operand and the immediate is 0xf0.
def : Pat<(insert_subvector (loadv8i32 addr:$src2), (v4i32 VR128:$src1),
                            (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm),
                        addr:$src2, 0xf0)>;
def : Pat<(insert_subvector (loadv4i64 addr:$src2), (v2i64 VR128:$src1),
                            (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm),
                        addr:$src2, 0xf0)>;
def : Pat<(insert_subvector (loadv16i16 addr:$src2), (v8i16 VR128:$src1),
                            (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm),
                        addr:$src2, 0xf0)>;
def : Pat<(insert_subvector (loadv16f16 addr:$src2), (v8f16 VR128:$src1),
                            (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm),
                        addr:$src2, 0xf0)>;
def : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1),
                            (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm),
                        addr:$src2, 0xf0)>;
}

//===----------------------------------------------------------------------===//
// VPBROADCAST - Load from memory and broadcast to all elements of the
//               destination operand
//
// Parameters:
//   x86memop   - memory operand of the rm/Yrm forms.
//   bcast_frag - pattern fragment matched by the rm/Yrm forms.
//   OpVT128 / OpVT256 - result types of the 128-bit and 256-bit forms.
//   prd        - predicate required in addition to HasAVX2.
multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
                          X86MemOperand x86memop, PatFrag bcast_frag,
                          ValueType OpVT128, ValueType OpVT256, Predicate prd> {
  let Predicates = [HasAVX2, prd] in {
76050b57cec5SDimitry Andric def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 76060b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 76070b57cec5SDimitry Andric [(set VR128:$dst, 76080b57cec5SDimitry Andric (OpVT128 (X86VBroadcast (OpVT128 VR128:$src))))]>, 76090b57cec5SDimitry Andric Sched<[SchedWriteShuffle.XMM]>, VEX; 76100b57cec5SDimitry Andric def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src), 76110b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 76120b57cec5SDimitry Andric [(set VR128:$dst, 76138bcb0991SDimitry Andric (OpVT128 (bcast_frag addr:$src)))]>, 76140b57cec5SDimitry Andric Sched<[SchedWriteShuffle.XMM.Folded]>, VEX; 76150b57cec5SDimitry Andric def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), 76160b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 76170b57cec5SDimitry Andric [(set VR256:$dst, 76180b57cec5SDimitry Andric (OpVT256 (X86VBroadcast (OpVT128 VR128:$src))))]>, 76190b57cec5SDimitry Andric Sched<[WriteShuffle256]>, VEX, VEX_L; 76200b57cec5SDimitry Andric def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src), 76210b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 76220b57cec5SDimitry Andric [(set VR256:$dst, 76238bcb0991SDimitry Andric (OpVT256 (bcast_frag addr:$src)))]>, 76240b57cec5SDimitry Andric Sched<[SchedWriteShuffle.XMM.Folded]>, VEX, VEX_L; 76250b57cec5SDimitry Andric 76260b57cec5SDimitry Andric // Provide aliases for broadcast from the same register class that 76270b57cec5SDimitry Andric // automatically does the extract. 
76280b57cec5SDimitry Andric def : Pat<(OpVT256 (X86VBroadcast (OpVT256 VR256:$src))), 76290b57cec5SDimitry Andric (!cast<Instruction>(NAME#"Yrr") 76300b57cec5SDimitry Andric (OpVT128 (EXTRACT_SUBREG (OpVT256 VR256:$src),sub_xmm)))>; 76310b57cec5SDimitry Andric } 76320b57cec5SDimitry Andric} 76330b57cec5SDimitry Andric 76348bcb0991SDimitry Andricdefm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, X86VBroadcastld8, 76350b57cec5SDimitry Andric v16i8, v32i8, NoVLX_Or_NoBWI>; 76368bcb0991SDimitry Andricdefm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, X86VBroadcastld16, 76370b57cec5SDimitry Andric v8i16, v16i16, NoVLX_Or_NoBWI>; 76388bcb0991SDimitry Andricdefm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, X86VBroadcastld32, 76390b57cec5SDimitry Andric v4i32, v8i32, NoVLX>; 76408bcb0991SDimitry Andricdefm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, X86VBroadcastld64, 76410b57cec5SDimitry Andric v2i64, v4i64, NoVLX>; 76420b57cec5SDimitry Andric 76430b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in { 76440b57cec5SDimitry Andric // Provide fallback in case the load node that is used in the patterns above 76450b57cec5SDimitry Andric // is used by additional users, which prevents the pattern selection. 
76460b57cec5SDimitry Andric def : Pat<(v4f32 (X86VBroadcast FR32:$src)), 76470b57cec5SDimitry Andric (VBROADCASTSSrr (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>; 76480b57cec5SDimitry Andric def : Pat<(v8f32 (X86VBroadcast FR32:$src)), 76490b57cec5SDimitry Andric (VBROADCASTSSYrr (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>; 76500b57cec5SDimitry Andric def : Pat<(v4f64 (X86VBroadcast FR64:$src)), 76510b57cec5SDimitry Andric (VBROADCASTSDYrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>; 76520b57cec5SDimitry Andric} 76530b57cec5SDimitry Andric 76540b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { 76550b57cec5SDimitry Andric def : Pat<(v16i8 (X86VBroadcast GR8:$src)), 76568bcb0991SDimitry Andric (VPBROADCASTBrr (VMOVDI2PDIrr 76570b57cec5SDimitry Andric (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), 76588bcb0991SDimitry Andric GR8:$src, sub_8bit))))>; 76590b57cec5SDimitry Andric def : Pat<(v32i8 (X86VBroadcast GR8:$src)), 76608bcb0991SDimitry Andric (VPBROADCASTBYrr (VMOVDI2PDIrr 76610b57cec5SDimitry Andric (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), 76628bcb0991SDimitry Andric GR8:$src, sub_8bit))))>; 76630b57cec5SDimitry Andric 76640b57cec5SDimitry Andric def : Pat<(v8i16 (X86VBroadcast GR16:$src)), 76658bcb0991SDimitry Andric (VPBROADCASTWrr (VMOVDI2PDIrr 76660b57cec5SDimitry Andric (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), 76678bcb0991SDimitry Andric GR16:$src, sub_16bit))))>; 76680b57cec5SDimitry Andric def : Pat<(v16i16 (X86VBroadcast GR16:$src)), 76698bcb0991SDimitry Andric (VPBROADCASTWYrr (VMOVDI2PDIrr 76700b57cec5SDimitry Andric (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), 76718bcb0991SDimitry Andric GR16:$src, sub_16bit))))>; 767281ad6265SDimitry Andric 767381ad6265SDimitry Andric def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)), 767481ad6265SDimitry Andric (VPBROADCASTWrm addr:$src)>; 767581ad6265SDimitry Andric def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)), 767681ad6265SDimitry Andric (VPBROADCASTWYrm addr:$src)>; 767781ad6265SDimitry 
Andric 767881ad6265SDimitry Andric def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128:$src))), 767981ad6265SDimitry Andric (VPBROADCASTWrr VR128:$src)>; 768081ad6265SDimitry Andric def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128:$src))), 768181ad6265SDimitry Andric (VPBROADCASTWYrr VR128:$src)>; 768281ad6265SDimitry Andric 768381ad6265SDimitry Andric def : Pat<(v8f16 (X86VBroadcast (f16 FR16:$src))), 768481ad6265SDimitry Andric (VPBROADCASTWrr (COPY_TO_REGCLASS FR16:$src, VR128))>; 768581ad6265SDimitry Andric def : Pat<(v16f16 (X86VBroadcast (f16 FR16:$src))), 768681ad6265SDimitry Andric (VPBROADCASTWYrr (COPY_TO_REGCLASS FR16:$src, VR128))>; 76870b57cec5SDimitry Andric} 76880b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in { 76890b57cec5SDimitry Andric def : Pat<(v4i32 (X86VBroadcast GR32:$src)), 76908bcb0991SDimitry Andric (VPBROADCASTDrr (VMOVDI2PDIrr GR32:$src))>; 76910b57cec5SDimitry Andric def : Pat<(v8i32 (X86VBroadcast GR32:$src)), 76928bcb0991SDimitry Andric (VPBROADCASTDYrr (VMOVDI2PDIrr GR32:$src))>; 76930b57cec5SDimitry Andric def : Pat<(v2i64 (X86VBroadcast GR64:$src)), 76948bcb0991SDimitry Andric (VPBROADCASTQrr (VMOV64toPQIrr GR64:$src))>; 76950b57cec5SDimitry Andric def : Pat<(v4i64 (X86VBroadcast GR64:$src)), 76968bcb0991SDimitry Andric (VPBROADCASTQYrr (VMOV64toPQIrr GR64:$src))>; 76970b57cec5SDimitry Andric} 76980b57cec5SDimitry Andric 76990b57cec5SDimitry Andric// AVX1 broadcast patterns 77000b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in { 77018bcb0991SDimitry Andricdef : Pat<(v8i32 (X86VBroadcastld32 addr:$src)), 77020b57cec5SDimitry Andric (VBROADCASTSSYrm addr:$src)>; 77038bcb0991SDimitry Andricdef : Pat<(v4i64 (X86VBroadcastld64 addr:$src)), 77040b57cec5SDimitry Andric (VBROADCASTSDYrm addr:$src)>; 77058bcb0991SDimitry Andricdef : Pat<(v4i32 (X86VBroadcastld32 addr:$src)), 77060b57cec5SDimitry Andric (VBROADCASTSSrm addr:$src)>; 77070b57cec5SDimitry Andric} 77080b57cec5SDimitry Andric 77090b57cec5SDimitry Andric // Provide 
fallback in case the load node that is used in the patterns above 77100b57cec5SDimitry Andric // is used by additional users, which prevents the pattern selection. 77110b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 77120b57cec5SDimitry Andric // 128bit broadcasts: 77130b57cec5SDimitry Andric def : Pat<(v2f64 (X86VBroadcast f64:$src)), 77140b57cec5SDimitry Andric (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>; 77158bcb0991SDimitry Andric def : Pat<(v2f64 (X86VBroadcastld64 addr:$src)), 77160b57cec5SDimitry Andric (VMOVDDUPrm addr:$src)>; 77170b57cec5SDimitry Andric 77180b57cec5SDimitry Andric def : Pat<(v2f64 (X86VBroadcast v2f64:$src)), 77190b57cec5SDimitry Andric (VMOVDDUPrr VR128:$src)>; 77200b57cec5SDimitry Andric} 77210b57cec5SDimitry Andric 77220b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in { 77230b57cec5SDimitry Andric def : Pat<(v4f32 (X86VBroadcast FR32:$src)), 77240b57cec5SDimitry Andric (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)>; 77250b57cec5SDimitry Andric def : Pat<(v8f32 (X86VBroadcast FR32:$src)), 77260b57cec5SDimitry Andric (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), 77270b57cec5SDimitry Andric (v4f32 (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)), sub_xmm), 77280b57cec5SDimitry Andric (v4f32 (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)), 1)>; 7729349cc55cSDimitry Andric def : Pat<(v8f32 (X86VBroadcast v4f32:$src)), 7730349cc55cSDimitry Andric (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), 7731349cc55cSDimitry Andric (v4f32 (VPERMILPSri VR128:$src, 0)), sub_xmm), 7732349cc55cSDimitry Andric (v4f32 (VPERMILPSri VR128:$src, 0)), 1)>; 77330b57cec5SDimitry Andric def : Pat<(v4f64 (X86VBroadcast FR64:$src)), 77340b57cec5SDimitry Andric (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), 77350b57cec5SDimitry Andric (v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), sub_xmm), 77360b57cec5SDimitry Andric (v2f64 (VMOVDDUPrr (v2f64 
(COPY_TO_REGCLASS FR64:$src, VR128)))), 1)>; 7737349cc55cSDimitry Andric def : Pat<(v4f64 (X86VBroadcast v2f64:$src)), 7738349cc55cSDimitry Andric (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), 7739349cc55cSDimitry Andric (v2f64 (VMOVDDUPrr VR128:$src)), sub_xmm), 7740349cc55cSDimitry Andric (v2f64 (VMOVDDUPrr VR128:$src)), 1)>; 77410b57cec5SDimitry Andric 77420b57cec5SDimitry Andric def : Pat<(v4i32 (X86VBroadcast GR32:$src)), 77438bcb0991SDimitry Andric (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)>; 77440b57cec5SDimitry Andric def : Pat<(v8i32 (X86VBroadcast GR32:$src)), 77450b57cec5SDimitry Andric (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), 77468bcb0991SDimitry Andric (v4i32 (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)), sub_xmm), 77478bcb0991SDimitry Andric (v4i32 (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)), 1)>; 77480b57cec5SDimitry Andric def : Pat<(v4i64 (X86VBroadcast GR64:$src)), 77490b57cec5SDimitry Andric (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), 77508bcb0991SDimitry Andric (v4i32 (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)), sub_xmm), 77518bcb0991SDimitry Andric (v4i32 (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)), 1)>; 77520b57cec5SDimitry Andric 77530b57cec5SDimitry Andric def : Pat<(v2i64 (X86VBroadcast i64:$src)), 77548bcb0991SDimitry Andric (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)>; 77558bcb0991SDimitry Andric def : Pat<(v2i64 (X86VBroadcastld64 addr:$src)), 77560b57cec5SDimitry Andric (VMOVDDUPrm addr:$src)>; 77570b57cec5SDimitry Andric} 77580b57cec5SDimitry Andric 77590b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 77600b57cec5SDimitry Andric// VPERM - Permute instructions 77610b57cec5SDimitry Andric// 77620b57cec5SDimitry Andric 77630b57cec5SDimitry Andricmulticlass avx2_perm<bits<8> opc, string OpcodeStr, 77640b57cec5SDimitry Andric ValueType OpVT, X86FoldableSchedWrite Sched, 77650b57cec5SDimitry Andric X86MemOperand memOp> { 77660b57cec5SDimitry Andric let Predicates = 
[HasAVX2, NoVLX] in { 77670b57cec5SDimitry Andric def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), 77680b57cec5SDimitry Andric (ins VR256:$src1, VR256:$src2), 77690b57cec5SDimitry Andric !strconcat(OpcodeStr, 77700b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 77710b57cec5SDimitry Andric [(set VR256:$dst, 77720b57cec5SDimitry Andric (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>, 77730b57cec5SDimitry Andric Sched<[Sched]>, VEX_4V, VEX_L; 77740b57cec5SDimitry Andric def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), 77750b57cec5SDimitry Andric (ins VR256:$src1, memOp:$src2), 77760b57cec5SDimitry Andric !strconcat(OpcodeStr, 77770b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 77780b57cec5SDimitry Andric [(set VR256:$dst, 77790b57cec5SDimitry Andric (OpVT (X86VPermv VR256:$src1, 77800b57cec5SDimitry Andric (load addr:$src2))))]>, 77810b57cec5SDimitry Andric Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX_4V, VEX_L; 77820b57cec5SDimitry Andric } 77830b57cec5SDimitry Andric} 77840b57cec5SDimitry Andric 77850b57cec5SDimitry Andricdefm VPERMD : avx2_perm<0x36, "vpermd", v8i32, WriteVarShuffle256, i256mem>; 77860b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in 77870b57cec5SDimitry Andricdefm VPERMPS : avx2_perm<0x16, "vpermps", v8f32, WriteFVarShuffle256, f256mem>; 77880b57cec5SDimitry Andric 77890b57cec5SDimitry Andricmulticlass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, 77900b57cec5SDimitry Andric ValueType OpVT, X86FoldableSchedWrite Sched, 77910b57cec5SDimitry Andric X86MemOperand memOp> { 77920b57cec5SDimitry Andric let Predicates = [HasAVX2, NoVLX] in { 77930b57cec5SDimitry Andric def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst), 77940b57cec5SDimitry Andric (ins VR256:$src1, u8imm:$src2), 77950b57cec5SDimitry Andric !strconcat(OpcodeStr, 77960b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 77970b57cec5SDimitry Andric [(set VR256:$dst, 77988bcb0991SDimitry Andric (OpVT 
(X86VPermi VR256:$src1, (i8 timm:$src2))))]>, 77990b57cec5SDimitry Andric Sched<[Sched]>, VEX, VEX_L; 78000b57cec5SDimitry Andric def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst), 78010b57cec5SDimitry Andric (ins memOp:$src1, u8imm:$src2), 78020b57cec5SDimitry Andric !strconcat(OpcodeStr, 78030b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 78040b57cec5SDimitry Andric [(set VR256:$dst, 78050b57cec5SDimitry Andric (OpVT (X86VPermi (mem_frag addr:$src1), 78068bcb0991SDimitry Andric (i8 timm:$src2))))]>, 78070b57cec5SDimitry Andric Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VEX_L; 78080b57cec5SDimitry Andric } 78090b57cec5SDimitry Andric} 78100b57cec5SDimitry Andric 78110b57cec5SDimitry Andricdefm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64, 78120b57cec5SDimitry Andric WriteShuffle256, i256mem>, VEX_W; 78130b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in 78140b57cec5SDimitry Andricdefm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64, 78150b57cec5SDimitry Andric WriteFShuffle256, f256mem>, VEX_W; 78160b57cec5SDimitry Andric 78170b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 7818e8d8bef9SDimitry Andric// VPERM2I128 - Permute Integer vector Values in 128-bit chunks 78190b57cec5SDimitry Andric// 78200b57cec5SDimitry Andriclet isCommutable = 1 in 78210b57cec5SDimitry Andricdef VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst), 78220b57cec5SDimitry Andric (ins VR256:$src1, VR256:$src2, u8imm:$src3), 7823e8d8bef9SDimitry Andric "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, 7824e8d8bef9SDimitry Andric Sched<[WriteShuffle256]>, VEX_4V, VEX_L; 78250b57cec5SDimitry Andricdef VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst), 78260b57cec5SDimitry Andric (ins VR256:$src1, f256mem:$src2, u8imm:$src3), 7827e8d8bef9SDimitry Andric "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, 
Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;

// Lower 256-bit integer/half vector types onto VPERM2I128; each element type
// is instantiated exactly once.
let Predicates = [HasAVX2] in {
  defm : vperm2x128_lowering<"VPERM2I128", v4i64,  loadv4i64>;
  defm : vperm2x128_lowering<"VPERM2I128", v8i32,  loadv8i32>;
  defm : vperm2x128_lowering<"VPERM2I128", v16i16, loadv16i16>;
  defm : vperm2x128_lowering<"VPERM2I128", v16f16, loadv16f16>;
  defm : vperm2x128_lowering<"VPERM2I128", v32i8,  loadv32i8>;
}

//===----------------------------------------------------------------------===//
// VINSERTI128 - Insert packed integer values
//
// Both forms carry an empty pattern list; selection is done through the
// vinsert_lowering instantiations below.
let hasSideEffects = 0 in {
def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR128:$src2, u8imm:$src3),
          "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
let mayLoad = 1 in
def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, i128mem:$src2, u8imm:$src3),
          "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
}

let Predicates = [HasAVX2, NoVLX] in {
  defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v2i64, v4i64,  loadv2i64, loadv4i64>;
  defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v4i32, v8i32,  loadv4i32, loadv8i32>;
  defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v8i16, v16i16, loadv8i16, loadv16i16>;
  defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v8f16, v16f16, loadv8f16, loadv16f16>;
  defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v16i8, v32i8,  loadv16i8, loadv32i8>;
}

//===----------------------------------------------------------------------===//
// VEXTRACTI128 - Extract packed integer values
//
// Both forms carry an empty pattern list; selection is done through the
// vextract_lowering instantiations below.
def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
          (ins VR256:$src1, u8imm:$src2),
          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
          Sched<[WriteShuffle256]>, VEX, VEX_L;
let hasSideEffects = 0, mayStore = 1 in
def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
          (ins i128mem:$dst, VR256:$src1, u8imm:$src2),
          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
          Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_L;

let Predicates = [HasAVX2, NoVLX] in {
  defm : vextract_lowering<"VEXTRACTI128", v4i64,  v2i64>;
  defm : vextract_lowering<"VEXTRACTI128", v8i32,  v4i32>;
  defm : vextract_lowering<"VEXTRACTI128", v16i16, v8i16>;
  defm : vextract_lowering<"VEXTRACTI128", v16f16, v8f16>;
  defm : vextract_lowering<"VEXTRACTI128", v32i8,  v16i8>;
}

//===----------------------------------------------------------------------===//
// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
//

/// avx2_pmovmask - Defines the four forms of one masked-move mnemonic:
///   rm / Yrm : load forms  (opcode 0x8c), matched via IntLd128 / IntLd256.
///   mr / Ymr : store forms (opcode 0x8e), matched via IntSt128 / IntSt256.
/// The Y-prefixed forms use VR256 and add VEX_L. In every form $src1 is the
/// operand that maskmov_lowering (below) binds to the mask; in the store forms
/// $src2 is the value being stored. schedX / schedY supply the RM and MR
/// scheduling entries for the 128-bit and 256-bit forms respectively.
multiclass avx2_pmovmask<string OpcodeStr,
                         Intrinsic IntLd128, Intrinsic IntLd256,
                         Intrinsic IntSt128, Intrinsic IntSt256,
                         X86SchedWriteMaskMove schedX,
                         X86SchedWriteMaskMove schedY> {
  def rm  : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, i128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>,
             VEX_4V, Sched<[schedX.RM]>;
  def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, i256mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
             VEX_4V, VEX_L, Sched<[schedY.RM]>;
  def mr  : AVX28I<0x8e, MRMDestMem, (outs),
             (ins i128mem:$dst, VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>,
             VEX_4V, Sched<[schedX.MR]>;
  def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
             (ins i256mem:$dst, VR256:$src1, VR256:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
             VEX_4V, VEX_L, Sched<[schedY.MR]>;
}

defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
                                int_x86_avx2_maskload_d,
                                int_x86_avx2_maskload_d_256,
                                int_x86_avx2_maskstore_d,
                                int_x86_avx2_maskstore_d_256,
                                WriteVecMaskMove32, WriteVecMaskMove32Y>;
defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
                                int_x86_avx2_maskload_q,
                                int_x86_avx2_maskload_q_256,
                                int_x86_avx2_maskstore_q,
                                int_x86_avx2_maskstore_q_256,
                                WriteVecMaskMove64, WriteVecMaskMove64Y>, VEX_W;

/// maskmov_lowering - Selects the generic masked_store / masked_load nodes onto
/// the "mr" / "rm" forms of the instruction named by InstrStr. A masked_load is
/// only matched when its pass-through operand is undef or all-zeros. VT is the
/// data type, MaskVT the mask type; both live in register class RC.
multiclass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT,
                            ValueType MaskVT> {
    // masked store
    def: Pat<(masked_store (VT RC:$src), addr:$ptr, (MaskVT RC:$mask)),
             (!cast<Instruction>(InstrStr#"mr") addr:$ptr, RC:$mask, RC:$src)>;
    // masked load
    def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), undef)),
             (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
    def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask),
                              (VT immAllZerosV))),
             (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
}
let Predicates = [HasAVX] in {
  defm : maskmov_lowering<"VMASKMOVPS", VR128, v4f32, v4i32>;
  defm : maskmov_lowering<"VMASKMOVPD", VR128, v2f64, v2i64>;
  defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8f32, v8i32>;
  defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4f64, v4i64>;
}
let Predicates = [HasAVX1Only] in {
  // load/store i32/i64 not supported use ps/pd version
  defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8i32, v8i32>;
  defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4i64, v4i64>;
  defm : maskmov_lowering<"VMASKMOVPS", VR128, v4i32, v4i32>;
  defm : maskmov_lowering<"VMASKMOVPD", VR128, v2i64, v2i64>;
}
let Predicates = [HasAVX2] in {
  defm : maskmov_lowering<"VPMASKMOVDY", VR256, v8i32, v8i32>;
  defm : maskmov_lowering<"VPMASKMOVQY", VR256, v4i64, v4i64>;
  defm : maskmov_lowering<"VPMASKMOVD", VR128, v4i32, v4i32>;
  defm : maskmov_lowering<"VPMASKMOVQ", VR128, v2i64, v2i64>;
}

//===----------------------------------------------------------------------===//
// Variable Bit Shifts
//

/// avx2_var_shift - Defines rr/rm (VR128, vt128) and Yrr/Yrm (VR256, vt256,
/// VEX_L) forms of one shift opcode. Each pattern applies OpNode to $src1 and
/// $src2, where $src2 is either a register or a folded load.
multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType vt128, ValueType vt256> {
  def rr  : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst,
               (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>,
             VEX_4V, Sched<[SchedWriteVarVecShift.XMM]>;
  def rm  : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, i128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst,
               (vt128 (OpNode VR128:$src1,
                       (vt128 (load addr:$src2)))))]>,
             VEX_4V, Sched<[SchedWriteVarVecShift.XMM.Folded,
                            SchedWriteVarVecShift.XMM.ReadAfterFold]>;
  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
             (ins VR256:$src1, VR256:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst,
               (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>,
             VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM]>;
  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, i256mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst,
               (vt256 (OpNode VR256:$src1,
                       (vt256 (load addr:$src2)))))]>,
             VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded,
                                   SchedWriteVarVecShift.YMM.ReadAfterFold]>;
}

let Predicates = [HasAVX2, NoVLX] in {
  defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", X86vshlv, v4i32, v8i32>;
  defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", X86vshlv, v2i64, v4i64>, VEX_W;
  defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", X86vsrlv, v4i32, v8i32>;
  defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", X86vsrlv, v2i64, v4i64>, VEX_W;
  defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", X86vsrav, v4i32, v8i32>;
}

//===----------------------------------------------------------------------===//
// VGATHER - GATHER Operations

// FIXME: Improve scheduling of gather instructions.
/// avx2_gather - Defines the rm (VR128) and Yrm (RC256, VEX_L) forms of one
/// gather opcode. Each form has two outputs, $dst and $mask_wb, and takes
/// $src1, a memory operand $src2 and $mask as inputs. No selection patterns
/// are attached here. The operand tying / early-clobber constraints are
/// supplied by the enclosing `let` at the instantiation site.
multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256,
                       X86MemOperand memop128, X86MemOperand memop256> {
let mayLoad = 1, hasSideEffects = 0 in {
  def rm  : AVX28I<opc, MRMSrcMem4VOp3, (outs VR128:$dst, VR128:$mask_wb),
            (ins VR128:$src1, memop128:$src2, VR128:$mask),
            !strconcat(OpcodeStr,
              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
            []>, VEX, Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
  def Yrm : AVX28I<opc, MRMSrcMem4VOp3, (outs RC256:$dst, RC256:$mask_wb),
            (ins RC256:$src1, memop256:$src2, RC256:$mask),
            !strconcat(OpcodeStr,
              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
            []>, VEX, VEX_L, Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
}
}

let Predicates = [HasAVX2] in {
  let mayLoad = 1, hasSideEffects = 0, Constraints
    = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
    in {
    defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq",
                                  VR256, vx128mem, vx256mem>, VEX_W;
    defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq",
                                  VR256, vx128mem, vy256mem>, VEX_W;
    defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd",
                                  VR256, vx128mem, vy256mem>;
    defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd",
                                  VR128, vx64mem, vy128mem>;

    let ExeDomain = SSEPackedDouble in {
      defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd",
                                    VR256, vx128mem, vx256mem>, VEX_W;
      defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd",
                                    VR256, vx128mem, vy256mem>, VEX_W;
    }

    let ExeDomain = SSEPackedSingle in {
      defm VGATHERDPS : avx2_gather<0x92, "vgatherdps",
                                    VR256, vx128mem, vy256mem>;
      defm VGATHERQPS : avx2_gather<0x93, "vgatherqps",
                                    VR128, vx64mem, vy128mem>;
    }
  }
}

//===----------------------------------------------------------------------===//
// GFNI instructions
//===----------------------------------------------------------------------===//

/// GF2P8MULB_rm - Defines the rr (commutable) and rm forms of opcode 0xCF
/// matching X86GF2P8mulb. Is2Addr selects the two-operand asm string
/// (destination doubles as first source) over the three-operand one; the
/// per-def asm string is left empty and supplied through AsmString instead.
multiclass GF2P8MULB_rm<string OpcodeStr, ValueType OpVT,
                        RegisterClass RC, PatFrag MemOpFrag,
                        X86MemOperand X86MemOp, X86FoldableSchedWrite sched,
                        bit Is2Addr = 0> {
  let ExeDomain = SSEPackedInt,
      AsmString = !if(Is2Addr,
        OpcodeStr#"\t{$src2, $dst|$dst, $src2}",
        OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}") in {
    let isCommutable = 1 in
    def rr : PDI<0xCF, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), "",
                 [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, RC:$src2)))]>,
             Sched<[sched]>, T8PD;

    def rm : PDI<0xCF, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, X86MemOp:$src2), "",
                 [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1,
                                 (MemOpFrag addr:$src2))))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>, T8PD;
  }
}

/// GF2P8AFFINE_rmi - Defines the rri and rmi forms of an affine GFNI opcode.
/// Both take an 8-bit immediate $src3 that the patterns match as timm.
/// Is2Addr selects the asm string the same way as in GF2P8MULB_rm.
multiclass GF2P8AFFINE_rmi<bits<8> Op, string OpStr, ValueType OpVT,
                           SDNode OpNode, RegisterClass RC, PatFrag MemOpFrag,
                           X86MemOperand X86MemOp, X86FoldableSchedWrite sched,
                           bit Is2Addr = 0> {
  let AsmString = !if(Is2Addr,
      OpStr#"\t{$src3, $src2, $dst|$dst, $src2, $src3}",
      OpStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in {
  def rri : Ii8<Op, MRMSrcReg, (outs RC:$dst),
              (ins RC:$src1, RC:$src2, u8imm:$src3), "",
              [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))],
              SSEPackedInt>, Sched<[sched]>;
  def rmi : Ii8<Op, MRMSrcMem, (outs RC:$dst),
              (ins RC:$src1, X86MemOp:$src2, u8imm:$src3), "",
              [(set RC:$dst, (OpVT (OpNode RC:$src1,
                                    (MemOpFrag addr:$src2),
                                    timm:$src3)))], SSEPackedInt>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

/// GF2P8AFFINE_common - Instantiates GF2P8AFFINE_rmi three times for one
/// opcode: NAME (two-address, tied $src1 = $dst, [HasGFNI, UseSSE2]), and
/// V#NAME / V#NAME#Y (VEX-encoded 128-bit / 256-bit, [HasGFNI, HasAVX, NoVLX]).
multiclass GF2P8AFFINE_common<bits<8> Op, string OpStr, SDNode OpNode> {
  let Constraints = "$src1 = $dst",
      Predicates  = [HasGFNI, UseSSE2] in
  defm NAME         : GF2P8AFFINE_rmi<Op, OpStr, v16i8, OpNode,
                                      VR128, load, i128mem, SchedWriteVecIMul.XMM, 1>;
  let Predicates  = [HasGFNI, HasAVX, NoVLX] in {
    defm V#NAME    : GF2P8AFFINE_rmi<Op, "v"#OpStr, v16i8, OpNode, VR128,
                                     load, i128mem, SchedWriteVecIMul.XMM>,
                                     VEX_4V, VEX_W;
    defm V#NAME#Y : GF2P8AFFINE_rmi<Op, "v"#OpStr, v32i8, OpNode, VR256,
                                     load, i256mem, SchedWriteVecIMul.YMM>,
                                     VEX_4V, VEX_L, VEX_W;
  }
}

// GF2P8MULB
let Constraints = "$src1 = $dst",
    Predicates  = [HasGFNI, UseSSE2] in
defm GF2P8MULB      : GF2P8MULB_rm<"gf2p8mulb", v16i8, VR128, memop,
                                    i128mem, SchedWriteVecALU.XMM, 1>;
let Predicates  = [HasGFNI, HasAVX, NoVLX] in {
  defm VGF2P8MULB   : GF2P8MULB_rm<"vgf2p8mulb", v16i8, VR128, load,
                                   i128mem, SchedWriteVecALU.XMM>, VEX_4V;
  defm VGF2P8MULBY  : GF2P8MULB_rm<"vgf2p8mulb", v32i8, VR256, load,
                                   i256mem, SchedWriteVecALU.YMM>, VEX_4V, VEX_L;
}
// GF2P8AFFINEINVQB, GF2P8AFFINEQB
let isCommutable = 0 in {
  defm GF2P8AFFINEINVQB : GF2P8AFFINE_common<0xCF, "gf2p8affineinvqb",
                                             X86GF2P8affineinvqb>, TAPD;
  defm GF2P8AFFINEQB    : GF2P8AFFINE_common<0xCE, "gf2p8affineqb",
                                             X86GF2P8affineqb>, TAPD;
}

// AVX-IFMA
/// avx_ifma_rm - Defines rr/rm (VR128, v2i64) and Yrr/Yrm (VR256, v4i64, VEX_L)
/// forms of one opcode. $src1 is tied to $dst and is passed to OpNode as the
/// last operand; only the register forms are marked commutable.
let Predicates = [HasAVXIFMA, NoVLX_Or_NoIFMA], Constraints = "$src1 = $dst",
    checkVEXPredicate = 1 in
multiclass avx_ifma_rm<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  // NOTE: The SDNode have the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
  let isCommutable = 1 in {
    def rr  : AVX8I<opc, MRMSrcReg, (outs VR128:$dst),
               (ins VR128:$src1, VR128:$src2, VR128:$src3),
               !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
               [(set VR128:$dst, (v2i64 (OpNode VR128:$src2,
                                         VR128:$src3, VR128:$src1)))]>,
               VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
  }
    def rm  : AVX8I<opc, MRMSrcMem, (outs VR128:$dst),
               (ins VR128:$src1, VR128:$src2, i128mem:$src3),
               !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
               [(set VR128:$dst, (v2i64 (OpNode VR128:$src2,
                                        (loadv2i64 addr:$src3), VR128:$src1)))]>,
               VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
  let isCommutable = 1 in {
    def Yrr  : AVX8I<opc, MRMSrcReg, (outs VR256:$dst),
               (ins VR256:$src1, VR256:$src2, VR256:$src3),
               !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
               [(set VR256:$dst, (v4i64 (OpNode VR256:$src2,
                                         VR256:$src3, VR256:$src1)))]>,
               VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
  }
    def Yrm  : AVX8I<opc, MRMSrcMem, (outs VR256:$dst),
               (ins VR256:$src1, VR256:$src2, i256mem:$src3),
               !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
               [(set VR256:$dst, (v4i64 (OpNode VR256:$src2,
                                        (loadv4i64 addr:$src3), VR256:$src1)))]>,
               VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
}

defm VPMADD52HUQ : avx_ifma_rm<0xb5, "vpmadd52huq", x86vpmadd52h>, VEX_W, ExplicitVEXPrefix;
defm VPMADD52LUQ : avx_ifma_rm<0xb4, "vpmadd52luq", x86vpmadd52l>, VEX_W, ExplicitVEXPrefix;

// AVX-VNNI-INT8
/// avx_dotprod_rm - Defines the rr and rm forms of one three-source opcode
/// with $src1 tied to $dst. IsCommutable is applied to the register form only.
let Constraints = "$src1 = $dst" in
multiclass avx_dotprod_rm<bits<8> Opc, string OpcodeStr, ValueType OpVT,
                          RegisterClass RC, PatFrag MemOpFrag,
                          X86MemOperand X86memop, SDNode OpNode,
                          X86FoldableSchedWrite Sched,
                          bit IsCommutable> {
  let isCommutable = IsCommutable in
  def rr  : I<Opc, MRMSrcReg, (outs RC:$dst),
             (ins RC:$src1, RC:$src2, RC:$src3),
             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
             [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
             VEX_4V, Sched<[Sched]>;
  def rm  : I<Opc, MRMSrcMem, (outs RC:$dst),
             (ins RC:$src1, RC:$src2, X86memop:$src3),
             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
             [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2,
                                   (MemOpFrag addr:$src3))))]>,
             VEX_4V, Sched<[Sched.Folded, Sched.ReadAfterFold]>;
}

let Predicates = [HasAVXVNNIINT8] in {
  defm VPDPBSSD   : avx_dotprod_rm<0x50,"vpdpbssd",  v4i32, VR128, loadv4i32,
                                   i128mem, X86vpdpbssd, SchedWriteVecIMul.XMM,
                                   1>, T8XD;
  defm VPDPBSSDY  : avx_dotprod_rm<0x50,"vpdpbssd",  v8i32, VR256, loadv8i32,
                                   i256mem, X86vpdpbssd, SchedWriteVecIMul.YMM,
                                   1>, VEX_L, T8XD;
  defm VPDPBUUD   : avx_dotprod_rm<0x50,"vpdpbuud",  v4i32, VR128, loadv4i32,
                                   i128mem, X86vpdpbuud, SchedWriteVecIMul.XMM,
                                   1>, T8PS;
  defm VPDPBUUDY  : avx_dotprod_rm<0x50,"vpdpbuud",  v8i32, VR256, loadv8i32,
                                   i256mem, X86vpdpbuud, SchedWriteVecIMul.YMM,
                                   1>, VEX_L, T8PS;
  defm VPDPBSSDS  : avx_dotprod_rm<0x51,"vpdpbssds", v4i32, VR128, loadv4i32,
                                   i128mem, X86vpdpbssds, SchedWriteVecIMul.XMM,
                                   1>, T8XD;
  defm VPDPBSSDSY : avx_dotprod_rm<0x51,"vpdpbssds", v8i32, VR256, loadv8i32,
                                   i256mem, X86vpdpbssds, SchedWriteVecIMul.YMM,
                                   1>, VEX_L, T8XD;
  defm VPDPBUUDS  : avx_dotprod_rm<0x51,"vpdpbuuds", v4i32, VR128, loadv4i32,
                                   i128mem, X86vpdpbuuds, SchedWriteVecIMul.XMM,
                                   1>, T8PS;
  defm VPDPBUUDSY : avx_dotprod_rm<0x51,"vpdpbuuds", v8i32, VR256, loadv8i32,
                                   i256mem, X86vpdpbuuds, SchedWriteVecIMul.YMM,
                                   1>, VEX_L, T8PS;
  defm VPDPBSUD   : avx_dotprod_rm<0x50,"vpdpbsud",  v4i32, VR128, loadv4i32,
                                   i128mem, X86vpdpbsud,  SchedWriteVecIMul.XMM,
                                   0>, T8XS;
  defm VPDPBSUDY  : avx_dotprod_rm<0x50,"vpdpbsud",  v8i32, VR256, loadv8i32,
                                   i256mem, X86vpdpbsud,  SchedWriteVecIMul.YMM,
                                   0>, VEX_L, T8XS;
  defm VPDPBSUDS  : avx_dotprod_rm<0x51,"vpdpbsuds", v4i32, VR128, loadv4i32,
                                   i128mem, X86vpdpbsuds, SchedWriteVecIMul.XMM,
                                   0>, T8XS;
  defm VPDPBSUDSY : avx_dotprod_rm<0x51,"vpdpbsuds", v8i32, VR256, loadv8i32,
                                   i256mem, X86vpdpbsuds, SchedWriteVecIMul.YMM,
                                   0>, VEX_L, T8XS;
}

// AVX-NE-CONVERT
/// AVX_NE_CONVERT_BASE - Defines memory-source-only rm (VR128) and Yrm (VR256,
/// VEX_L) forms. The matched intrinsic is looked up by name as
/// "int_x86_" # OpcodeStr # "128" / "256", so OpcodeStr must spell the
/// intrinsic's base name exactly.
multiclass AVX_NE_CONVERT_BASE<bits<8> Opcode, string OpcodeStr,
                  X86MemOperand MemOp128, X86MemOperand MemOp256> {
  def rm : I<Opcode, MRMSrcMem, (outs VR128:$dst), (ins MemOp128:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set VR128:$dst,
                (!cast<Intrinsic>("int_x86_"#OpcodeStr#"128") addr:$src))]>,
              Sched<[WriteCvtPH2PS]>, VEX;
  def Yrm : I<Opcode, MRMSrcMem, (outs VR256:$dst), (ins MemOp256:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set VR256:$dst,
                (!cast<Intrinsic>("int_x86_"#OpcodeStr#"256") addr:$src))]>,
              Sched<[WriteCvtPH2PSY]>, VEX, VEX_L;
}

/// VCVTNEPS2BF16_BASE - Defines rr/rm (128-bit source) and Yrr/Yrm (256-bit
/// source, VEX_L) forms of opcode 0x72; all four write a VR128 destination.
/// The memory forms carry an optional {x}/{y} mnemonic suffix in their asm
/// strings, which the register forms do not.
multiclass VCVTNEPS2BF16_BASE {
  def rr : I<0x72, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
             "vcvtneps2bf16\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst, (int_x86_vcvtneps2bf16128 VR128:$src))]>,
             Sched<[WriteCvtPH2PS]>;
  def rm : I<0x72, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
             "vcvtneps2bf16{x}\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst, (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src)))]>,
             Sched<[WriteCvtPH2PS]>;
  def Yrr : I<0x72, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
             "vcvtneps2bf16\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst, (int_x86_vcvtneps2bf16256 VR256:$src))]>,
             Sched<[WriteCvtPH2PSY]>, VEX_L;
  def Yrm : I<0x72, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
             "vcvtneps2bf16{y}\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst, (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src)))]>,
             Sched<[WriteCvtPH2PSY]>, VEX_L;
}

let Predicates = [HasAVXNECONVERT] in {
  defm VBCSTNEBF162PS : AVX_NE_CONVERT_BASE<0xb1, "vbcstnebf162ps", f16mem,
       f16mem>, T8XS;
  defm VBCSTNESH2PS : AVX_NE_CONVERT_BASE<0xb1, "vbcstnesh2ps", f16mem, f16mem>,
       T8PD;
  defm VCVTNEEBF162PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneebf162ps", f128mem,
       f256mem>, T8XS;
  defm VCVTNEEPH2PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneeph2ps", f128mem,
       f256mem>, T8PD;
  defm VCVTNEOBF162PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneobf162ps", f128mem,
       f256mem>, T8XD;
  defm VCVTNEOPH2PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneoph2ps", f128mem,
       f256mem>, T8PS;
  let checkVEXPredicate = 1 in
  defm VCVTNEPS2BF16 : VCVTNEPS2BF16_BASE, VEX, T8XS, ExplicitVEXPrefix;
}

// AT&T-syntax aliases accepting the x/y suffixed mnemonic on the register
// forms.
def : InstAlias<"vcvtneps2bf16x\t{$src, $dst|$dst, $src}",
                (VCVTNEPS2BF16rr VR128:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtneps2bf16y\t{$src, $dst|$dst, $src}",
                (VCVTNEPS2BF16Yrr VR128:$dst, VR256:$src), 0, "att">;