//===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 SSE instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//

/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
///
/// Emits two isCodeGenOnly records that share opcode `opc`:
///   rr - register/register form (MRMSrcReg), marked commutable.
///   rm - register/memory form (MRMSrcMem); $src2 is matched through `load`
///        and scheduled with sched.Folded / sched.ReadAfterFold.
/// Is2Addr picks the asm string: the two-operand "$src2, $dst" spelling when
/// set (the default), the three-operand "$src2, $src1, $dst" one otherwise.
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           RegisterClass RC, X86MemOperand x86memop,
                           Domain d, X86FoldableSchedWrite sched,
                           bit Is2Addr = 1> {
let isCodeGenOnly = 1 in {
  let isCommutable = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
                !if(Is2Addr,
                    !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], d>,
             Sched<[sched]>;
  }
  def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
              !if(Is2Addr,
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], d>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
///
/// Emits rr_Int / rm_Int (both hasSideEffects = 0) built on SI_Int. The
/// result of OpNode is cast to `VT`; the memory form is additionally marked
/// mayLoad and matches its second operand through `mem_frags`.
/// Is2Addr selects the two- or three-operand asm string as in sse12_fp_scalar.
multiclass sse12_fp_scalar_int<bits<8> opc,
                               SDPatternOperator OpNode, RegisterClass RC,
                               ValueType VT, string asm, Operand memopr,
                               PatFrags mem_frags, Domain d,
                               X86FoldableSchedWrite sched, bit Is2Addr = 1> {
let hasSideEffects = 0 in {
  def rr_Int : SI_Int<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
                      !if(Is2Addr,
                          !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                          !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                      [(set RC:$dst, (VT (OpNode RC:$src1, RC:$src2)))], d>,
               Sched<[sched]>;
  let mayLoad = 1 in
  def rm_Int : SI_Int<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
                      !if(Is2Addr,
                          !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                          !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                      [(set RC:$dst, (VT (OpNode RC:$src1, (mem_frags addr:$src2))))], d>,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

/// sse12_fp_packed - SSE 1 & 2 packed instructions class
///
/// Emits rr (commutable) and rm (mayLoad) records built on PI. Note that the
/// rr pattern casts the OpNode result to `vt`, while the rm pattern does not
/// and relies on `mem_frag` for its memory operand.
/// Is2Addr selects the two- or three-operand asm string.
multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           RegisterClass RC, ValueType vt,
                           X86MemOperand x86memop, PatFrag mem_frag,
                           Domain d, X86FoldableSchedWrite sched,
                           bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
              !if(Is2Addr,
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>,
           Sched<[sched]>;
  let mayLoad = 1 in
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
              !if(Is2Addr,
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
              d>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
///
/// Same rr / rm shape as sse12_fp_packed, but the selection patterns are
/// supplied verbatim by the caller (`pat_rr`, `pat_rm`) instead of being
/// derived from an OpNode. Both records are hasSideEffects = 0; rr is
/// commutable and rm is mayLoad.
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
                                      string OpcodeStr, X86MemOperand x86memop,
                                      X86FoldableSchedWrite sched,
                                      list<dag> pat_rr, list<dag> pat_rm,
                                      bit Is2Addr = 1> {
  let isCommutable = 1, hasSideEffects = 0 in
  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
              !if(Is2Addr,
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              pat_rr, d>,
           Sched<[sched]>;
  let hasSideEffects = 0, mayLoad = 1 in
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
              !if(Is2Addr,
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              pat_rm, d>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}


// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
// Scalar floating-point zero pseudos: one record per destination register
// class (FR16, FR32, FR64, and VR128 for the f128 zero). Each is an opcode-0
// Pseudo with an empty asm string, matched from the corresponding fp*imm0
// leaf, and all are rematerializable, as cheap as a move, and foldable as a
// load.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
  def FsFLD0SH   : I<0, Pseudo, (outs FR16:$dst), (ins), "",
                     [(set FR16:$dst, fp16imm0)]>, Requires<[HasSSE2, NoAVX512]>;
  def FsFLD0SS   : I<0, Pseudo, (outs FR32:$dst), (ins), "",
                     [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoAVX512]>;
  def FsFLD0SD   : I<0, Pseudo, (outs FR64:$dst), (ins), "",
                     [(set FR64:$dst, fp64imm0)]>, Requires<[HasSSE2, NoAVX512]>;
  def FsFLD0F128 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                     [(set VR128:$dst, fp128imm0)]>, Requires<[HasSSE1, NoAVX512]>;
}

//===----------------------------------------------------------------------===//
// AVX & SSE - Zero/One Vectors
//===----------------------------------------------------------------------===//

// Alias instruction that maps zero vector to pxor / xorp* for sse.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
// 128-bit all-zeros pseudo. The record itself carries the v4f32 pattern; the
// remaining 128-bit vector types are routed to it by the Pat list below.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
               [(set VR128:$dst, (v4f32 immAllZerosV))]>;
}

let Predicates = [NoAVX512] in {
def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
def : Pat<(v8f16 immAllZerosV), (V_SET0)>;
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
}


// The same as done above but for AVX. The 256-bit AVX1 ISA doesn't support PI,
// and doesn't need it because on sandy bridge the register is set to zero
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty
// NOTE(review): SET0PSY / SET0PDY are not defined in this part of the file;
// the pseudo actually defined here is AVX_SET0 -- presumably the names in the
// sentence above are stale. Confirm and update.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                 [(set VR256:$dst, (v8i32 immAllZerosV))]>;
}

// AVX_SET0 carries the v8i32 pattern; map the other 256-bit types onto it.
let Predicates = [NoAVX512] in {
def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
def : Pat<(v16f16 immAllZerosV), (AVX_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;
}

// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
// All-ones vector pseudos. V_SETALLONES (128-bit) is unpredicated here; the
// two 256-bit records share the same v8i32 pattern and are distinguished
// only by their predicates (AVX1-only when optimizing for minimum size,
// versus AVX2).
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
  def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                       [(set VR128:$dst, (v4i32 immAllOnesV))]>;
  let Predicates = [HasAVX1Only, OptForMinSize] in {
  def AVX1_SETALLONES: I<0, Pseudo, (outs VR256:$dst), (ins), "",
                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
  }
  let Predicates = [HasAVX2] in
  def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move FP Scalar Instructions
//
// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
// register copies because it's a partial register update; Register-to-register
// movss/movsd is not modeled as an INSERT_SUBREG because INSERT_SUBREG requires
// that the insert be implementable in terms of a copy, and, as just mentioned,
// we don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//

// sse12_move_rr - register/register scalar move pair.
//   rr     - opcode 0x10, MRMSrcReg, commutable, selected from
//            (vt (OpNode $src1, $src2)).
//   rr_REV - opcode 0x11, MRMDestReg, no pattern; codegen-only and forced
//            into the disassembler tables.
// The asm string is base_opc immediately followed by asm_opr.
multiclass sse12_move_rr<SDNode OpNode, ValueType vt, string base_opc,
                         string asm_opr, Domain d> {
  let isCommutable = 1 in
  def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
              (ins VR128:$src1, VR128:$src2),
              !strconcat(base_opc, asm_opr),
              [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], d>,
           Sched<[SchedWriteFShuffle.XMM]>;

  // For the disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                  (ins VR128:$src1, VR128:$src2),
                  !strconcat(base_opc, asm_opr), []>,
               Sched<[SchedWriteFShuffle.XMM]>;
}

// sse12_move - scalar move family for one element type.
// For a defm named NAME this produces:
//   V#NAME rr / rr_REV - VEX three-operand forms (UseAVX, OptForSize).
//   V#NAME#mr          - VEX store of RC:$src to memory (opcode 0x11).
//   NAME rr / rr_REV   - legacy two-operand forms with $src1 tied to $dst
//                        (pred, NoSSE41_Or_OptForSize).
//   NAME#mr            - legacy store of RC:$src to memory (opcode 0x11).
// plus ".s"-suffixed assembler aliases that resolve to the rr_REV records.
multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
                      X86MemOperand x86memop, string OpcodeStr,
                      Domain d, Predicate pred> {
  // AVX
  let Predicates = [UseAVX, OptForSize] in
  defm V#NAME : sse12_move_rr<OpNode, vt, OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d>,
                              VEX, VVVV, VEX_LIG, WIG;

  def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(store RC:$src, addr:$dst)], d>,
                     VEX, VEX_LIG, Sched<[WriteFStore]>, WIG;
  // SSE1 & 2
  let Constraints = "$src1 = $dst" in {
    let Predicates = [pred, NoSSE41_Or_OptForSize] in
    defm NAME : sse12_move_rr<OpNode, vt, OpcodeStr,
                              "\t{$src2, $dst|$dst, $src2}", d>;
  }

  def NAME#mr   : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(store RC:$src, addr:$dst)], d>,
                     Sched<[WriteFStore]>;

  def : InstAlias<"v"#OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>("V"#NAME#"rr_REV")
                   VR128:$dst, VR128:$src1, VR128:$src2), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src2, $dst|$dst, $src2}",
                  (!cast<Instruction>(NAME#"rr_REV")
                   VR128:$dst, VR128:$src2), 0>;
}

// Loading from memory automatically zeroing upper bits.
// sse12_move_rm - scalar loads (opcode 0x10, MRMSrcMem) for one element type.
//   V#NAME#rm / NAME#rm         - write VR128 and match (vt (vzloadfrag ...)).
//   V#NAME#rm_alt / NAME#rm_alt - codegen-only variants that write the scalar
//                                 register class RC and match mem_pat.
// The V-prefixed records carry VEX, VEX_LIG and WIG.
multiclass sse12_move_rm<RegisterClass RC, ValueType vt, X86MemOperand x86memop,
                         PatFrag mem_pat, PatFrag vzloadfrag, string OpcodeStr,
                         Domain d> {
  def V#NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
                     VEX, VEX_LIG, Sched<[WriteFLoad]>, WIG;
  def NAME#rm   : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
                     Sched<[WriteFLoad]>;

  // _alt version uses FR32/FR64 register class.
  let isCodeGenOnly = 1 in {
  def V#NAME#rm_alt : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                         [(set RC:$dst, (mem_pat addr:$src))], d>,
                         VEX, VEX_LIG, Sched<[WriteFLoad]>, WIG;
  def NAME#rm_alt   : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                         [(set RC:$dst, (mem_pat addr:$src))], d>,
                         Sched<[WriteFLoad]>;
  }
}

defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
                        SSEPackedSingle, UseSSE1>, TB, XS;
defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
                        SSEPackedDouble, UseSSE2>, TB, XD;

let canFoldAsLoad = 1, isReMaterializable = 1 in {
  defm MOVSS : sse12_move_rm<FR32, v4f32, f32mem, loadf32, X86vzload32, "movss",
                             SSEPackedSingle>, TB, XS;
  defm MOVSD : sse12_move_rm<FR64, v2f64, f64mem, loadf64, X86vzload64, "movsd",
                             SSEPackedDouble>, TB, XD;
}

// Patterns
let Predicates = [UseAVX] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
}

let Predicates = [UseAVX, OptForSize] in {
  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
  // MOVSS to the lower bits.
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (VMOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (VMOVSSrr (v4i32 (V_SET0)), VR128:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSrr (v4f32 (V_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSrr (v4i32 (V_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)))), sub_xmm)>;
}

let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVSS to the lower bits.
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
          (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
          (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
}

let Predicates = [UseSSE2] in
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
          (MOVSDrm addr:$src)>;

let Predicates = [UseSSE1] in
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
          (MOVSSrm addr:$src)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
//===----------------------------------------------------------------------===//

// sse12_mov_packed - packed register move and load for one opcode.
//   rr - register copy (isMoveReg, no side effects, no pattern).
//   rm - load selected from ld_frag; foldable as a load and rematerializable.
multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
                            X86MemOperand x86memop, PatFrag ld_frag,
                            string asm, Domain d,
                            X86SchedWriteMoveLS sched> {
let hasSideEffects = 0, isMoveReg = 1 in
  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>,
           Sched<[sched.RR]>;
let canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set RC:$dst, (ld_frag addr:$src))], d>,
           Sched<[sched.RM]>;
}

// VEX-encoded 128-bit and 256-bit (Y suffix, VEX_L) register moves and loads.
let Predicates = [HasAVX, NoVLX] in {
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
                                SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                                TB, VEX, WIG;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
                                SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                                TB, PD, VEX, WIG;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
                                SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                                TB, VEX, WIG;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
                                SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                                TB, PD, VEX, WIG;

defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, "movaps",
                                 SSEPackedSingle, SchedWriteFMoveLS.YMM>,
                                 TB, VEX, VEX_L, WIG;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd",
                                 SSEPackedDouble, SchedWriteFMoveLS.YMM>,
                                 TB, PD, VEX, VEX_L, WIG;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups",
                                 SSEPackedSingle, SchedWriteFMoveLS.YMM>,
                                 TB, VEX, VEX_L, WIG;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd",
                                 SSEPackedDouble, SchedWriteFMoveLS.YMM>,
                                 TB, PD, VEX, VEX_L, WIG;
}

// Legacy-encoded register moves and loads.
let Predicates = [UseSSE1] in {
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
                               SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                               TB;
defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
                               SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                               TB;
}
let Predicates = [UseSSE2] in {
defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
                               SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                               TB, PD;
defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
                               SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                               TB, PD;
}

// VEX-encoded stores: 0x29 for the aligned forms, 0x11 for the unaligned ones.
let Predicates = [HasAVX, NoVLX] in {
let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>,
                   VEX, WIG;
def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>,
                   VEX, WIG;
def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)]>,
                   VEX, WIG;
def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)]>,
                   VEX, WIG;
} // SchedRW

let SchedRW = [SchedWriteFMoveLS.YMM.MR] in {
def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v8f32 VR256:$src), addr:$dst)]>,
                   VEX, VEX_L, WIG;
def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f64 VR256:$src), addr:$dst)]>,
                   VEX, VEX_L, WIG;
def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v8f32 VR256:$src), addr:$dst)]>,
                   VEX, VEX_L, WIG;
def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v4f64 VR256:$src), addr:$dst)]>,
                   VEX, VEX_L, WIG;
} // SchedRW
} // Predicate

// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    isMoveReg = 1 in {
let SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
  def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
                          (ins VR128:$src),
                          "movaps\t{$src, $dst|$dst, $src}", []>,
                          VEX, WIG;
  def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
                          (ins VR128:$src),
                          "movapd\t{$src, $dst|$dst, $src}", []>,
                          VEX, WIG;
  def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
                          (ins VR128:$src),
                          "movups\t{$src, $dst|$dst, $src}", []>,
                          VEX, WIG;
  def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
                          (ins VR128:$src),
                          "movupd\t{$src, $dst|$dst, $src}", []>,
                          VEX, WIG;
} // SchedRW

let SchedRW = [SchedWriteFMoveLS.YMM.RR] in {
  def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movaps\t{$src, $dst|$dst, $src}", []>,
                            VEX, VEX_L, WIG;
  def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movapd\t{$src, $dst|$dst, $src}", []>,
                            VEX, VEX_L, WIG;
  def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movups\t{$src, $dst|$dst, $src}", []>,
                            VEX, VEX_L, WIG;
  def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movupd\t{$src, $dst|$dst, $src}", []>,
                            VEX, VEX_L, WIG;
} // SchedRW
} // isCodeGenOnly, ForceDisassemble, hasSideEffects, isMoveReg

// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
                (VMOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}",
                (VMOVAPDrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}",
                (VMOVUPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}",
                (VMOVUPDrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
                (VMOVAPSYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}",
                (VMOVAPDYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}",
                (VMOVUPSYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}",
                (VMOVUPDYrr_REV VR256:$dst, VR256:$src), 0>;

// Legacy-encoded stores: 0x29 for the aligned forms, 0x11 for the unaligned
// ones.
let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)]>;
def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)]>;
} // SchedRW

// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    isMoveReg = 1, SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
  def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movaps\t{$src, $dst|$dst, $src}", []>;
  def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movapd\t{$src, $dst|$dst, $src}", []>;
  def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movups\t{$src, $dst|$dst, $src}", []>;
  def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movupd\t{$src, $dst|$dst, $src}", []>;
}

// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"movaps.s\t{$src, $dst|$dst, $src}",
                (MOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movapd.s\t{$src, $dst|$dst, $src}",
                (MOVAPDrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movups.s\t{$src, $dst|$dst, $src}",
                (MOVUPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movupd.s\t{$src, $dst|$dst, $src}",
                (MOVUPDrr_REV VR128:$dst, VR128:$src), 0>;

// Selection patterns that route non-f32/f64 vector loads and stores through
// the VMOVAPS / VMOVUPS instructions. Aligned fragments pick the VMOVAPS*
// form; unaligned fragments pick VMOVUPS*.
let Predicates = [HasAVX, NoVLX] in {
  // 256-bit load/store need to use floating point load/store in case we don't
  // have AVX2. Execution domain fixing will convert to integer if AVX2 is
  // available and changing the domain is beneficial.
  def : Pat<(alignedloadv4i64 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(alignedloadv8i32 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(alignedloadv16i16 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(alignedloadv32i8 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(loadv4i64 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(loadv8i32 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(loadv16i16 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(loadv32i8 addr:$src),
            (VMOVUPSYrm addr:$src)>;

  def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore (v16i16 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore (v32i8 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v4i64 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v8i32 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;

  // 128-bit half-precision (v8f16) and bfloat (v8bf16) vectors.
  def : Pat<(alignedloadv8f16 addr:$src),
            (VMOVAPSrm addr:$src)>;
  def : Pat<(alignedloadv8bf16 addr:$src),
            (VMOVAPSrm addr:$src)>;
  def : Pat<(loadv8f16 addr:$src),
            (VMOVUPSrm addr:$src)>;
  def : Pat<(loadv8bf16 addr:$src),
            (VMOVUPSrm addr:$src)>;
  def : Pat<(alignedstore (v8f16 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8bf16 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8f16 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8bf16 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;

  // 256-bit half-precision (v16f16) and bfloat (v16bf16) vectors.
  def : Pat<(alignedloadv16f16 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(alignedloadv16bf16 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(loadv16f16 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(loadv16bf16 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(alignedstore (v16f16 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore (v16bf16 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v16f16 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v16bf16 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
}

// Use movaps / movups for SSE integer load / store (one byte shorter).
// The instructions selected below are then converted to MOVDQA/MOVDQU
// during the SSE domain pass.
let Predicates = [UseSSE1] in {
  // 128-bit integer vector loads: aligned -> MOVAPSrm, unaligned -> MOVUPSrm.
  def : Pat<(alignedloadv2i64 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(alignedloadv4i32 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(alignedloadv8i16 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(alignedloadv16i8 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src),
            (MOVUPSrm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),
            (MOVUPSrm addr:$src)>;
  def : Pat<(loadv8i16 addr:$src),
            (MOVUPSrm addr:$src)>;
  def : Pat<(loadv16i8 addr:$src),
            (MOVUPSrm addr:$src)>;

  // 128-bit integer vector stores: aligned -> MOVAPSmr, unaligned -> MOVUPSmr.
  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
}

// v8f16 loads and stores reuse the same MOVAPS / MOVUPS instructions.
let Predicates = [UseSSE2] in {
  def : Pat<(alignedloadv8f16 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(loadv8f16 addr:$src),
            (MOVUPSrm addr:$src)>;
  def : Pat<(alignedstore (v8f16 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8f16 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//

/// sse12_mov_hilo_packed_base - Defines the PSrm and PDrm load forms shared by
/// the move-low / move-high packed instructions.
///   opc      - opcode byte used by both records.
///   pdnode   - DAG operator matched by the PDrm pattern.
///   base_opc - mnemonic stem; "s" or "d" is appended to it.
///   asm_opr  - operand string appended after the mnemonic.
multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDPatternOperator pdnode,
                                      string base_opc, string asm_opr> {
  // No pattern as they need to be special-cased between high and low.
  let hasSideEffects = 0, mayLoad = 1 in
  def PSrm : PI<opc, MRMSrcMem,
                (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                !strconcat(base_opc, "s", asm_opr),
                [], SSEPackedSingle>, TB,
                Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;

  // The PD form has a pattern: pdnode applied to $src1 and a 64-bit scalar
  // load widened with scalar_to_vector.
  def PDrm : PI<opc, MRMSrcMem,
                (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                !strconcat(base_opc, "d", asm_opr),
                [(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
                                  (scalar_to_vector (loadf64 addr:$src2)))))],
                SSEPackedDouble>, TB, PD,
                Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

/// sse12_mov_hilo_packed - Instantiates sse12_mov_hilo_packed_base twice:
/// a "V"-prefixed three-operand VEX variant under UseAVX, and a two-operand
/// variant whose $src1 is tied to $dst.
multiclass sse12_mov_hilo_packed<bits<8>opc, SDPatternOperator pdnode,
                                 string base_opc> {
  let Predicates = [UseAVX] in
    defm V#NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc,
                                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
                                    VEX, VVVV, WIG;

  let Constraints = "$src1 = $dst" in
    defm NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc,
                                    "\t{$src2, $dst|$dst, $src2}">;
}

defm MOVL : sse12_mov_hilo_packed<0x12, X86Movsd, "movlp">;

// Store forms of MOVLPS / MOVLPD. The PS records have no pattern, so mayStore
// and hasSideEffects are set explicitly on them; the PD records store element
// 0 of the v2f64 source.
let SchedRW = [WriteFStore] in {
let Predicates = [UseAVX] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlps\t{$src, $dst|$dst, $src}",
                     []>,
                     VEX, WIG;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (extractelt (v2f64 VR128:$src),
                                   (iPTR 0))), addr:$dst)]>,
                     VEX, WIG;
} // UseAVX
let mayStore = 1, hasSideEffects = 0 in
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlps\t{$src, $dst|$dst, $src}",
                   []>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (extractelt (v2f64 VR128:$src),
                                 (iPTR 0))), addr:$dst)]>;
} // SchedRW

let Predicates = [UseSSE1] in {
  // This pattern helps select MOVLPS on SSE1 only targets. With SSE2 we'll
  // end up with a movsd or blend instead of shufp.
  // No need for aligned load, we're only loading 64-bits.
  def : Pat<(X86Shufp (v4f32 (simple_load addr:$src2)), VR128:$src1,
                      (i8 -28)),
            (MOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Shufp (v4f32 (X86vzload64 addr:$src2)), VR128:$src1, (i8 -28)),
            (MOVLPSrm VR128:$src1, addr:$src2)>;

  // Zero-extending 64-bit load: MOVLPS into an all-zero register (V_SET0).
  def : Pat<(v4f32 (X86vzload64 addr:$src)),
            (MOVLPSrm (v4f32 (V_SET0)), addr:$src)>;
  def : Pat<(X86vextractstore64 (v4f32 VR128:$src), addr:$dst),
            (MOVLPSmr addr:$dst, VR128:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Hi packed FP Instructions
//===----------------------------------------------------------------------===//

defm MOVH : sse12_mov_hilo_packed<0x16, X86Unpckl, "movhp">;

let SchedRW = [WriteFStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
let Predicates = [UseAVX] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movhps\t{$src, $dst|$dst, $src}",
                     []>, VEX, WIG;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movhpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (extractelt
                                   (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                   (iPTR 0))), addr:$dst)]>, VEX, WIG;
} // UseAVX
let mayStore = 1, hasSideEffects = 0 in
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhps\t{$src, $dst|$dst, $src}",
                   []>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (extractelt
                                 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                 (iPTR 0))), addr:$dst)]>;
} // SchedRW

let Predicates = [UseAVX] in {
  // MOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
            (VMOVHPDrm VR128:$src1, addr:$src2)>;

  def : Pat<(store (f64 (extractelt
                          (v2f64 (X86VPermilpi VR128:$src, (i8 1))),
                          (iPTR 0))), addr:$dst),
            (VMOVHPDmr addr:$dst, VR128:$src)>;

  // MOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;
}

let Predicates = [UseSSE1] in {
  // This pattern helps select MOVHPS on SSE1 only targets. With SSE2 we'll
  // end up with a movsd or blend instead of shufp.
  // No need for aligned load, we're only loading 64-bits.
  def : Pat<(X86Movlhps VR128:$src1, (v4f32 (simple_load addr:$src2))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128:$src1, (v4f32 (X86vzload64 addr:$src2))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;

  def : Pat<(X86vextractstore64 (v4f32 (X86Movhlps VR128:$src, VR128:$src)),
                                addr:$dst),
            (MOVHPSmr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  // MOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
            (MOVHPDrm VR128:$src1, addr:$src2)>;

  def : Pat<(store (f64 (extractelt
                          (v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))),
                          (iPTR 0))), addr:$dst),
            (MOVHPDmr addr:$dst, VR128:$src)>;

  // MOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
}

let Predicates = [UseSSE2, NoSSE41_Or_OptForSize] in {
  // Use MOVLPD to load into the low bits from a full vector unless we can use
  // BLENDPD.
  def : Pat<(X86Movsd VR128:$src1, (v2f64 (simple_load addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

// Three-operand VEX forms. Only the MOVHLPS records are marked commutable.
let Predicates = [UseAVX] in {
  def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
                                       (ins VR128:$src1, VR128:$src2),
                      "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
                      VEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>, WIG;
  let isCommutable = 1 in
  def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
                                       (ins VR128:$src1, VR128:$src2),
                      "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
                      VEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>, WIG;
}
// Two-operand forms: $src1 is tied to $dst.
let Constraints = "$src1 = $dst" in {
  def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
                                       (ins VR128:$src1, VR128:$src2),
                      "movlhps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
                      Sched<[SchedWriteFShuffle.XMM]>;
  let isCommutable = 1 in
  def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
                                       (ins VR128:$src1, VR128:$src2),
                      "movhlps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
                      Sched<[SchedWriteFShuffle.XMM]>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Conversion Instructions
//===----------------------------------------------------------------------===//

/// sse12_cvt_s - Scalar conversion with one source operand; defines a
/// register form (rr) and a memory form (rm).
///   SrcRC / DstRC - source and destination register classes.
///   OpNode        - DAG operator matched by both forms.
///   x86memop      - memory operand type of the rm form.
///   ld_frag       - load fragment wrapped around addr:$src in the rm pattern.
///   asm           - mnemonic used by the rr form.
///   mem           - mnemonic used by the rm form (callers may pass a size
///                   suffix such as "{l}" or "{q}").
///   sched         - scheduling class; the rm form uses sched.Folded.
///   d             - execution domain applied to both records.
///   Int2Fpu       - extra SchedRead attached to the rr form only.
multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                       SDPatternOperator OpNode, X86MemOperand x86memop,
                       PatFrag ld_frag, string asm, string mem,
                       X86FoldableSchedWrite sched,
                       Domain d,
                       SchedRead Int2Fpu = ReadDefault> {
  let ExeDomain = d in {
    def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstRC:$dst, (OpNode SrcRC:$src))]>,
                Sched<[sched, Int2Fpu]>;
    def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
                mem#"\t{$src, $dst|$dst, $src}",
                [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
                Sched<[sched.Folded]>;
  }
}

/// sse12_cvt_p - Packed signed-integer to floating-point conversion
/// (any_sint_to_fp from SrcTy to DstTy) with rr and rm forms. Both records
/// read MXCSR and are marked mayRaiseFPException.
multiclass sse12_cvt_p<bits<8> opc, RegisterClass RC, X86MemOperand x86memop,
                       ValueType DstTy, ValueType SrcTy, PatFrag ld_frag,
                       string asm, Domain d, X86FoldableSchedWrite sched> {
let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), asm,
             [(set RC:$dst, (DstTy (any_sint_to_fp (SrcTy RC:$src))))], d>,
             Sched<[sched]>;
  let mayLoad = 1 in
  def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), asm,
             [(set RC:$dst, (DstTy (any_sint_to_fp
                                    (SrcTy (ld_frag addr:$src)))))], d>,
             Sched<[sched.Folded]>;
}
}

/// sse12_vcvt_avx - Scalar conversion records with an additional DstRC input
/// ($src1) ahead of the converted source. Neither record carries a selection
/// pattern; the rm form wraps `mem` in braces after the mnemonic.
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          X86MemOperand x86memop, string asm, string mem,
                          X86FoldableSchedWrite sched, Domain d> {
let hasSideEffects = 0, Predicates = [UseAVX], ExeDomain = d in {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  let mayLoad = 1 in
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins DstRC:$src1, x86memop:$src),
              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
} // hasSideEffects = 0
}

// AVX scalar FP -> integer conversions. Opcode 0x2C matches any_fp_to_sint;
// opcode 0x2D matches lrint (32-bit result) or llrint (64-bit result).
// MXCSR use and mayRaiseFPException are set once by the enclosing let.
let isCodeGenOnly = 1, Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
defm VCVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint, f32mem, loadf32,
                                "cvttss2si", "cvttss2si",
                                WriteCvtSS2I, SSEPackedSingle>,
                                TB, XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint, f32mem, loadf32,
                                "cvttss2si", "cvttss2si",
                                WriteCvtSS2I, SSEPackedSingle>,
                                TB, XS, VEX, REX_W, VEX_LIG;
defm VCVTTSD2SI   : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64,
                                "cvttsd2si", "cvttsd2si",
                                WriteCvtSD2I, SSEPackedDouble>,
                                TB, XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64,
                                "cvttsd2si", "cvttsd2si",
                                WriteCvtSD2I, SSEPackedDouble>,
                                TB, XD, VEX, REX_W, VEX_LIG;

defm VCVTSS2SI   : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32,
                               "cvtss2si", "cvtss2si",
                               WriteCvtSS2I, SSEPackedSingle>,
                               TB, XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32,
                               "cvtss2si", "cvtss2si",
                               WriteCvtSS2I, SSEPackedSingle>,
                               TB, XS, VEX, REX_W, VEX_LIG;
defm VCVTSD2SI   : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64,
                               "cvtsd2si", "cvtsd2si",
                               WriteCvtSD2I, SSEPackedDouble>,
                               TB, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64,
                               "cvtsd2si", "cvtsd2si",
                               WriteCvtSD2I, SSEPackedDouble>,
                               TB, XD, VEX, REX_W, VEX_LIG;
}

// The assembler can recognize rr 64-bit instructions by seeing a rxx
// register, but the same isn't true when only using memory operands,
// provide other assembly "l" and "q" forms to address this explicitly
// where appropriate to do so.
// NOTE(review): VCVTSI2SD (i32 -> f64) is the only record here without
// SIMD_EXC, presumably because that conversion is always exact; confirm.
let isCodeGenOnly = 1 in {
defm VCVTSI2SS   : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss", "l",
                                  WriteCvtI2SS, SSEPackedSingle>, TB, XS, VEX, VVVV,
                                  VEX_LIG, SIMD_EXC;
defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss", "q",
                                  WriteCvtI2SS, SSEPackedSingle>, TB, XS, VEX, VVVV,
                                  REX_W, VEX_LIG, SIMD_EXC;
defm VCVTSI2SD   : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd", "l",
                                  WriteCvtI2SD, SSEPackedDouble>, TB, XD, VEX, VVVV,
                                  VEX_LIG;
defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q",
                                  WriteCvtI2SD, SSEPackedDouble>, TB, XD, VEX, VVVV,
                                  REX_W, VEX_LIG, SIMD_EXC;
} // isCodeGenOnly = 1

let Predicates = [UseAVX] in {
  // The VCVTSI2* records above have no patterns of their own; select them
  // here, feeding IMPLICIT_DEF to the extra $src1 operand.
  def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f32 (any_sint_to_fp GR32:$src)),
            (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f32 (any_sint_to_fp GR64:$src)),
            (VCVTSI642SSrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
  def : Pat<(f64 (any_sint_to_fp GR32:$src)),
            (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f64 (any_sint_to_fp GR64:$src)),
            (VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

  // lrint with an i64 result also maps onto the 64-bit conversions (the
  // defms above only attach llrint to them).
  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64rr FR32:$src)>;
  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64rm addr:$src)>;

  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64rr FR64:$src)>;
  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64rm addr:$src)>;
}

// Non-VEX scalar conversions. Same opcode / operator pairing as the AVX
// records; exception behaviour is attached per record through SIMD_EXC.
// NOTE(review): CVTSI2SD (i32 -> f64) omits SIMD_EXC, presumably because that
// conversion is always exact; confirm.
let isCodeGenOnly = 1 in {
defm CVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint, f32mem, loadf32,
                               "cvttss2si", "cvttss2si",
                               WriteCvtSS2I, SSEPackedSingle>, TB, XS, SIMD_EXC;
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint, f32mem, loadf32,
                               "cvttss2si", "cvttss2si",
                               WriteCvtSS2I, SSEPackedSingle>, TB, XS, REX_W, SIMD_EXC;
defm CVTTSD2SI   : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64,
                               "cvttsd2si", "cvttsd2si",
                               WriteCvtSD2I, SSEPackedDouble>, TB, XD, SIMD_EXC;
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64,
                               "cvttsd2si", "cvttsd2si",
                               WriteCvtSD2I, SSEPackedDouble>, TB, XD, REX_W, SIMD_EXC;

defm CVTSS2SI   : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32,
                              "cvtss2si", "cvtss2si",
                              WriteCvtSS2I, SSEPackedSingle>, TB, XS, SIMD_EXC;
defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32,
                              "cvtss2si", "cvtss2si",
                              WriteCvtSS2I, SSEPackedSingle>, TB, XS, REX_W, SIMD_EXC;
defm CVTSD2SI   : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64,
                              "cvtsd2si", "cvtsd2si",
                              WriteCvtSD2I, SSEPackedDouble>, TB, XD, SIMD_EXC;
defm CVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64,
                              "cvtsd2si", "cvtsd2si",
                              WriteCvtSD2I, SSEPackedDouble>, TB, XD, REX_W, SIMD_EXC;

defm CVTSI2SS   : sse12_cvt_s<0x2A, GR32, FR32, any_sint_to_fp, i32mem, loadi32,
                              "cvtsi2ss", "cvtsi2ss{l}",
                              WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, TB, XS, SIMD_EXC;
defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, any_sint_to_fp, i64mem, loadi64,
                              "cvtsi2ss", "cvtsi2ss{q}",
                              WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, TB, XS, REX_W, SIMD_EXC;
defm CVTSI2SD   : sse12_cvt_s<0x2A, GR32, FR64, any_sint_to_fp, i32mem, loadi32,
                              "cvtsi2sd", "cvtsi2sd{l}",
                              WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, TB, XD;
defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, any_sint_to_fp, i64mem, loadi64,
                              "cvtsi2sd", "cvtsi2sd{q}",
                              WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, TB, XD, REX_W, SIMD_EXC;
} // isCodeGenOnly = 1

// lrint with an i64 result selects the 64-bit conversions here as well.
let Predicates = [UseSSE1] in {
  def : Pat<(i64 (lrint FR32:$src)), (CVTSS2SI64rr FR32:$src)>;
  def : Pat<(i64 (lrint (loadf32 addr:$src))), (CVTSS2SI64rm addr:$src)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(i64 (lrint FR64:$src)), (CVTSD2SI64rr FR64:$src)>;
  def : Pat<(i64 (lrint (loadf64 addr:$src))), (CVTSD2SI64rm addr:$src)>;
}

// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).

/// sse12_cvt_sint - Two-operand conversion with a register (rr_Int) and a
/// memory (rm_Int) source form.
///   opc           - opcode byte shared by both forms.
///   SrcRC/DstRC   - source and destination register classes.
///   DstVT/SrcVT   - value types used to type the selection pattern.
///   OpNode        - DAG node matched by both forms.
///   memop         - memory operand used by rm_Int.
///   mem_frags     - load fragment applied to addr:$src in the rm_Int pattern.
///   asm           - mnemonic; the operand string is appended here.
///   sched         - scheduling class (rm_Int uses sched.Folded).
///   d             - execution domain applied to both records.
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          ValueType DstVT, ValueType SrcVT, SDNode OpNode,
                          Operand memop, PatFrags mem_frags, string asm,
                          X86FoldableSchedWrite sched, Domain d> {
  let ExeDomain = d in {
    // Register source.
    def rr_Int : SI<opc, MRMSrcReg,
                    (outs DstRC:$dst), (ins SrcRC:$src),
                    asm#"\t{$src, $dst|$dst, $src}",
                    [(set DstRC:$dst,
                          (DstVT (OpNode (SrcVT SrcRC:$src))))]>,
                 Sched<[sched]>;

    // Memory source.
    def rm_Int : SI<opc, MRMSrcMem,
                    (outs DstRC:$dst), (ins memop:$src),
                    asm#"\t{$src, $dst|$dst, $src}",
                    [(set DstRC:$dst,
                          (DstVT (OpNode (SrcVT (mem_frags addr:$src)))))]>,
                 Sched<[sched.Folded]>;
  }
}

/// sse12_cvt_sint_3addr - Pattern-less conversion records that take an extra
/// $src1 operand of the destination register class.
///   Is2Addr - when set (the default) the asm string prints only $src2 and
///             $dst; when clear it prints $src2, $src1 and $dst.
///   mem     - suffix letter wrapped in "{...}" after the mnemonic of the
///             memory form only.
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
                                RegisterClass DstRC, X86MemOperand x86memop,
                                string asm, string mem,
                                X86FoldableSchedWrite sched,
                                Domain d, bit Is2Addr = 1> {
  let hasSideEffects = 0, ExeDomain = d in {
    def rr_Int : SI<opc, MRMSrcReg,
                    (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
                    !if(Is2Addr,
                        asm#"\t{$src2, $dst|$dst, $src2}",
                        asm#"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>,
                 Sched<[sched, ReadDefault, ReadInt2Fpu]>;

    // No pattern is attached, so the load has to be declared explicitly.
    let mayLoad = 1 in
    def rm_Int : SI<opc, MRMSrcMem,
                    (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src2),
                    !if(Is2Addr,
                        asm#"{"#mem#"}\t{$src2, $dst|$dst, $src2}",
                        asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>,
                 Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// cvtsd2si: v2f64 source in VR128, GR32/GR64 destination, matched through
// X86cvts2si. All four instantiations read MXCSR and may raise FP exceptions.
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  // VEX-encoded forms.
  let Predicates = [UseAVX] in {
    defm VCVTSD2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64,
                                      X86cvts2si, sdmem, sse_load_f64,
                                      "cvtsd2si", WriteCvtSD2I,
                                      SSEPackedDouble>,
                       TB, XD, VEX, VEX_LIG;
    defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64,
                                      X86cvts2si, sdmem, sse_load_f64,
                                      "cvtsd2si", WriteCvtSD2I,
                                      SSEPackedDouble>,
                       TB, XD, VEX, REX_W, VEX_LIG;
  }

  // Legacy-encoded forms.
  defm CVTSD2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64,
                                   X86cvts2si, sdmem, sse_load_f64,
                                   "cvtsd2si", WriteCvtSD2I,
                                   SSEPackedDouble>,
                    TB, XD;
  defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64,
                                   X86cvts2si, sdmem, sse_load_f64,
                                   "cvtsd2si", WriteCvtSD2I,
                                   SSEPackedDouble>,
                    TB, XD, REX_W;
}

// cvtsi2ss / cvtsi2sd, VEX-encoded: Is2Addr = 0, so $src1 is printed.
// The GR32 -> double form (VCVTSI2SD) is the only one without SIMD_EXC.
// NOTE(review): presumably because that conversion cannot raise an FP
// exception - confirm.
let Predicates = [UseAVX] in {
  defm VCVTSI2SS   : sse12_cvt_sint_3addr<0x2A, GR32, VR128, i32mem,
                                          "cvtsi2ss", "l", WriteCvtI2SS,
                                          SSEPackedSingle, 0>,
                     TB, XS, VEX, VVVV, VEX_LIG, SIMD_EXC;
  defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, i64mem,
                                          "cvtsi2ss", "q", WriteCvtI2SS,
                                          SSEPackedSingle, 0>,
                     TB, XS, VEX, VVVV, VEX_LIG, REX_W, SIMD_EXC;
  defm VCVTSI2SD   : sse12_cvt_sint_3addr<0x2A, GR32, VR128, i32mem,
                                          "cvtsi2sd", "l", WriteCvtI2SD,
                                          SSEPackedDouble, 0>,
                     TB, XD, VEX, VVVV, VEX_LIG;
  defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, i64mem,
                                          "cvtsi2sd", "q", WriteCvtI2SD,
                                          SSEPackedDouble, 0>,
                     TB, XD, VEX, VVVV, VEX_LIG, REX_W, SIMD_EXC;
}

// cvtsi2ss / cvtsi2sd, legacy-encoded: $src1 is tied to $dst and the default
// Is2Addr = 1 asm string is used. As above, the GR32 -> double form carries
// no SIMD_EXC.
let Constraints = "$src1 = $dst" in {
  defm CVTSI2SS   : sse12_cvt_sint_3addr<0x2A, GR32, VR128, i32mem,
                                         "cvtsi2ss", "l", WriteCvtI2SS,
                                         SSEPackedSingle>,
                    TB, XS, SIMD_EXC;
  defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, i64mem,
                                         "cvtsi2ss", "q", WriteCvtI2SS,
                                         SSEPackedSingle>,
                    TB, XS, REX_W, SIMD_EXC;
  defm CVTSI2SD   : sse12_cvt_sint_3addr<0x2A, GR32, VR128, i32mem,
                                         "cvtsi2sd", "l", WriteCvtI2SD,
                                         SSEPackedDouble>,
                    TB, XD;
  defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, i64mem,
                                         "cvtsi2sd", "q", WriteCvtI2SD,
                                         SSEPackedDouble>,
                    TB, XD, REX_W, SIMD_EXC;
}

// AT&T aliases: register forms spelled with an explicit {l}/{q} suffix.
def : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI2SSrr_Int VR128:$dst, VR128:$src1, GR32:$src2),
                0, "att">;
def : InstAlias<"vcvtsi2ss{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI642SSrr_Int VR128:$dst, VR128:$src1, GR64:$src2),
                0, "att">;
def : InstAlias<"vcvtsi2sd{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI2SDrr_Int VR128:$dst, VR128:$src1, GR32:$src2),
                0, "att">;
def : InstAlias<"vcvtsi2sd{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI642SDrr_Int VR128:$dst, VR128:$src1, GR64:$src2),
                0, "att">;

// AT&T aliases: suffix-less memory forms map to the i32mem variants.
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SSrm_Int VR128:$dst, VR128:$src1, i32mem:$src),
                0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SDrm_Int VR128:$dst, VR128:$src1, i32mem:$src),
                0, "att">;

// Same two alias groups for the legacy-encoded instructions.
def : InstAlias<"cvtsi2ss{l}\t{$src, $dst|$dst, $src}",
                (CVTSI2SSrr_Int VR128:$dst, GR32:$src), 0, "att">;
def : InstAlias<"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
                (CVTSI642SSrr_Int VR128:$dst, GR64:$src), 0, "att">;
def : InstAlias<"cvtsi2sd{l}\t{$src, $dst|$dst, $src}",
                (CVTSI2SDrr_Int VR128:$dst, GR32:$src), 0, "att">;
def : InstAlias<"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
                (CVTSI642SDrr_Int VR128:$dst, GR64:$src), 0, "att">;

def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
                (CVTSI2SSrm_Int VR128:$dst, i32mem:$src), 0, "att">;
def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
                (CVTSI2SDrm_Int VR128:$dst, i32mem:$src), 0, "att">;

/// SSE 1 Only

// Aliases for intrinsics

// cvttss2si / cvttsd2si, VEX-encoded, matched through X86cvtts2Int.
// The double-precision forms are scheduled with WriteCvtSD2I so that they
// agree with the legacy-encoded CVTTSD2SI/CVTTSD2SI64 defined right below
// (they previously used the single-precision class WriteCvtSS2I).
let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm VCVTTSS2SI   : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32,
                                     X86cvtts2Int, ssmem, sse_load_f32,
                                     "cvttss2si", WriteCvtSS2I,
                                     SSEPackedSingle>,
                      TB, XS, VEX, VEX_LIG;
  defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
                                     X86cvtts2Int, ssmem, sse_load_f32,
                                     "cvttss2si", WriteCvtSS2I,
                                     SSEPackedSingle>,
                      TB, XS, VEX, VEX_LIG, REX_W;
  defm VCVTTSD2SI   : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64,
                                     X86cvtts2Int, sdmem, sse_load_f64,
                                     "cvttsd2si", WriteCvtSD2I,
                                     SSEPackedDouble>,
                      TB, XD, VEX, VEX_LIG;
  defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
                                     X86cvtts2Int, sdmem, sse_load_f64,
                                     "cvttsd2si", WriteCvtSD2I,
                                     SSEPackedDouble>,
                      TB, XD, VEX, VEX_LIG, REX_W;
}

// cvttss2si / cvttsd2si, legacy-encoded.
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm CVTTSS2SI   : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32,
                                    X86cvtts2Int, ssmem, sse_load_f32,
                                    "cvttss2si", WriteCvtSS2I,
                                    SSEPackedSingle>,
                     TB, XS;
  defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
                                    X86cvtts2Int, ssmem, sse_load_f32,
                                    "cvttss2si", WriteCvtSS2I,
                                    SSEPackedSingle>,
                     TB, XS, REX_W;
  defm CVTTSD2SI   : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64,
                                    X86cvtts2Int, sdmem, sse_load_f64,
                                    "cvttsd2si", WriteCvtSD2I,
                                    SSEPackedDouble>,
                     TB, XD;
  defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
                                    X86cvtts2Int, sdmem, sse_load_f64,
                                    "cvttsd2si", WriteCvtSD2I,
                                    SSEPackedDouble>,
                     TB, XD, REX_W;
}

// AT&T aliases accepting an explicit {l}/{q} suffix on the truncating
// conversions.
// NOTE(review): the memory aliases name f32mem/f64mem although the
// instructions above are instantiated with ssmem/sdmem - confirm this is
// intentional before touching either side.
def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">;
def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">;
def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">;

def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">;
def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">;
def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">;
def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">;

// cvtss2si, VEX-encoded, matched through X86cvts2si.
let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm VCVTSS2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32,
                                    X86cvts2si, ssmem, sse_load_f32,
                                    "cvtss2si", WriteCvtSS2I,
                                    SSEPackedSingle>,
                     TB, XS, VEX, VEX_LIG;
  defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32,
                                    X86cvts2si, ssmem, sse_load_f32,
                                    "cvtss2si", WriteCvtSS2I,
                                    SSEPackedSingle>,
                     TB, XS, VEX, REX_W, VEX_LIG;
}

// cvtss2si, legacy-encoded, plus the packed cvtdq2ps conversions; everything
// in this scope reads MXCSR and may raise FP exceptions.
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm CVTSS2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32,
                                   X86cvts2si, ssmem, sse_load_f32,
                                   "cvtss2si", WriteCvtSS2I,
                                   SSEPackedSingle>,
                    TB, XS;
  defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32,
                                   X86cvts2si, ssmem, sse_load_f32,
                                   "cvtss2si", WriteCvtSS2I,
                                   SSEPackedSingle>,
                    TB, XS, REX_W;

  // The VEX forms use the `load` fragment and the legacy form uses `memop`.
  defm VCVTDQ2PS  : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load,
                                "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                                SSEPackedSingle, WriteCvtI2PS>,
                    TB, VEX, Requires<[HasAVX, NoVLX]>, WIG;
  defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, load,
                                "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                                SSEPackedSingle, WriteCvtI2PSY>,
                    TB, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, WIG;

  defm CVTDQ2PS   : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memop,
                                "cvtdq2ps\t{$src, $dst|$dst, $src}",
                                SSEPackedSingle, WriteCvtI2PS>,
                    TB, Requires<[UseSSE2]>;
}

// AVX aliases
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SIrm_Int GR32:$dst, ssmem:$src), 0, "att">;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SIrm_Int GR32:$dst, sdmem:$src), 0, "att">;
12570b57cec5SDimitry Andricdef : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}", 12580b57cec5SDimitry Andric (VCVTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; 12590b57cec5SDimitry Andricdef : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}", 12600b57cec5SDimitry Andric (VCVTSS2SI64rm_Int GR64:$dst, ssmem:$src), 0, "att">; 12610b57cec5SDimitry Andricdef : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}", 12620b57cec5SDimitry Andric (VCVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; 12630b57cec5SDimitry Andricdef : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}", 12640b57cec5SDimitry Andric (VCVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0, "att">; 12650b57cec5SDimitry Andric 12660b57cec5SDimitry Andric// SSE aliases 12670b57cec5SDimitry Andricdef : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}", 12680b57cec5SDimitry Andric (CVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; 12690b57cec5SDimitry Andricdef : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}", 12700b57cec5SDimitry Andric (CVTSS2SIrm_Int GR32:$dst, ssmem:$src), 0, "att">; 12710b57cec5SDimitry Andricdef : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}", 12720b57cec5SDimitry Andric (CVTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; 12730b57cec5SDimitry Andricdef : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}", 12740b57cec5SDimitry Andric (CVTSD2SIrm_Int GR32:$dst, sdmem:$src), 0, "att">; 12750b57cec5SDimitry Andricdef : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}", 12760b57cec5SDimitry Andric (CVTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; 12770b57cec5SDimitry Andricdef : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}", 12780b57cec5SDimitry Andric (CVTSS2SI64rm_Int GR64:$dst, ssmem:$src), 0, "att">; 12790b57cec5SDimitry Andricdef : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}", 12800b57cec5SDimitry Andric (CVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; 12810b57cec5SDimitry Andricdef : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}", 12820b57cec5SDimitry Andric 
(CVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0, "att">; 12830b57cec5SDimitry Andric 12840b57cec5SDimitry Andric/// SSE 2 Only 12850b57cec5SDimitry Andric 12860b57cec5SDimitry Andric// Convert scalar double to scalar single 1287e8d8bef9SDimitry Andriclet isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX], 1288e8d8bef9SDimitry Andric ExeDomain = SSEPackedSingle in { 12890b57cec5SDimitry Andricdef VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), 12900b57cec5SDimitry Andric (ins FR32:$src1, FR64:$src2), 12910b57cec5SDimitry Andric "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 1292cb14a3feSDimitry Andric VEX, VVVV, VEX_LIG, WIG, 1293480093f4SDimitry Andric Sched<[WriteCvtSD2SS]>, SIMD_EXC; 12940b57cec5SDimitry Andriclet mayLoad = 1 in 12950b57cec5SDimitry Andricdef VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), 12960b57cec5SDimitry Andric (ins FR32:$src1, f64mem:$src2), 12970b57cec5SDimitry Andric "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 1298cb14a3feSDimitry Andric TB, XD, VEX, VVVV, VEX_LIG, WIG, 1299480093f4SDimitry Andric Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC; 13000b57cec5SDimitry Andric} 13010b57cec5SDimitry Andric 1302480093f4SDimitry Andricdef : Pat<(f32 (any_fpround FR64:$src)), 13030b57cec5SDimitry Andric (VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>, 13040b57cec5SDimitry Andric Requires<[UseAVX]>; 13050b57cec5SDimitry Andric 1306e8d8bef9SDimitry Andriclet isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in { 13070b57cec5SDimitry Andricdef CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), 13080b57cec5SDimitry Andric "cvtsd2ss\t{$src, $dst|$dst, $src}", 1309480093f4SDimitry Andric [(set FR32:$dst, (any_fpround FR64:$src))]>, 1310480093f4SDimitry Andric Sched<[WriteCvtSD2SS]>, SIMD_EXC; 13110b57cec5SDimitry Andricdef CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), 13120b57cec5SDimitry Andric "cvtsd2ss\t{$src, $dst|$dst, $src}", 1313480093f4SDimitry 
Andric [(set FR32:$dst, (any_fpround (loadf64 addr:$src)))]>, 1314cb14a3feSDimitry Andric TB, XD, Requires<[UseSSE2, OptForSize]>, 1315bdd1243dSDimitry Andric Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC; 13160b57cec5SDimitry Andric} 13170b57cec5SDimitry Andric 1318e8d8bef9SDimitry Andriclet Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = SSEPackedSingle in { 13190b57cec5SDimitry Andricdef VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg, 13200b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 13210b57cec5SDimitry Andric "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", 13220b57cec5SDimitry Andric [(set VR128:$dst, 13230b57cec5SDimitry Andric (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>, 1324cb14a3feSDimitry Andric TB, XD, VEX, VVVV, VEX_LIG, WIG, Requires<[UseAVX]>, 13250b57cec5SDimitry Andric Sched<[WriteCvtSD2SS]>; 13260b57cec5SDimitry Andricdef VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem, 13270b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), 13280b57cec5SDimitry Andric "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", 13290b57cec5SDimitry Andric [(set VR128:$dst, 13305ffd83dbSDimitry Andric (v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>, 1331cb14a3feSDimitry Andric TB, XD, VEX, VVVV, VEX_LIG, WIG, Requires<[UseAVX]>, 13320b57cec5SDimitry Andric Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>; 13330b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { 13340b57cec5SDimitry Andricdef CVTSD2SSrr_Int: I<0x5A, MRMSrcReg, 13350b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 13360b57cec5SDimitry Andric "cvtsd2ss\t{$src2, $dst|$dst, $src2}", 13370b57cec5SDimitry Andric [(set VR128:$dst, 13380b57cec5SDimitry Andric (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>, 1339cb14a3feSDimitry Andric TB, XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>; 13400b57cec5SDimitry Andricdef CVTSD2SSrm_Int: I<0x5A, MRMSrcMem, 13410b57cec5SDimitry 
Andric (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), 13420b57cec5SDimitry Andric "cvtsd2ss\t{$src2, $dst|$dst, $src2}", 13430b57cec5SDimitry Andric [(set VR128:$dst, 13445ffd83dbSDimitry Andric (v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>, 1345cb14a3feSDimitry Andric TB, XD, Requires<[UseSSE2]>, 13460b57cec5SDimitry Andric Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>; 13470b57cec5SDimitry Andric} 1348480093f4SDimitry Andric} 13490b57cec5SDimitry Andric 13500b57cec5SDimitry Andric// Convert scalar single to scalar double 13510b57cec5SDimitry Andric// SSE2 instructions with XS prefix 1352e8d8bef9SDimitry Andriclet isCodeGenOnly = 1, hasSideEffects = 0, ExeDomain = SSEPackedSingle in { 13530b57cec5SDimitry Andricdef VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), 13540b57cec5SDimitry Andric (ins FR64:$src1, FR32:$src2), 13550b57cec5SDimitry Andric "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 1356cb14a3feSDimitry Andric TB, XS, VEX, VVVV, VEX_LIG, WIG, 1357480093f4SDimitry Andric Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>, SIMD_EXC; 13580b57cec5SDimitry Andriclet mayLoad = 1 in 13590b57cec5SDimitry Andricdef VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), 13600b57cec5SDimitry Andric (ins FR64:$src1, f32mem:$src2), 13610b57cec5SDimitry Andric "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 1362cb14a3feSDimitry Andric TB, XS, VEX, VVVV, VEX_LIG, WIG, 13630b57cec5SDimitry Andric Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>, 1364480093f4SDimitry Andric Requires<[UseAVX, OptForSize]>, SIMD_EXC; 13650b57cec5SDimitry Andric} // isCodeGenOnly = 1, hasSideEffects = 0 13660b57cec5SDimitry Andric 1367480093f4SDimitry Andricdef : Pat<(f64 (any_fpextend FR32:$src)), 13680b57cec5SDimitry Andric (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>, Requires<[UseAVX]>; 1369480093f4SDimitry Andricdef : Pat<(any_fpextend (loadf32 addr:$src)), 13700b57cec5SDimitry Andric (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), 
addr:$src)>, Requires<[UseAVX, OptForSize]>; 13710b57cec5SDimitry Andric 1372e8d8bef9SDimitry Andriclet isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in { 13730b57cec5SDimitry Andricdef CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), 13740b57cec5SDimitry Andric "cvtss2sd\t{$src, $dst|$dst, $src}", 1375480093f4SDimitry Andric [(set FR64:$dst, (any_fpextend FR32:$src))]>, 1376cb14a3feSDimitry Andric TB, XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>, SIMD_EXC; 13770b57cec5SDimitry Andricdef CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), 13780b57cec5SDimitry Andric "cvtss2sd\t{$src, $dst|$dst, $src}", 1379480093f4SDimitry Andric [(set FR64:$dst, (any_fpextend (loadf32 addr:$src)))]>, 1380cb14a3feSDimitry Andric TB, XS, Requires<[UseSSE2, OptForSize]>, 1381bdd1243dSDimitry Andric Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>, SIMD_EXC; 13820b57cec5SDimitry Andric} // isCodeGenOnly = 1 13830b57cec5SDimitry Andric 1384e8d8bef9SDimitry Andriclet hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1, 1385e8d8bef9SDimitry Andric ExeDomain = SSEPackedSingle in { 13860b57cec5SDimitry Andricdef VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg, 13870b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 13880b57cec5SDimitry Andric "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 1389cb14a3feSDimitry Andric []>, TB, XS, VEX, VVVV, VEX_LIG, WIG, 13900b57cec5SDimitry Andric Requires<[HasAVX]>, Sched<[WriteCvtSS2SD]>; 13910b57cec5SDimitry Andriclet mayLoad = 1 in 13920b57cec5SDimitry Andricdef VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem, 13930b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), 13940b57cec5SDimitry Andric "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 1395cb14a3feSDimitry Andric []>, TB, XS, VEX, VVVV, VEX_LIG, WIG, Requires<[HasAVX]>, 13960b57cec5SDimitry Andric Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>; 13970b57cec5SDimitry Andriclet Constraints = "$src1 = 
$dst" in { // SSE2 instructions with XS prefix 13980b57cec5SDimitry Andricdef CVTSS2SDrr_Int: I<0x5A, MRMSrcReg, 13990b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 14000b57cec5SDimitry Andric "cvtss2sd\t{$src2, $dst|$dst, $src2}", 1401cb14a3feSDimitry Andric []>, TB, XS, Requires<[UseSSE2]>, 14020b57cec5SDimitry Andric Sched<[WriteCvtSS2SD]>; 14030b57cec5SDimitry Andriclet mayLoad = 1 in 14040b57cec5SDimitry Andricdef CVTSS2SDrm_Int: I<0x5A, MRMSrcMem, 14050b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), 14060b57cec5SDimitry Andric "cvtss2sd\t{$src2, $dst|$dst, $src2}", 1407cb14a3feSDimitry Andric []>, TB, XS, Requires<[UseSSE2]>, 14080b57cec5SDimitry Andric Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>; 14090b57cec5SDimitry Andric} 14100b57cec5SDimitry Andric} // hasSideEffects = 0 14110b57cec5SDimitry Andric 14120b57cec5SDimitry Andric// Patterns used for matching (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss and 14130b57cec5SDimitry Andric// (v)cvtss2sd intrinsic sequences from clang which produce unnecessary 14140b57cec5SDimitry Andric// vmovs{s,d} instructions 14150b57cec5SDimitry Andriclet Predicates = [UseAVX] in { 14160b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 14170b57cec5SDimitry Andric (v4f32 VR128:$dst), 14180b57cec5SDimitry Andric (v4f32 (scalar_to_vector 1419480093f4SDimitry Andric (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))), 14200b57cec5SDimitry Andric (VCVTSD2SSrr_Int VR128:$dst, VR128:$src)>; 14210b57cec5SDimitry Andric 14220b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 14230b57cec5SDimitry Andric (v2f64 VR128:$dst), 14240b57cec5SDimitry Andric (v2f64 (scalar_to_vector 1425480093f4SDimitry Andric (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))), 14260b57cec5SDimitry Andric (VCVTSS2SDrr_Int VR128:$dst, VR128:$src)>; 14270b57cec5SDimitry Andric 14280b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 14290b57cec5SDimitry Andric (v4f32 VR128:$dst), 
1430480093f4SDimitry Andric (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))), 14310b57cec5SDimitry Andric (VCVTSI642SSrr_Int VR128:$dst, GR64:$src)>; 14320b57cec5SDimitry Andric 14330b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 14340b57cec5SDimitry Andric (v4f32 VR128:$dst), 1435480093f4SDimitry Andric (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))), 14360b57cec5SDimitry Andric (VCVTSI642SSrm_Int VR128:$dst, addr:$src)>; 14370b57cec5SDimitry Andric 14380b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 14390b57cec5SDimitry Andric (v4f32 VR128:$dst), 1440480093f4SDimitry Andric (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))), 14410b57cec5SDimitry Andric (VCVTSI2SSrr_Int VR128:$dst, GR32:$src)>; 14420b57cec5SDimitry Andric 14430b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 14440b57cec5SDimitry Andric (v4f32 VR128:$dst), 1445480093f4SDimitry Andric (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))), 14460b57cec5SDimitry Andric (VCVTSI2SSrm_Int VR128:$dst, addr:$src)>; 14470b57cec5SDimitry Andric 14480b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 14490b57cec5SDimitry Andric (v2f64 VR128:$dst), 1450480093f4SDimitry Andric (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))), 14510b57cec5SDimitry Andric (VCVTSI642SDrr_Int VR128:$dst, GR64:$src)>; 14520b57cec5SDimitry Andric 14530b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 14540b57cec5SDimitry Andric (v2f64 VR128:$dst), 1455480093f4SDimitry Andric (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))), 14560b57cec5SDimitry Andric (VCVTSI642SDrm_Int VR128:$dst, addr:$src)>; 14570b57cec5SDimitry Andric 14580b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 14590b57cec5SDimitry Andric (v2f64 VR128:$dst), 1460480093f4SDimitry Andric (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))), 14610b57cec5SDimitry Andric (VCVTSI2SDrr_Int VR128:$dst, GR32:$src)>; 14620b57cec5SDimitry Andric 14630b57cec5SDimitry 
// Last pattern of the enclosing UseAVX group: fold an i32 load + sint_to_fp
// that is inserted into the low f64 element into VCVTSI2SDrm_Int.
def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SDrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseAVX]

// SSE2 forms: a scalar conversion whose result is merged into the low element
// of $dst (X86Movss / X86Movsd) selects the matching *_Int instruction.
let Predicates = [UseSSE2] in {
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128:$dst),
                   (v4f32 (scalar_to_vector
                     (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
          (CVTSD2SSrr_Int VR128:$dst, VR128:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128:$dst),
                   (v2f64 (scalar_to_vector
                     (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
          (CVTSS2SDrr_Int VR128:$dst, VR128:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
          (CVTSI642SDrr_Int VR128:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
          (CVTSI642SDrm_Int VR128:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
          (CVTSI2SDrr_Int VR128:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
          (CVTSI2SDrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseSSE2]

// SSE1 forms: integer (register or loaded) -> f32 merged into the low element.
let Predicates = [UseSSE1] in {
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
          (CVTSI642SSrr_Int VR128:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
          (CVTSI642SSrm_Int VR128:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
          (CVTSI2SSrr_Int VR128:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
          (CVTSI2SSrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseSSE1]

let Predicates = [HasAVX, NoVLX] in {
// Convert packed single/double fp to doubleword
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtps2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>,
                       VEX, Sched<[WriteCvtPS2I]>, WIG, SIMD_EXC;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "cvtps2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v4i32 (X86cvtp2Int (loadv4f32 addr:$src))))]>,
                       VEX, Sched<[WriteCvtPS2ILd]>, WIG, SIMD_EXC;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                        "cvtps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR256:$dst,
                          (v8i32 (X86cvtp2Int (v8f32 VR256:$src))))]>,
                        VEX, VEX_L, Sched<[WriteCvtPS2IY]>, WIG, SIMD_EXC;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                        "cvtps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR256:$dst,
                          (v8i32 (X86cvtp2Int (loadv8f32 addr:$src))))]>,
                        VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, WIG, SIMD_EXC;
}
// Non-VEX forms; the memory form matches through memopv4f32 rather than
// loadv4f32.
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtps2dq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>,
                     Sched<[WriteCvtPS2I]>, SIMD_EXC;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     "cvtps2dq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v4i32 (X86cvtp2Int (memopv4f32 addr:$src))))]>,
                     Sched<[WriteCvtPS2ILd]>, SIMD_EXC;

// Convert Packed Double FP to Packed DW Integers
let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "vcvtpd2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>,
                      VEX, Sched<[WriteCvtPD2I]>, WIG;

// XMM only
def VCVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                      "vcvtpd2dq{x}\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4i32 (X86cvtp2Int (loadv2f64 addr:$src))))]>, VEX,
                      Sched<[WriteCvtPD2ILd]>, WIG;

// YMM only
def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                       "vcvtpd2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v4i32 (X86cvtp2Int (v4f64 VR256:$src))))]>,
                       VEX, VEX_L, Sched<[WriteCvtPD2IY]>, WIG;
def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                       "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v4i32 (X86cvtp2Int (loadv4f64 addr:$src))))]>,
                       VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, WIG;
}

// Select vector lrint producing v4i32/v8i32 onto the VEX packed
// fp -> doubleword conversions defined above.
let Predicates = [HasAVX] in {
  def : Pat<(v4i32 (lrint VR128:$src)), (VCVTPS2DQrr VR128:$src)>;
  def : Pat<(v4i32 (lrint (loadv4f32 addr:$src))), (VCVTPS2DQrm addr:$src)>;
  def : Pat<(v8i32 (lrint VR256:$src)), (VCVTPS2DQYrr VR256:$src)>;
  def : Pat<(v8i32 (lrint (loadv8f32 addr:$src))), (VCVTPS2DQYrm addr:$src)>;
  def : Pat<(v4i32 (lrint VR256:$src)), (VCVTPD2DQYrr VR256:$src)>;
  def : Pat<(v4i32 (lrint (loadv4f64 addr:$src))), (VCVTPD2DQYrm addr:$src)>;
}

// Same selection for the non-VEX single-precision forms.
let Predicates = [UseSSE2] in {
  def : Pat<(v4i32 (lrint VR128:$src)), (CVTPS2DQrr VR128:$src)>;
  def : Pat<(v4i32 (lrint (loadv4f32 addr:$src))), (CVTPS2DQrm addr:$src)>;
}

// AT&T-only aliases that accept the explicit x/y suffix on the register forms.
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
                (VCVTPD2DQrr VR128:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}",
                (VCVTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">;

def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     "cvtpd2dq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v4i32 (X86cvtp2Int (memopv2f64 addr:$src))))]>,
                     Sched<[WriteCvtPD2ILd]>, SIMD_EXC;
def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtpd2dq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>,
                     Sched<[WriteCvtPD2I]>, SIMD_EXC;

// Convert with truncation packed single/double fp to doubleword
// SSE2 packed instructions with XS prefix
let Uses = [MXCSR], mayRaiseFPException = 1 in {
let Predicates = [HasAVX, NoVLX] in {
def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "cvttps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (v4i32 (X86any_cvttp2si (v4f32 VR128:$src))))]>,
                         VEX, Sched<[WriteCvtPS2I]>, WIG;
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                         "cvttps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (v4i32 (X86any_cvttp2si (loadv4f32 addr:$src))))]>,
                         VEX, Sched<[WriteCvtPS2ILd]>, WIG;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                          "cvttps2dq\t{$src, $dst|$dst, $src}",
                          [(set VR256:$dst,
                            (v8i32 (X86any_cvttp2si (v8f32 VR256:$src))))]>,
                          VEX, VEX_L, Sched<[WriteCvtPS2IY]>, WIG;
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                          "cvttps2dq\t{$src, $dst|$dst, $src}",
                          [(set VR256:$dst,
                            (v8i32 (X86any_cvttp2si (loadv8f32 addr:$src))))]>,
                          VEX, VEX_L,
                          Sched<[WriteCvtPS2IYLd]>, WIG;
}

def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v4i32 (X86any_cvttp2si (v4f32 VR128:$src))))]>,
                       Sched<[WriteCvtPS2I]>;
def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v4i32 (X86any_cvttp2si (memopv4f32 addr:$src))))]>,
                       Sched<[WriteCvtPS2ILd]>;
}

// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
// Truncating packed double -> doubleword conversions (VEX forms). The memory
// forms carry an explicit {x}/{y} suffix in their asm strings.
let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
// XMM only
def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvttpd2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v4i32 (X86any_cvttp2si (v2f64 VR128:$src))))]>,
                        VEX, Sched<[WriteCvtPD2I]>, WIG;
def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "cvttpd2dq{x}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))))]>,
                        VEX, Sched<[WriteCvtPD2ILd]>, WIG;

// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                         "cvttpd2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (v4i32 (X86any_cvttp2si (v4f64 VR256:$src))))]>,
                         VEX, VEX_L, Sched<[WriteCvtPD2IY]>, WIG;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                         "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (v4i32 (X86any_cvttp2si (loadv4f64 addr:$src))))]>,
                         VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, WIG;
} // Predicates = [HasAVX, NoVLX]

// AT&T-only aliases that accept the explicit x/y suffix on the register forms.
def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
                (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}",
                (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">;

// Generic (strict or non-strict) v4f64 -> v4i32 fp_to_sint selects the YMM
// truncating conversion.
let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v4i32 (any_fp_to_sint (v4f64 VR256:$src))),
            (VCVTTPD2DQYrr VR256:$src)>;
  def : Pat<(v4i32 (any_fp_to_sint (loadv4f64 addr:$src))),
            (VCVTTPD2DQYrm addr:$src)>;
}

def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4i32 (X86any_cvttp2si (v2f64 VR128:$src))))]>,
                      Sched<[WriteCvtPD2I]>, SIMD_EXC;
def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4i32 (X86any_cvttp2si (memopv2f64 addr:$src))))]>,
                      Sched<[WriteCvtPD2ILd]>, SIMD_EXC;

// Convert packed single to packed double
let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
                  // SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    "vcvtps2pd\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>,
                    TB, VEX, Sched<[WriteCvtPS2PD]>, WIG;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                    "vcvtps2pd\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
                    TB, VEX, Sched<[WriteCvtPS2PD.Folded]>, WIG;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
                     [(set VR256:$dst, (v4f64 (any_fpextend (v4f32 VR128:$src))))]>,
                     TB, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, WIG;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
                     [(set VR256:$dst, (v4f64 (extloadv4f32 addr:$src)))]>,
                     TB, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, WIG;
}

let Predicates = [UseSSE2], Uses = [MXCSR], mayRaiseFPException = 1 in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "cvtps2pd\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>,
                   TB, Sched<[WriteCvtPS2PD]>;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                   "cvtps2pd\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
                   TB, Sched<[WriteCvtPS2PD.Folded]>;
}

// Convert Packed DW Integers to Packed Double FP
let Predicates = [HasAVX, NoVLX] in {
// The 128-bit memory form takes an i64mem operand; its pattern matches a
// loaded i64 placed in a v2i64 and bitcast to v4i32.
let hasSideEffects = 0, mayLoad = 1 in
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                       "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v2f64 (X86any_VSintToFP
                                  (bc_v4i32
                                   (v2i64 (scalar_to_vector
                                           (loadi64 addr:$src)))))))]>,
                       VEX, Sched<[WriteCvtI2PDLd]>, WIG;
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v2f64 (X86any_VSintToFP (v4i32 VR128:$src))))]>,
                       VEX, Sched<[WriteCvtI2PD]>, WIG;
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR256:$dst,
                          (v4f64 (any_sint_to_fp (loadv4i32 addr:$src))))]>,
                        VEX, VEX_L, Sched<[WriteCvtI2PDYLd]>,
                        WIG;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR256:$dst,
                          (v4f64 (any_sint_to_fp (v4i32 VR128:$src))))]>,
                        VEX, VEX_L, Sched<[WriteCvtI2PDY]>, WIG;
}

let hasSideEffects = 0, mayLoad = 1 in
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                      "cvtdq2pd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v2f64 (X86any_VSintToFP
                                 (bc_v4i32
                                  (v2i64 (scalar_to_vector
                                          (loadi64 addr:$src)))))))]>,
                      Sched<[WriteCvtI2PDLd]>;
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvtdq2pd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v2f64 (X86any_VSintToFP (v4i32 VR128:$src))))]>,
                      Sched<[WriteCvtI2PD]>;

// AVX register conversion intrinsics
let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTDQ2PDrm addr:$src)>;
} // Predicates = [HasAVX, NoVLX]

// SSE2 register conversion intrinsics
let Predicates = [UseSSE2] in {
  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (CVTDQ2PDrm addr:$src)>;
} // Predicates = [UseSSE2]

// Convert packed double to packed single
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
// Packed double -> packed single conversions (VEX forms). The memory forms
// carry an explicit {x}/{y} suffix in their asm strings.
let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
// XMM only
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtpd2ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>,
                       VEX, Sched<[WriteCvtPD2PS]>, WIG;
def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (v4f32 (X86any_vfpround (loadv2f64 addr:$src))))]>,
                       VEX, Sched<[WriteCvtPD2PS.Folded]>, WIG;

def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                        "cvtpd2ps\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (v4f32 (X86any_vfpround (v4f64 VR256:$src))))]>,
                        VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, WIG;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                        "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (v4f32 (X86any_vfpround (loadv4f64 addr:$src))))]>,
                        VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, WIG;
} // Predicates = [HasAVX, NoVLX]

// AT&T-only aliases that accept the explicit x/y suffix on the register forms.
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
                (VCVTPD2PSrr VR128:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
                (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0, "att">;

def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>,
                     Sched<[WriteCvtPD2PS]>, SIMD_EXC;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (v4f32 (X86any_vfpround (memopv2f64 addr:$src))))]>,
                     Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXC;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
//===----------------------------------------------------------------------===//

// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
//
// Emits four records per instantiation, all with opcode 0xC2 and a trailing
// u8imm condition-code operand:
//   rri_Int / rmi_Int - operate on VR128 (memory form uses memop/mem_frags).
//   rri / rmi         - isCodeGenOnly forms on RC (memory form uses
//                       x86memop/ld_frag); only rri is marked commutable.
multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
                            Operand memop, SDNode OpNode, ValueType VT,
                            PatFrag ld_frag, string asm,
                            X86FoldableSchedWrite sched,
                            PatFrags mem_frags> {
  def rri_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
                     (ins VR128:$src1, VR128:$src2, u8imm:$cc), asm,
                     [(set VR128:$dst, (OpNode (VT VR128:$src1),
                                               VR128:$src2, timm:$cc))]>,
                Sched<[sched]>, SIMD_EXC;
  let mayLoad = 1 in
  def rmi_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
                     (ins VR128:$src1, memop:$src2, u8imm:$cc), asm,
                     [(set VR128:$dst, (OpNode (VT VR128:$src1),
                                               (mem_frags addr:$src2), timm:$cc))]>,
                Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rri : SIi8<0xC2, MRMSrcReg,
                   (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
                   [(set RC:$dst, (OpNode RC:$src1, RC:$src2, timm:$cc))]>,
              Sched<[sched]>, SIMD_EXC;
    def rmi : SIi8<0xC2, MRMSrcMem,
                   (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
                   [(set RC:$dst, (OpNode RC:$src1,
                                          (ld_frag addr:$src2), timm:$cc))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

// VEX three-operand scalar compares.
let ExeDomain = SSEPackedSingle in
defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32,
                 "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                 SchedWriteFCmpSizes.PS.Scl, sse_load_f32>,
                 TB, XS, VEX, VVVV, VEX_LIG, WIG;
let ExeDomain = SSEPackedDouble in
defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64,
                 "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                 SchedWriteFCmpSizes.PD.Scl, sse_load_f64>,
                 TB, XD, VEX, VVVV, VEX_LIG, WIG;

// Legacy two-operand scalar compares: $src1 is tied to $dst.
let Constraints = "$src1 = $dst" in {
  let ExeDomain = SSEPackedSingle in
  defm CMPSS : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32,
                  "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                  SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, TB, XS;
  let ExeDomain = SSEPackedDouble in
  defm CMPSD : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64,
                  "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                  SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, TB, XD;
}

// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
//
// Emits an rr and an rm record with no register outputs; both patterns set
// EFLAGS from OpNode applied to $src1 and $src2 (loaded via ld_frag for rm).
multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDPatternOperator OpNode,
                         ValueType vt, X86MemOperand x86memop,
                         PatFrag ld_frag, string OpcodeStr, Domain d,
                         X86FoldableSchedWrite sched = WriteFComX> {
  let ExeDomain = d in {
  def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
          Sched<[sched]>, SIMD_EXC;
  let mayLoad = 1 in
  def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode (vt RC:$src1),
                                   (ld_frag addr:$src2)))]>,
          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
}

// sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp
// Emits rr_Int / rm_Int records with no register outputs; both patterns set
// EFLAGS from OpNode applied to $src1 and $src2 (matched through mem_frags
// for the memory form, which takes a memop operand).
multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
                             ValueType vt, Operand memop,
                             PatFrags mem_frags, string OpcodeStr,
                             Domain d,
                             X86FoldableSchedWrite sched = WriteFComX> {
let ExeDomain = d in {
  def rr_Int: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
                 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                 [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
              Sched<[sched]>, SIMD_EXC;
let mayLoad = 1 in
  def rm_Int: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2),
                 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                 [(set EFLAGS, (OpNode (vt RC:$src1),
                                       (mem_frags addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
}

// (V)UCOMIS* use opcode 0x2E and (V)COMIS* use 0x2F. Each name is instantiated
// twice: once on FR32/FR64 through sse12_ord_cmp, and once as an isCodeGenOnly
// VR128 form through sse12_ord_cmp_int.
let Defs = [EFLAGS] in {
  defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32,
                                "ucomiss", SSEPackedSingle>, TB, VEX, VEX_LIG, WIG;
  defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64,
                                "ucomisd", SSEPackedDouble>, TB, PD, VEX, VEX_LIG, WIG;
  defm VCOMISS  : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32,
                                "comiss", SSEPackedSingle>, TB, VEX, VEX_LIG, WIG;
  defm VCOMISD  : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64,
                                "comisd", SSEPackedDouble>, TB, PD, VEX, VEX_LIG, WIG;

  let isCodeGenOnly = 1 in {
    defm VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
                        sse_load_f32, "ucomiss", SSEPackedSingle>, TB, VEX, VEX_LIG, WIG;
    defm VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
                        sse_load_f64, "ucomisd", SSEPackedDouble>, TB, PD, VEX, VEX_LIG, WIG;

    defm VCOMISS  : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
                        sse_load_f32, "comiss", SSEPackedSingle>, TB, VEX, VEX_LIG, WIG;
    defm VCOMISD  : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
                        sse_load_f64, "comisd", SSEPackedDouble>, TB, PD, VEX, VEX_LIG, WIG;
  }
  defm UCOMISS  : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32,
                                "ucomiss", SSEPackedSingle>, TB;
  defm UCOMISD  : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64,
                                "ucomisd", SSEPackedDouble>, TB, PD;
  defm COMISS   : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32,
                                "comiss", SSEPackedSingle>, TB;
  defm COMISD   : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64,
                                "comisd", SSEPackedDouble>, TB, PD;

  let isCodeGenOnly = 1 in {
    defm UCOMISS  : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
                        sse_load_f32, "ucomiss", SSEPackedSingle>, TB;
    defm UCOMISD  : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
                        sse_load_f64, "ucomisd", SSEPackedDouble>, TB, PD;

    defm COMISS   : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
                        sse_load_f32, "comiss", SSEPackedSingle>, TB;
    defm COMISD   : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
                        sse_load_f64, "comisd", SSEPackedDouble>, TB, PD;
  }
} // Defs = [EFLAGS]

// sse12_cmp_packed - sse 1 & 2 compare packed instructions
//
// Emits rri / rmi records with opcode 0xC2 and a trailing u8imm condition
// code; both select X86any_cmpp. Only the register form is marked commutable.
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
                            ValueType VT, string asm,
                            X86FoldableSchedWrite sched,
                            Domain d, PatFrag ld_frag> {
  let isCommutable = 1 in
  def rri : PIi8<0xC2, MRMSrcReg,
                 (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
                 [(set RC:$dst, (VT (X86any_cmpp RC:$src1, RC:$src2, timm:$cc)))], d>,
            Sched<[sched]>, SIMD_EXC;
  def rmi : PIi8<0xC2, MRMSrcMem,
                 (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
                 [(set RC:$dst,
                   (VT (X86any_cmpp RC:$src1, (ld_frag addr:$src2), timm:$cc)))], d>,
            Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}

// VEX three-operand packed compares (128-bit and 256-bit).
defm VCMPPS : sse12_cmp_packed<VR128, f128mem, v4f32,
               "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, loadv4f32>, TB, VEX, VVVV, WIG;
defm VCMPPD : sse12_cmp_packed<VR128, f128mem, v2f64,
               "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, loadv2f64>, TB, PD, VEX, VVVV, WIG;
defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, v8f32,
               "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SchedWriteFCmpSizes.PS.YMM, SSEPackedSingle, loadv8f32>, TB, VEX, VVVV, VEX_L, WIG;
defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, v4f64,
               "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SchedWriteFCmpSizes.PD.YMM, SSEPackedDouble, loadv4f64>, TB, PD, VEX, VVVV, VEX_L, WIG;
// Legacy two-operand packed compares: $src1 is tied to $dst and the memory
// forms match through memopv4f32 / memopv2f64.
let Constraints = "$src1 = $dst" in {
  defm CMPPS : sse12_cmp_packed<VR128, f128mem, v4f32,
                 "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                 SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, memopv4f32>, TB;
  defm CMPPD : sse12_cmp_packed<VR128, f128mem, v2f64,
                 "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                 SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, TB, PD;
}

// Matches a target immediate whose low three bits are 0x0, 0x3, 0x4 or 0x7;
// higher bits of the immediate are ignored by the check.
// NOTE(review): these look like the EQ / UNORD / NEQ / ORD compare predicates,
// i.e. the ones that are symmetric in their operands - confirm against the
// CMPPS predicate table.
def CommutableCMPCC : PatLeaf<(timm), [{
  uint64_t Imm = N->getZExtValue() & 0x7;
  return (Imm == 0x00 || Imm == 0x03 || Imm == 0x04 || Imm == 0x07);
}]>;

// Patterns to select compares with loads in first operand.
// The load appears as the first compare operand in these patterns; because
// the condition code is restricted to CommutableCMPCC, the operands are
// swapped so the load can be folded into the memory form.
let Predicates = [HasAVX] in {
  def : Pat<(v4f64 (X86any_cmpp (loadv4f64 addr:$src2), VR256:$src1,
                                CommutableCMPCC:$cc)),
            (VCMPPDYrmi VR256:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(v8f32 (X86any_cmpp (loadv8f32 addr:$src2), VR256:$src1,
                                CommutableCMPCC:$cc)),
            (VCMPPSYrmi VR256:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(v2f64 (X86any_cmpp (loadv2f64 addr:$src2), VR128:$src1,
                                CommutableCMPCC:$cc)),
            (VCMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(v4f32 (X86any_cmpp (loadv4f32 addr:$src2), VR128:$src1,
                                CommutableCMPCC:$cc)),
            (VCMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
                          CommutableCMPCC:$cc)),
            (VCMPSDrmi FR64:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
                          CommutableCMPCC:$cc)),
            (VCMPSSrmi FR32:$src1, addr:$src2, timm:$cc)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(v2f64 (X86any_cmpp (memopv2f64 addr:$src2), VR128:$src1,
                                CommutableCMPCC:$cc)),
            (CMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
                          CommutableCMPCC:$cc)),
            (CMPSDrmi FR64:$src1, addr:$src2, timm:$cc)>;
}

let Predicates = [UseSSE1] in {
  def : Pat<(v4f32 (X86any_cmpp (memopv4f32 addr:$src2), VR128:$src1,
                                CommutableCMPCC:$cc)),
            (CMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
                          CommutableCMPCC:$cc)),
            (CMPSSrmi FR32:$src1, addr:$src2, timm:$cc)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Shuffle Instructions
//===----------------------------------------------------------------------===//

/// sse12_shuffle - SSE 1 & 2 fp shuffle instructions (opcode 0xC6).
///
/// Emits:
///   rmi - second source loaded through mem_frag, 8-bit immediate selector.
///   rri - both sources in registers; isCommutable is taken from the
///         IsCommutable parameter (default 0).
/// Both select X86Shufp with an i8 target immediate.
multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
                         ValueType vt, string asm, PatFrag mem_frag,
                         X86FoldableSchedWrite sched, Domain d,
                         bit IsCommutable = 0> {
  def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
                   [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
                                       (i8 timm:$src3))))], d>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = IsCommutable in
  def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
                 (ins RC:$src1, RC:$src2, u8imm:$src3), asm,
                 [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
                                     (i8 timm:$src3))))], d>,
            Sched<[sched]>;
}

let Predicates = [HasAVX, NoVLX] in {
  defm VSHUFPS  : sse12_shuffle<VR128, f128mem, v4f32,
           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>,
           TB, VEX, VVVV, WIG;
  defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv8f32, SchedWriteFShuffle.YMM, SSEPackedSingle>,
           TB, VEX, VVVV, VEX_L, WIG;
  defm VSHUFPD  : sse12_shuffle<VR128, f128mem, v2f64,
           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>,
           TB, PD, VEX, VVVV, WIG;
  defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv4f64, SchedWriteFShuffle.YMM, SSEPackedDouble>,
           TB, PD, VEX, VVVV, VEX_L, WIG;
}
// Legacy two-operand forms: the destination is tied to the first source.
let Constraints = "$src1 = $dst" in {
  defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
                    "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                    memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, TB;
  defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
                    "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                    memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, TB, PD;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Unpack FP Instructions
//===----------------------------------------------------------------------===//

/// sse12_unpack_interleave - SSE 1 & 2 fp unpack and interleave.
///
/// Emits:
///   rr - both sources in registers; isCommutable is taken from the
///        IsCommutable parameter (default 0).
///   rm - second source loaded through mem_frag.
/// Both select OpNode on value type vt.
multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
                                   PatFrag mem_frag, RegisterClass RC,
                                   X86MemOperand x86memop, string asm,
                                   X86FoldableSchedWrite sched, Domain d,
                                   bit IsCommutable = 0> {
    let isCommutable = IsCommutable in
    def rr : PI<opc, MRMSrcReg,
                (outs RC:$dst), (ins RC:$src1, RC:$src2),
                asm, [(set RC:$dst,
                           (vt (OpNode RC:$src1, RC:$src2)))], d>,
                Sched<[sched]>;
    def rm : PI<opc, MRMSrcMem,
                (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                asm, [(set RC:$dst,
                           (vt (OpNode RC:$src1,
                                       (mem_frag addr:$src2))))], d>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

let Predicates = [HasAVX, NoVLX] in {
defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, load,
      VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SchedWriteFShuffle.XMM, SSEPackedSingle>, TB, VEX, VVVV, WIG;
defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, load,
      VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, TB, PD, VEX, VVVV, WIG;
defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, load,
      VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SchedWriteFShuffle.XMM, SSEPackedSingle>, TB, VEX, VVVV, WIG;
defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, load,
      VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SchedWriteFShuffle.XMM, SSEPackedDouble>, TB, PD, VEX, VVVV, WIG;

defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, load,
      VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SchedWriteFShuffle.YMM, SSEPackedSingle>, TB, VEX, VVVV, VEX_L, WIG;
defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, load,
      VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SchedWriteFShuffle.YMM, SSEPackedDouble>, TB, PD, VEX, VVVV, VEX_L, WIG;
defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, load,
      VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SchedWriteFShuffle.YMM, SSEPackedSingle>, TB, VEX, VVVV, VEX_L, WIG;
defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, load,
      VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SchedWriteFShuffle.YMM, SSEPackedDouble>, TB, PD, VEX, VVVV, VEX_L, WIG;
} // Predicates = [HasAVX, NoVLX]

let Constraints = "$src1 = $dst" in {
  defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memop,
        VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
                       SchedWriteFShuffle.XMM, SSEPackedSingle>, TB;
  defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memop,
        VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
                       SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, TB, PD;
  defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memop,
        VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
                       SchedWriteFShuffle.XMM, SSEPackedSingle>, TB;
  defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memop,
        VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
                       SchedWriteFShuffle.XMM, SSEPackedDouble>, TB, PD;
} // Constraints = "$src1 = $dst"

// With AVX1 only, 256-bit integer unpacks are selected to the fp unpack
// instructions.
let Predicates = [HasAVX1Only] in {
  def : Pat<(v8i32 (X86Unpckl VR256:$src1, (loadv8i32 addr:$src2))),
            (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
            (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (X86Unpckh VR256:$src1, (loadv8i32 addr:$src2))),
            (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
            (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v4i64 (X86Unpckl VR256:$src1, (loadv4i64 addr:$src2))),
            (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
            (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v4i64 (X86Unpckh VR256:$src1, (loadv4i64 addr:$src2))),
            (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
            (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
}

let Predicates = [UseSSE2] in {
  // Use MOVHPD if the load isn't aligned enough for UNPCKLPD.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                              (v2f64 (simple_load addr:$src2)))),
            (MOVHPDrm VR128:$src1, addr:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Extract Floating-Point Sign mask
//===----------------------------------------------------------------------===//

/// sse12_extr_sign_mask - SSE 1 & 2 extract floating-point sign mask.
///
/// Emits a single rr record (opcode 0x50) that selects X86movmsk on a
/// register of class RC and writes the result to a GR32orGR64 destination.
multiclass sse12_extr_sign_mask<RegisterClass RC, ValueType vt,
                                string asm, Domain d> {
  def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set GR32orGR64:$dst, (X86movmsk (vt RC:$src)))], d>,
           Sched<[WriteFMOVMSK]>;
}

let Predicates = [HasAVX] in {
  defm VMOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
                                        SSEPackedSingle>, TB, VEX, WIG;
  defm VMOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
                                        SSEPackedDouble>, TB, PD, VEX, WIG;
  defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, v8f32, "movmskps",
                                         SSEPackedSingle>, TB, VEX, VEX_L, WIG;
  defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, v4f64, "movmskpd",
                                         SSEPackedDouble>, TB, PD, VEX, VEX_L, WIG;

  // Also support integer VTs to avoid an int->fp bitcast in the DAG.
  def : Pat<(X86movmsk (v4i32 VR128:$src)),
            (VMOVMSKPSrr VR128:$src)>;
  def : Pat<(X86movmsk (v2i64 VR128:$src)),
            (VMOVMSKPDrr VR128:$src)>;
  def : Pat<(X86movmsk (v8i32 VR256:$src)),
            (VMOVMSKPSYrr VR256:$src)>;
  def : Pat<(X86movmsk (v4i64 VR256:$src)),
            (VMOVMSKPDYrr VR256:$src)>;
}

defm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
                                     SSEPackedSingle>, TB;
defm MOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
                                     SSEPackedDouble>, TB, PD;

let Predicates = [UseSSE2] in {
  // Also support integer VTs to avoid an int->fp bitcast in the DAG.
  def : Pat<(X86movmsk (v4i32 VR128:$src)),
            (MOVMSKPSrr VR128:$src)>;
  def : Pat<(X86movmsk (v2i64 VR128:$src)),
            (MOVMSKPDrr VR128:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in { // SSE integer instructions

/// PDI_binop_rm - Simple SSE2 binary operator.
///
/// Emits:
///   rr - register/register form; isCommutable is taken from IsCommutable.
///   rm - second operand loaded through memop_frag.
/// Is2Addr selects the two-operand asm string; otherwise the three-operand
/// string is used.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                        X86MemOperand x86memop, X86FoldableSchedWrite sched,
                        bit IsCommutable, bit Is2Addr> {
  let isCommutable = IsCommutable in
  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
       Sched<[sched]>;
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // ExeDomain = SSEPackedInt

/// PDI_binop_all - Instantiates PDI_binop_rm three times for one operator:
///   V#NAME   - VEX 128-bit form, predicated on [HasAVX, prd].
///   NAME     - legacy 128-bit form with $src1 tied to $dst.
///   V#NAME#Y - VEX 256-bit form, predicated on [HasAVX2, prd].
multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
                         ValueType OpVT128, ValueType OpVT256,
                         X86SchedWriteWidths sched, bit IsCommutable,
                         Predicate prd> {
let Predicates = [HasAVX, prd] in
  defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
                             VR128, load, i128mem, sched.XMM,
                             IsCommutable, 0>, VEX, VVVV, WIG;

let Constraints = "$src1 = $dst" in
  defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
                           memop, i128mem, sched.XMM, IsCommutable, 1>;

let Predicates = [HasAVX2, prd] in
  defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
                               OpVT256, VR256, load, i256mem, sched.YMM,
                               IsCommutable, 0>, VEX, VVVV, VEX_L, WIG;
}

// These are ordered here for pattern ordering requirements with the fp versions

defm PAND  : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64,
                           SchedWriteVecLogic, 1, NoVLX>;
defm POR   : PDI_binop_all<0xEB, "por", or, v2i64, v4i64,
                           SchedWriteVecLogic, 1, NoVLX>;
defm PXOR  : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64,
                           SchedWriteVecLogic, 1, NoVLX>;
defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
                           SchedWriteVecLogic, 0, NoVLX>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Logical Instructions
//===----------------------------------------------------------------------===//

/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
///
/// There are no patterns here because isel prefers integer versions for SSE2
/// and later. There are SSE1 v4f32 patterns later.
///
/// Instantiates sse12_fp_packed_logical_rm for the VEX 256-bit and 128-bit
/// PS/PD forms (under [HasAVX, NoVLX]) and for the legacy 128-bit PS/PD forms
/// with $src1 tied to $dst.
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasAVX, NoVLX] in {
  defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
        !strconcat(OpcodeStr, "ps"), f256mem, sched.YMM,
        [], [], 0>, TB, VEX, VVVV, VEX_L, WIG;

  defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
        !strconcat(OpcodeStr, "pd"), f256mem, sched.YMM,
        [], [], 0>, TB, PD, VEX, VVVV, VEX_L, WIG;

  defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
       !strconcat(OpcodeStr, "ps"), f128mem, sched.XMM,
       [], [], 0>, TB, VEX, VVVV, WIG;

  defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
       !strconcat(OpcodeStr, "pd"), f128mem, sched.XMM,
       [], [], 0>, TB, PD, VEX, VVVV, WIG;
  }

  let Constraints = "$src1 = $dst" in {
    defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
         !strconcat(OpcodeStr, "ps"), f128mem, sched.XMM,
         [], []>, TB;

    defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
         !strconcat(OpcodeStr, "pd"), f128mem, sched.XMM,
         [], []>, TB, PD;
  }
}

defm AND  : sse12_fp_packed_logical<0x54, "and", SchedWriteFLogic>;
defm OR   : sse12_fp_packed_logical<0x56, "or", SchedWriteFLogic>;
defm XOR  : sse12_fp_packed_logical<0x57, "xor", SchedWriteFLogic>;
let isCommutable = 0 in
  defm ANDN : sse12_fp_packed_logical<0x55, "andn", SchedWriteFLogic>;

// 256-bit integer logic for the element types other than v4i64, selected to
// the VPAND/VPOR/VPXOR/VPANDN Y-register instructions.
let Predicates = [HasAVX2, NoVLX] in {
  def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
            (VPANDYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
            (VPANDYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
            (VPANDYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
            (VPORYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
            (VPORYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
            (VPORYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
            (VPXORYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
            (VPXORYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
            (VPXORYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
            (VPANDNYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
            (VPANDNYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
            (VPANDNYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)),
            (VPANDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)),
            (VPANDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)),
            (VPANDYrm VR256:$src1, addr:$src2)>;

  def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)),
            (VPORYrm VR256:$src1, addr:$src2)>;
  def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)),
            (VPORYrm VR256:$src1, addr:$src2)>;
  def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)),
            (VPORYrm VR256:$src1, addr:$src2)>;

  def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)),
            (VPXORYrm VR256:$src1, addr:$src2)>;
  def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)),
            (VPXORYrm VR256:$src1, addr:$src2)>;
  def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)),
            (VPXORYrm VR256:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)),
            (VPANDNYrm VR256:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)),
            (VPANDNYrm VR256:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)),
            (VPANDNYrm VR256:$src1, addr:$src2)>;
}

// If only AVX1 is supported, we need to handle integer operations with
// floating point instructions since the integer versions aren't available.
let Predicates = [HasAVX1Only] in {
  // Register-register forms: each 256-bit integer element type is selected to
  // the packed-single logic instruction.
  def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
            (VANDPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
            (VANDPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
            (VANDPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v4i64 (and VR256:$src1, VR256:$src2)),
            (VANDPSYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
            (VORPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
            (VORPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
            (VORPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v4i64 (or VR256:$src1, VR256:$src2)),
            (VORPSYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
            (VXORPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
            (VXORPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
            (VXORPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v4i64 (xor VR256:$src1, VR256:$src2)),
            (VXORPSYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v4i64 (X86andnp VR256:$src1, VR256:$src2)),
            (VANDNPSYrr VR256:$src1, VR256:$src2)>;

  // Register-memory forms: the second operand is a folded 256-bit load.
  def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)),
            (VANDPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)),
            (VANDPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)),
            (VANDPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(and VR256:$src1, (loadv4i64 addr:$src2)),
            (VANDPSYrm VR256:$src1, addr:$src2)>;

  def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)),
            (VORPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)),
            (VORPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)),
            (VORPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(or VR256:$src1, (loadv4i64 addr:$src2)),
            (VORPSYrm VR256:$src1, addr:$src2)>;

  def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)),
            (VXORPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)),
            (VXORPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)),
            (VXORPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(xor VR256:$src1, (loadv4i64 addr:$src2)),
            (VXORPSYrm VR256:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)),
            (VANDNPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)),
            (VANDNPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)),
            (VANDNPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256:$src1, (loadv4i64 addr:$src2)),
            (VANDNPSYrm VR256:$src1, addr:$src2)>;
}

// 128-bit integer logic operations selected to VPAND/VPOR/VPXOR/VPANDN.
// NOTE(review): only v16i8/v8i16/v4i32 are listed here; v2i64 is presumably
// matched by the instruction definitions themselves - confirm.
let Predicates = [HasAVX, NoVLX] in {
  // Register-register forms.
  def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
            (VPANDrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
            (VPANDrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
            (VPANDrr VR128:$src1, VR128:$src2)>;

  def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
            (VPORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
            (VPORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
            (VPORrr VR128:$src1, VR128:$src2)>;

  def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
            (VPXORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
            (VPXORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
            (VPXORrr VR128:$src1, VR128:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
            (VPANDNrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
            (VPANDNrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
            (VPANDNrr VR128:$src1, VR128:$src2)>;

  // Register-memory forms: the second operand is a folded 128-bit load.
  def : Pat<(and VR128:$src1, (loadv16i8 addr:$src2)),
            (VPANDrm VR128:$src1, addr:$src2)>;
  def : Pat<(and VR128:$src1, (loadv8i16 addr:$src2)),
            (VPANDrm VR128:$src1, addr:$src2)>;
  def : Pat<(and VR128:$src1, (loadv4i32 addr:$src2)),
            (VPANDrm VR128:$src1, addr:$src2)>;

  def : Pat<(or VR128:$src1, (loadv16i8 addr:$src2)),
            (VPORrm VR128:$src1, addr:$src2)>;
  def : Pat<(or VR128:$src1, (loadv8i16 addr:$src2)),
            (VPORrm VR128:$src1, addr:$src2)>;
  def : Pat<(or VR128:$src1, (loadv4i32 addr:$src2)),
            (VPORrm VR128:$src1, addr:$src2)>;

  def : Pat<(xor VR128:$src1, (loadv16i8 addr:$src2)),
            (VPXORrm VR128:$src1, addr:$src2)>;
  def : Pat<(xor VR128:$src1, (loadv8i16 addr:$src2)),
            (VPXORrm VR128:$src1, addr:$src2)>;
  def : Pat<(xor VR128:$src1, (loadv4i32 addr:$src2)),
            (VPXORrm VR128:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128:$src1, (loadv16i8 addr:$src2)),
            (VPANDNrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128:$src1, (loadv8i16 addr:$src2)),
            (VPANDNrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128:$src1, (loadv4i32 addr:$src2)),
            (VPANDNrm VR128:$src1, addr:$src2)>;
}

// 128-bit integer logic operations selected to PAND/POR/PXOR/PANDN.
let Predicates = [UseSSE2] in {
  // Register-register forms.
  def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
            (PANDrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
            (PANDrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
            (PANDrr VR128:$src1, VR128:$src2)>;

  def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
            (PORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
            (PORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
            (PORrr VR128:$src1, VR128:$src2)>;

  def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
            (PXORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
            (PXORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
            (PXORrr VR128:$src1, VR128:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
            (PANDNrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
            (PANDNrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
            (PANDNrr VR128:$src1, VR128:$src2)>;

  // Register-memory forms: these use the memop fragments rather than the
  // load fragments used by the AVX patterns.
  def : Pat<(and VR128:$src1, (memopv16i8 addr:$src2)),
            (PANDrm VR128:$src1, addr:$src2)>;
  def : Pat<(and VR128:$src1, (memopv8i16 addr:$src2)),
            (PANDrm VR128:$src1, addr:$src2)>;
  def : Pat<(and VR128:$src1, (memopv4i32 addr:$src2)),
            (PANDrm VR128:$src1, addr:$src2)>;

  def : Pat<(or VR128:$src1, (memopv16i8 addr:$src2)),
            (PORrm VR128:$src1, addr:$src2)>;
  def : Pat<(or VR128:$src1, (memopv8i16 addr:$src2)),
            (PORrm VR128:$src1, addr:$src2)>;
  def : Pat<(or VR128:$src1, (memopv4i32 addr:$src2)),
            (PORrm VR128:$src1, addr:$src2)>;

  def : Pat<(xor VR128:$src1, (memopv16i8 addr:$src2)),
            (PXORrm VR128:$src1, addr:$src2)>;
  def : Pat<(xor VR128:$src1, (memopv8i16 addr:$src2)),
            (PXORrm VR128:$src1, addr:$src2)>;
  def : Pat<(xor VR128:$src1, (memopv4i32 addr:$src2)),
            (PXORrm VR128:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128:$src1, (memopv16i8 addr:$src2)),
            (PANDNrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128:$src1, (memopv8i16 addr:$src2)),
            (PANDNrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128:$src1, (memopv4i32 addr:$src2)),
            (PANDNrm VR128:$src1, addr:$src2)>;
}

// Patterns for packed operations when we don't have integer type available.
def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
          (ANDPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)),
          (ORPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)),
          (XORPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)),
          (ANDNPSrr VR128:$src1, VR128:$src2)>;

def : Pat<(X86fand VR128:$src1, (memopv4f32 addr:$src2)),
          (ANDPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86for VR128:$src1, (memopv4f32 addr:$src2)),
          (ORPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86fxor VR128:$src1, (memopv4f32 addr:$src2)),
          (XORPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)),
          (ANDNPSrm VR128:$src1, addr:$src2)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Arithmetic Instructions
//===----------------------------------------------------------------------===//

/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and
/// vector forms.
///
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation.  This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a scalar)
/// and leaves the top elements unmodified (therefore these cannot be commuted).
///
/// These three forms can each be reg+reg or reg+mem.
///

/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those
/// classes below

/// basic_sse12_fp_binop_p - Packed ("ps"/"pd") forms of a binary FP operation.
/// Instantiates V#NAME#PS/PD (VR128) and V#NAME#PSY/PDY (VR256) under
/// [HasAVX, NoVLX], plus PS/PD with $src1 tied to $dst. Every record is marked
/// as reading MXCSR and as possibly raising an FP exception.
multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNode, X86SchedWriteSizes sched> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  let Predicates = [HasAVX, NoVLX] in {
  defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
                               VR128, v4f32, f128mem, loadv4f32,
                               SSEPackedSingle, sched.PS.XMM, 0>, TB, VEX, VVVV, WIG;
  defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
                               VR128, v2f64, f128mem, loadv2f64,
                               SSEPackedDouble, sched.PD.XMM, 0>, TB, PD, VEX, VVVV, WIG;

  defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
                        OpNode, VR256, v8f32, f256mem, loadv8f32,
                        SSEPackedSingle, sched.PS.YMM, 0>, TB, VEX, VVVV, VEX_L, WIG;
  defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
                        OpNode, VR256, v4f64, f256mem, loadv4f64,
                        SSEPackedDouble, sched.PD.YMM, 0>, TB, PD, VEX, VVVV, VEX_L, WIG;
  }

  // Two-operand forms: the first source is tied to the destination.
  let Constraints = "$src1 = $dst" in {
    defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
                              v4f32, f128mem, memopv4f32, SSEPackedSingle,
                              sched.PS.XMM>, TB;
    defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
                              v2f64, f128mem, memopv2f64, SSEPackedDouble,
                              sched.PD.XMM>, TB, PD;
  }
}
}

/// basic_sse12_fp_binop_s - Scalar ("ss"/"sd") forms on FR32/FR64.
/// V#NAME#SS/SD pass Is2Addr = 0 to sse12_fp_scalar (three-operand asm
/// string); SS/SD use the default Is2Addr = 1 with $src1 tied to $dst.
multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                  X86SchedWriteSizes sched> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
                         OpNode, FR32, f32mem, SSEPackedSingle, sched.PS.Scl, 0>,
                         TB, XS, VEX, VVVV, VEX_LIG, WIG;
  defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
                         OpNode, FR64, f64mem, SSEPackedDouble, sched.PD.Scl, 0>,
                         TB, XD, VEX, VVVV, VEX_LIG, WIG;

  let Constraints = "$src1 = $dst" in {
    defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
                              OpNode, FR32, f32mem, SSEPackedSingle,
                              sched.PS.Scl>, TB, XS;
    defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
                              OpNode, FR64, f64mem, SSEPackedDouble,
                              sched.PD.Scl>, TB, XD;
  }
}
}

/// basic_sse12_fp_binop_s_int - Scalar intrinsic forms operating on whole
/// VR128 vectors (v4f32 / v2f64) via sse12_fp_scalar_int, with the same
/// V-prefixed (Is2Addr = 0) and tied two-operand split as the plain scalar
/// forms.
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      X86SchedWriteSizes sched> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm V#NAME#SS : sse12_fp_scalar_int<opc, OpNode, VR128, v4f32,
                   !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
                   SSEPackedSingle, sched.PS.Scl, 0>, TB, XS, VEX, VVVV, VEX_LIG, WIG;
  defm V#NAME#SD : sse12_fp_scalar_int<opc, OpNode, VR128, v2f64,
                   !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
                   SSEPackedDouble, sched.PD.Scl, 0>, TB, XD, VEX, VVVV, VEX_LIG, WIG;

  let Constraints = "$src1 = $dst" in {
    defm SS : sse12_fp_scalar_int<opc, OpNode, VR128, v4f32,
                   !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
                   SSEPackedSingle, sched.PS.Scl>, TB, XS;
    defm SD : sse12_fp_scalar_int<opc, OpNode, VR128, v2f64,
                   !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
                   SSEPackedDouble, sched.PD.Scl>, TB, XD;
  }
}
}

// Binary Arithmetic instructions
defm ADD : basic_sse12_fp_binop_p<0x58, "add", any_fadd, SchedWriteFAddSizes>,
           basic_sse12_fp_binop_s<0x58, "add", any_fadd, SchedWriteFAddSizes>,
           basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAddSizes>;
defm MUL : basic_sse12_fp_binop_p<0x59, "mul", any_fmul, SchedWriteFMulSizes>,
           basic_sse12_fp_binop_s<0x59, "mul", any_fmul, SchedWriteFMulSizes>,
           basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMulSizes>;
// The remaining operations are explicitly marked non-commutable.
let isCommutable = 0 in {
  defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", any_fsub, SchedWriteFAddSizes>,
             basic_sse12_fp_binop_s<0x5C, "sub", any_fsub, SchedWriteFAddSizes>,
             basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAddSizes>;
  defm DIV : basic_sse12_fp_binop_p<0x5E, "div", any_fdiv, SchedWriteFDivSizes>,
             basic_sse12_fp_binop_s<0x5E, "div", any_fdiv, SchedWriteFDivSizes>,
             basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDivSizes>;
  defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SchedWriteFCmpSizes>;
  defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SchedWriteFCmpSizes>;
}

// Codegen-only variants of max/min built on the X86fmaxc/X86fminc nodes; they
// share the opcodes and mnemonics of MAX/MIN above.
let isCodeGenOnly = 1 in {
  defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>;
  defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SchedWriteFCmpSizes>;
}

// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
//      The effect is that the backend no longer emits unnecessary vector
//      insert instructions immediately after SSE scalar fp instructions
//      like addss or mulss.
//
//      For example, given the following code:
//        __m128 foo(__m128 A, __m128 B) {
//          A[0] += B[0];
//          return A;
//        }
//
//      Previously we generated:
//        addss %xmm0, %xmm1
//        movss %xmm1, %xmm0
//
//      We now generate:
//        addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
//      The effect is that the backend converts the packed fp instruction
//      followed by a vector insert into a single SSE scalar fp instruction.
//
//      For example, given the following code:
//        __m128 foo(__m128 A, __m128 B) {
//          __m128 C = A + B;
//          return (__m128) {C[0], A[1], A[2], A[3]};
//        }
//
//      Previously we generated:
//        addps %xmm0, %xmm1
//        movss %xmm1, %xmm0
//
//      We now generate:
//        addss %xmm1, %xmm0

// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
/// scalar_math_patterns - Select the "_Int" scalar instruction for a scalar
/// math op on element 0 of $dst whose result is re-inserted into $dst via
/// Move.
///
///   Op            - scalar operation applied to the extracted element.
///   OpcPrefix     - instruction name prefix; "rr_Int"/"rm_Int" is appended,
///                   and "V" is prepended for the AVX forms.
///   Move          - node that inserts the result back into $dst.
///   VT / EltTy    - vector type and its element type.
///   RC            - register class of the scalar second operand.
///   ld_frag       - load fragment used for the folded-memory pattern.
///   BasePredicate - predicate guarding the non-AVX patterns.
multiclass scalar_math_patterns<SDPatternOperator Op, string OpcPrefix, SDNode Move,
                                ValueType VT, ValueType EltTy,
                                RegisterClass RC, PatFrag ld_frag,
                                Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    // extracted scalar math op with insert via movss/movsd
    def : Pat<(VT (Move (VT VR128:$dst),
                        (VT (scalar_to_vector
                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                                 RC:$src))))),
              (!cast<Instruction>(OpcPrefix#rr_Int) VT:$dst,
               (VT (COPY_TO_REGCLASS RC:$src, VR128)))>;
    // Same, with the scalar operand loaded from memory.
    def : Pat<(VT (Move (VT VR128:$dst),
                        (VT (scalar_to_vector
                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                                 (ld_frag addr:$src)))))),
              (!cast<Instruction>(OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
  }

  // Repeat for AVX versions of the instructions.
  let Predicates = [UseAVX] in {
    // extracted scalar math op with insert via movss/movsd
    def : Pat<(VT (Move (VT VR128:$dst),
                        (VT (scalar_to_vector
                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                                 RC:$src))))),
              (!cast<Instruction>("V"#OpcPrefix#rr_Int) VT:$dst,
               (VT (COPY_TO_REGCLASS RC:$src, VR128)))>;
    // Same, with the scalar operand loaded from memory.
    def : Pat<(VT (Move (VT VR128:$dst),
                        (VT (scalar_to_vector
                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                                 (ld_frag addr:$src)))))),
              (!cast<Instruction>("V"#OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
  }
}

defm : scalar_math_patterns<any_fadd, "ADDSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
defm : scalar_math_patterns<any_fsub, "SUBSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
defm : scalar_math_patterns<any_fmul, "MULSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
defm : scalar_math_patterns<any_fdiv, "DIVSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;

defm : scalar_math_patterns<any_fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
defm : scalar_math_patterns<any_fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
defm : scalar_math_patterns<any_fmul, "MULSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
defm : scalar_math_patterns<any_fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;

/// Unop Arithmetic
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation.  This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a
/// scalar) and leaves the top elements undefined.
///
/// And, we have a special variant form for a full-vector intrinsic form.

/// sse_fp_unop_s - SSE1 unops in scalar form
/// For the non-AVX defs, we need $src1 to be tied to $dst because
/// the HW instructions are 2 operand / destructive.
///
/// Defines four records: "r"/"m" on RC (codegen-only, carrying selection
/// patterns; "m" additionally requires OptForSize) and "r_Int"/"m_Int" on
/// VR128 with an empty pattern list and $src1 tied to $dst.
multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                          X86MemOperand x86memop, Operand intmemop,
                          SDPatternOperator OpNode, Domain d,
                          X86FoldableSchedWrite sched, Predicate target> {
  let isCodeGenOnly = 1, hasSideEffects = 0 in {
  def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1),
            !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
          [(set RC:$dst, (OpNode RC:$src1))], d>, Sched<[sched]>,
          Requires<[target]>;
  let mayLoad = 1 in
  def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1),
            !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
          [(set RC:$dst, (OpNode (load addr:$src1)))], d>,
          Sched<[sched.Folded]>,
          Requires<[target, OptForSize]>;
  }

  let hasSideEffects = 0, Constraints = "$src1 = $dst", ExeDomain = d in {
  def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>,
                Sched<[sched]>;
  let mayLoad = 1 in
  def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, intmemop:$src2),
                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

}

/// sse_fp_unop_s_intr - Patterns mapping intrinsic Intr onto the
/// NAME#r_Int / NAME#m_Int instructions.
multiclass sse_fp_unop_s_intr<ValueType vt, PatFrags mem_frags,
                              Intrinsic Intr, Predicate target> {
  let Predicates = [target] in {
  // These are unary operations, but they are modeled as having 2 source operands
  // because the high elements of the destination are unchanged in SSE.
  def : Pat<(Intr VR128:$src),
            (!cast<Instruction>(NAME#r_Int) VR128:$src, VR128:$src)>;
  }
  // We don't want to fold scalar loads into these instructions unless
  // optimizing for size. This is because the folded instruction will have a
  // partial register update, while the unfolded sequence will not, e.g.
  // movss mem, %xmm0
  // rcpss %xmm0, %xmm0
  // which has a clobber before the rcp, vs.
  // rcpss mem, %xmm0
  let Predicates = [target, OptForSize] in {
    def : Pat<(Intr (mem_frags addr:$src2)),
               (!cast<Instruction>(NAME#m_Int)
                      (vt (IMPLICIT_DEF)), addr:$src2)>;
  }
}

/// avx_fp_unop_s_intr - Patterns mapping intrinsic Intr onto the
/// NAME#r_Int / NAME#m_Int instructions; the load-folding pattern is only
/// enabled together with OptForSize.
multiclass avx_fp_unop_s_intr<ValueType vt, PatFrags mem_frags,
                              Intrinsic Intr, Predicate target> {
  let Predicates = [target] in {
   def : Pat<(Intr VR128:$src),
             (!cast<Instruction>(NAME#r_Int) VR128:$src,
                                 VR128:$src)>;
  }
  let Predicates = [target, OptForSize] in {
    def : Pat<(Intr (mem_frags addr:$src2)),
              (!cast<Instruction>(NAME#m_Int)
                    (vt (IMPLICIT_DEF)), addr:$src2)>;
  }
}

/// avx_fp_unop_s - Three-operand scalar unops. All four instruction records
/// ("r", "m", "r_Int", "m_Int") have empty pattern lists; selection of "r" and
/// "m" is done by the explicit patterns at the end, which pass IMPLICIT_DEF
/// as the first source.
multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                          ValueType ScalarVT, X86MemOperand x86memop,
                          Operand intmemop, SDPatternOperator OpNode, Domain d,
                          X86FoldableSchedWrite sched, Predicate target> {
  let isCodeGenOnly = 1, hasSideEffects = 0 in {
  def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
            [], d>, Sched<[sched]>;
  let mayLoad = 1 in
  def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
            [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  let hasSideEffects = 0, ExeDomain = d in {
  def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
                (ins VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             []>, Sched<[sched]>;
  let mayLoad = 1 in
  def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
                (ins VR128:$src1, intmemop:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // We don't want to fold scalar loads into these instructions unless
  // optimizing for size. This is because the folded instruction will have a
  // partial register update, while the unfolded sequence will not, e.g.
  // vmovss mem, %xmm0
  // vrcpss %xmm0, %xmm0, %xmm0
  // which has a clobber before the rcp, vs.
  // vrcpss mem, %xmm0, %xmm0
  // TODO: In theory, we could fold the load, and avoid the stall caused by
  // the partial register store, either in BreakFalseDeps or with smarter RA.
  let Predicates = [target] in {
   def : Pat<(OpNode RC:$src), (!cast<Instruction>(NAME#r)
                                (ScalarVT (IMPLICIT_DEF)), RC:$src)>;
  }
  let Predicates = [target, OptForSize] in {
    def : Pat<(ScalarVT (OpNode (load addr:$src))),
              (!cast<Instruction>(NAME#m) (ScalarVT (IMPLICIT_DEF)),
            addr:$src)>;
  }
}

/// sse1_fp_unop_p - SSE1 unops in packed form.
/// The V-prefixed XMM/YMM records are guarded by the caller-supplied predicate
/// list prds; the plain PSr/PSm records sit outside that `let`.
multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                          X86SchedWriteWidths sched, list<Predicate> prds> {
let Predicates = prds in {
  def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       !strconcat("v", OpcodeStr,
                                  "ps\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>,
                       VEX, Sched<[sched.XMM]>, WIG;
  def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       !strconcat("v", OpcodeStr,
                                  "ps\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))]>,
                       VEX, Sched<[sched.XMM.Folded]>, WIG;
  def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                        !strconcat("v", OpcodeStr,
                                   "ps\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>,
                        VEX, VEX_L, Sched<[sched.YMM]>, WIG;
  def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                        !strconcat("v", OpcodeStr,
                                   "ps\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (OpNode (loadv8f32 addr:$src)))]>,
                        VEX, VEX_L, Sched<[sched.YMM.Folded]>, WIG;
}

  def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>,
                Sched<[sched.XMM]>;
  def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>,
                Sched<[sched.XMM.Folded]>;
}

/// sse2_fp_unop_p - SSE2 unops in vector forms.
multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
                          SDPatternOperator OpNode, X86SchedWriteWidths sched> {
// VEX-encoded "v<OpcodeStr>pd" forms: 128-bit (VR128) and 256-bit (VR256,
// VEX_L), each with a register source and a folded-load source.
let Predicates = [HasAVX, NoVLX] in {
  def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       !strconcat("v", OpcodeStr,
                                  "pd\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>,
                       VEX, Sched<[sched.XMM]>, WIG;
  def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       !strconcat("v", OpcodeStr,
                                  "pd\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))]>,
                       VEX, Sched<[sched.XMM.Folded]>, WIG;
  def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                        !strconcat("v", OpcodeStr,
                                   "pd\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>,
                        VEX, VEX_L, Sched<[sched.YMM]>, WIG;
  def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                        !strconcat("v", OpcodeStr,
                                   "pd\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (OpNode (loadv4f64 addr:$src)))]>,
                        VEX, VEX_L, Sched<[sched.YMM.Folded]>, WIG;
}

  // Non-VEX 128-bit "<OpcodeStr>pd" forms. The memory form matches through
  // the memopv2f64 fragment rather than loadv2f64.
  def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>,
                Sched<[sched.XMM]>;
  def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>,
                Sched<[sched.XMM.Folded]>;
}

/// sse1_fp_unop_s_intr - Intrinsic selection patterns for a scalar
/// single-precision unop. The intrinsic is looked up by name as
/// int_x86_sse_<OpcodeStr>_ss. The SS instantiation is predicated on UseSSE1,
/// the V#NAME#SS instantiation on AVXTarget.
multiclass sse1_fp_unop_s_intr<string OpcodeStr, Predicate AVXTarget> {
  defm SS        :  sse_fp_unop_s_intr<v4f32, sse_load_f32,
                      !cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss),
                      UseSSE1>, TB, XS;
  defm V#NAME#SS  : avx_fp_unop_s_intr<v4f32, sse_load_f32,
                      !cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss),
                      AVXTarget>,
                      TB, XS, VEX, VVVV, VEX_LIG, WIG;
}

/// sse1_fp_unop_s - Scalar single-precision (FR32 / f32mem) unop, instantiated
/// once through sse_fp_unop_s under UseSSE1 and once through avx_fp_unop_s
/// (mnemonic prefixed with "v") under AVXTarget.
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                          X86SchedWriteWidths sched, Predicate AVXTarget> {
  defm SS        :  sse_fp_unop_s<opc, OpcodeStr#ss, FR32, f32mem,
                      ssmem, OpNode, SSEPackedSingle, sched.Scl, UseSSE1>, TB, XS;
  defm V#NAME#SS  : avx_fp_unop_s<opc, "v"#OpcodeStr#ss, FR32, f32,
                      f32mem, ssmem, OpNode, SSEPackedSingle, sched.Scl, AVXTarget>,
                      TB, XS, VEX, VVVV, VEX_LIG, WIG;
}

/// sse2_fp_unop_s - Scalar double-precision (FR64 / f64mem) counterpart of
/// sse1_fp_unop_s; the non-VEX instantiation is predicated on UseSSE2.
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                          X86SchedWriteWidths sched, Predicate AVXTarget> {
  defm SD         : sse_fp_unop_s<opc, OpcodeStr#sd, FR64, f64mem,
                         sdmem, OpNode, SSEPackedDouble, sched.Scl, UseSSE2>, TB, XD;
  defm V#NAME#SD  : avx_fp_unop_s<opc, "v"#OpcodeStr#sd, FR64, f64,
                         f64mem, sdmem, OpNode, SSEPackedDouble, sched.Scl, AVXTarget>,
                         TB, XD, VEX, VVVV, VEX_LIG, WIG;
}

// Square root.
defm SQRT  : sse1_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, UseAVX>,
             sse1_fp_unop_p<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>,
             sse2_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt64, UseAVX>,
             sse2_fp_unop_p<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt64>, SIMD_EXC;

// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>,
             sse1_fp_unop_s_intr<"rsqrt", HasAVX>,
             sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, [HasAVX]>;
defm RCP   : sse1_fp_unop_s<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>,
             sse1_fp_unop_s_intr<"rcp", HasAVX>,
             sse1_fp_unop_p<0x53, "rcp", X86frcp, SchedWriteFRcp, [HasAVX]>;

// There is no f64 version of the reciprocal approximation instructions.

/// scalar_unary_math_patterns - Select the r_Int form of a scalar unop for
/// the DAG shape (Move $dst, (scalar_to_vector (OpNode (extractelt $src, 0)))),
/// i.e. OpNode applied to element 0 of $src and merged into $dst by Move.
/// One pattern targets OpcPrefix#r_Int under BasePredicate; a second targets
/// "V"#OpcPrefix#r_Int under UseAVX.
multiclass scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix, SDNode Move,
                                      ValueType VT, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    def : Pat<(VT (Move VT:$dst, (scalar_to_vector
                                  (OpNode (extractelt VT:$src, 0))))),
              (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src)>;
  }

  // Repeat for AVX versions of the instructions.
  let Predicates = [UseAVX] in {
    def : Pat<(VT (Move VT:$dst, (scalar_to_vector
                                  (OpNode (extractelt VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
  }
}

defm : scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32, UseSSE1>;
defm : scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64, UseSSE2>;

/// scalar_unary_math_intr_patterns - Same idea as scalar_unary_math_patterns,
/// but the matched operation is an intrinsic applied to the whole vector:
/// (Move $dst, (Intr $src)). The AVX repeat is predicated on HasAVX.
multiclass scalar_unary_math_intr_patterns<Intrinsic Intr, string OpcPrefix,
                                           SDNode Move, ValueType VT,
                                           Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
              (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src)>;
  }

  // Repeat for AVX versions of the instructions.
  let Predicates = [HasAVX] in {
    def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
              (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
  }
}

defm : scalar_unary_math_intr_patterns<int_x86_sse_rcp_ss, "RCPSS", X86Movss,
                                       v4f32, UseSSE1>;
defm : scalar_unary_math_intr_patterns<int_x86_sse_rsqrt_ss, "RSQRTSS", X86Movss,
                                       v4f32, UseSSE1>;


//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Non-temporal stores
//===----------------------------------------------------------------------===//

let AddedComplexity = 400 in { // Prefer non-temporal versions
let Predicates = [HasAVX, NoVLX] in {
let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
                      (ins f128mem:$dst, VR128:$src),
                      "movntps\t{$src, $dst|$dst, $src}",
                      [(alignednontemporalstore (v4f32 VR128:$src),
                                                addr:$dst)]>, VEX, WIG;
def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
                      (ins f128mem:$dst, VR128:$src),
                      "movntpd\t{$src, $dst|$dst, $src}",
                      [(alignednontemporalstore (v2f64 VR128:$src),
                                                addr:$dst)]>, VEX, WIG;
} // SchedRW

let SchedRW = [SchedWriteFMoveLSNT.YMM.MR] in {
def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
                       (ins f256mem:$dst, VR256:$src),
                       "movntps\t{$src, $dst|$dst, $src}",
                       [(alignednontemporalstore (v8f32 VR256:$src),
                                                 addr:$dst)]>, VEX, VEX_L, WIG;
def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
                       (ins f256mem:$dst, VR256:$src),
                       "movntpd\t{$src, $dst|$dst, $src}",
                       [(alignednontemporalstore (v4f64 VR256:$src),
                                                 addr:$dst)]>, VEX, VEX_L, WIG;
} // SchedRW

let ExeDomain = SSEPackedInt in {
def VMOVNTDQmr    : VPDI<0xE7, MRMDestMem, (outs),
                         (ins i128mem:$dst, VR128:$src),
                         "movntdq\t{$src, $dst|$dst, $src}",
                         [(alignednontemporalstore (v2i64 VR128:$src),
                                                   addr:$dst)]>, VEX, WIG,
                         Sched<[SchedWriteVecMoveLSNT.XMM.MR]>;
def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
                    (ins i256mem:$dst, VR256:$src),
                    "movntdq\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v4i64 VR256:$src),
                                              addr:$dst)]>, VEX, VEX_L, WIG,
                    Sched<[SchedWriteVecMoveLSNT.YMM.MR]>;
} // ExeDomain
} // Predicates

let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movntps\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movntpd\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
} // SchedRW

let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLSNT.XMM.MR] in
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movntdq\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>;

let SchedRW = [WriteStoreNT] in {
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                 "movnti{l}\t{$src, $dst|$dst, $src}",
                 [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
               TB, Requires<[HasSSE2]>;
def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
                     "movnti{q}\t{$src, $dst|$dst, $src}",
                     [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
                  TB, Requires<[HasSSE2]>;
} // SchedRW = [WriteStoreNT]

// The instruction definitions above only carry v4f32/v2f64/v2i64/v4i64
// patterns; map the remaining vector types onto the integer-domain
// (V)MOVNTDQ stores.
let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(alignednontemporalstore (v8i32 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignednontemporalstore (v16f16 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v8f16 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v8f16 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
}

} // AddedComplexity

//===----------------------------------------------------------------------===//
// SSE 1 & 2 -
// Prefetch and memory fence
//===----------------------------------------------------------------------===//

// Prefetch intrinsic.
// Each definition matches a prefetch node whose last two operands are the
// fixed constants shown: (i32 3)/(i32 2)/(i32 1)/(i32 0) select
// T0/T1/T2/NTA respectively, and the final operand is always (i32 1).
let Predicates = [HasSSEPrefetch], SchedRW = [WriteLoad] in {
def PREFETCHT0   : I<0x18, MRM1m, (outs), (ins i8mem:$src),
    "prefetcht0\t$src", [(prefetch addr:$src, timm, (i32 3), (i32 1))]>, TB;
def PREFETCHT1   : I<0x18, MRM2m, (outs), (ins i8mem:$src),
    "prefetcht1\t$src", [(prefetch addr:$src, timm, (i32 2), (i32 1))]>, TB;
def PREFETCHT2   : I<0x18, MRM3m, (outs), (ins i8mem:$src),
    "prefetcht2\t$src", [(prefetch addr:$src, timm, (i32 1), (i32 1))]>, TB;
def PREFETCHNTA  : I<0x18, MRM0m, (outs), (ins i8mem:$src),
    "prefetchnta\t$src", [(prefetch addr:$src, timm, (i32 0), (i32 1))]>, TB;
}

// FIXME: How should flush instruction be modeled?
let SchedRW = [WriteLoad] in {
// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
               "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
               TB, Requires<[HasCLFLUSH]>;
}

let SchedRW = [WriteNop] in {
// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
def PAUSE : I<0x90, RawFrm, (outs), (ins),
              "pause", [(int_x86_sse2_pause)]>, XS;
}

let SchedRW = [WriteFence] in {
// Load, store, and memory fence
// TODO: As with mfence, we may want to ease the availability of sfence/lfence
// to include any 64-bit target.
def SFENCE : I<0xAE, MRM7X, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
               TB, Requires<[HasSSE1]>;
def LFENCE : I<0xAE, MRM5X, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>,
               TB, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM6X, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>,
               TB, Requires<[HasMFence]>;
} // SchedRW

def : Pat<(X86MFence), (MFENCE)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Load/Store MXCSR register
//===----------------------------------------------------------------------===//

// The load forms are modeled as defining MXCSR and the store forms as using
// it; all four are marked as having side effects.
let mayLoad=1, hasSideEffects=1, Defs=[MXCSR] in
def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
               "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
               VEX, Sched<[WriteLDMXCSR]>, WIG;
let mayStore=1, hasSideEffects=1, Uses=[MXCSR] in
def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
               "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
               VEX, Sched<[WriteSTMXCSR]>, WIG;

let mayLoad=1, hasSideEffects=1, Defs=[MXCSR] in
def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src),
                "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
                TB, Sched<[WriteLDMXCSR]>;
let mayStore=1, hasSideEffects=1, Uses=[MXCSR] in
def STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
                TB, Sched<[WriteSTMXCSR]>;

//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in { // SSE integer instructions

// VEX register-to-register moves (opcode 0x6F, MRMSrcReg); no ISel patterns.
let hasSideEffects = 0 in {
def VMOVDQArr  : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, WIG;
def VMOVDQUrr  : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "movdqu\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, WIG;
def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, WIG;
def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      "movdqu\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, WIG;
}

// For Disassembler
// Same moves using the store-form opcode (0x7F, MRMDestReg).
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def VMOVDQArr_REV  : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                          "movdqa\t{$src, $dst|$dst, $src}", []>,
                          Sched<[SchedWriteVecMoveLS.XMM.RR]>,
                          VEX, WIG;
def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                          "movdqa\t{$src, $dst|$dst, $src}", []>,
                          Sched<[SchedWriteVecMoveLS.YMM.RR]>,
                          VEX, VEX_L, WIG;
def VMOVDQUrr_REV  : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                          "movdqu\t{$src, $dst|$dst, $src}", []>,
                          Sched<[SchedWriteVecMoveLS.XMM.RR]>,
                          VEX, WIG;
def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                          "movdqu\t{$src, $dst|$dst, $src}", []>,
                          Sched<[SchedWriteVecMoveLS.YMM.RR]>,
                          VEX, VEX_L, WIG;
}

// VEX loads. Only the 128-bit forms carry a v2i64 load pattern here.
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
    hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in {
def VMOVDQArm  : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                      "movdqa\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (alignedloadv2i64 addr:$src))]>,
                      Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, WIG;
def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                      VEX, VEX_L, WIG;
def VMOVDQUrm  : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv2i64 addr:$src))]>,
                   Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                   TB, XS, VEX, WIG;
def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}", []>,
                   Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                   TB, XS, VEX, VEX_L, WIG;
}

// VEX stores. Only the 128-bit forms carry a v2i64 store pattern here.
let mayStore = 1, hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in {
def VMOVDQAmr  : VPDI<0x7F, MRMDestMem, (outs),
                      (ins i128mem:$dst, VR128:$src),
                      "movdqa\t{$src, $dst|$dst, $src}",
                      [(alignedstore (v2i64 VR128:$src), addr:$dst)]>,
                      Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, WIG;
def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
                      (ins i256mem:$dst, VR256:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.YMM.MR]>, VEX, VEX_L, WIG;
def VMOVDQUmr  : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}",
                   [(store (v2i64 VR128:$src), addr:$dst)]>,
                   Sched<[SchedWriteVecMoveLS.XMM.MR]>, TB, XS, VEX, WIG;
def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}",[]>,
                   Sched<[SchedWriteVecMoveLS.YMM.MR]>, TB, XS, VEX, VEX_L, WIG;
}

// Non-VEX register-to-register moves.
let SchedRW = [SchedWriteVecMoveLS.XMM.RR] in {
let hasSideEffects = 0 in {
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "movdqa\t{$src, $dst|$dst, $src}", []>;

def MOVDQUrr :   I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "movdqu\t{$src, $dst|$dst, $src}", []>,
                   TB, XS, Requires<[UseSSE2]>;
}

// For Disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                       "movdqa\t{$src, $dst|$dst, $src}", []>;

def MOVDQUrr_REV :   I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                       "movdqu\t{$src, $dst|$dst, $src}", []>,
                       TB, XS, Requires<[UseSSE2]>;
}
} // SchedRW

// Non-VEX loads; the selection patterns are intentionally left commented out
// in the pattern lists.
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
    hasSideEffects = 0, SchedRW = [SchedWriteVecMoveLS.XMM.RM] in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "movdqa\t{$src, $dst|$dst, $src}",
                   [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
def MOVDQUrm :   I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "movdqu\t{$src, $dst|$dst, $src}",
                   [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
                 TB, XS, Requires<[UseSSE2]>;
}

// Non-VEX stores; patterns likewise commented out.
let mayStore = 1, hasSideEffects = 0,
    SchedRW = [SchedWriteVecMoveLS.XMM.MR] in {
def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                   "movdqa\t{$src, $dst|$dst, $src}",
                   [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
def MOVDQUmr :   I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                   "movdqu\t{$src, $dst|$dst, $src}",
                   [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
                 TB, XS, Requires<[UseSSE2]>;
}

} // ExeDomain = SSEPackedInt

// Reversed version with ".s" suffix for GAS compatibility.
// "v"-prefixed ".s" mnemonics over VR128 and VR256 registers; each alias
// names the matching *_REV register-to-register move record.
def : InstAlias<"vmovdqa.s\t{$src, $dst|$dst, $src}",
  (VMOVDQArr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovdqa.s\t{$src, $dst|$dst, $src}",
  (VMOVDQAYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovdqu.s\t{$src, $dst|$dst, $src}",
  (VMOVDQUrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovdqu.s\t{$src, $dst|$dst, $src}",
  (VMOVDQUYrr_REV VR256:$dst, VR256:$src), 0>;

// Same GAS-compatible ".s" spelling for the non-"v" mnemonics, again mapped
// onto the *_REV records.
def : InstAlias<"movdqa.s\t{$src, $dst|$dst, $src}",
  (MOVDQArr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movdqu.s\t{$src, $dst|$dst, $src}",
  (MOVDQUrr_REV VR128:$dst, VR128:$src), 0>;

let Predicates = [HasAVX, NoVLX] in {
  // Extra selection patterns for the v4i32 / v8i16 / v8f16 / v16i8 element
  // layouts. Aligned accesses select VMOVDQA, all others select VMOVDQU.
  // The loops expand in the same order the patterns were listed by hand:
  // aligned loads, loads, aligned stores, stores.
  foreach ld = [alignedloadv4i32, alignedloadv8i16,
                alignedloadv8f16, alignedloadv16i8] in
    def : Pat<(ld addr:$src), (VMOVDQArm addr:$src)>;

  foreach ld = [loadv4i32, loadv8i16, loadv8f16, loadv16i8] in
    def : Pat<(ld addr:$src), (VMOVDQUrm addr:$src)>;

  foreach vt = [v4i32, v8i16, v8f16, v16i8] in
    def : Pat<(alignedstore (vt VR128:$src), addr:$dst),
              (VMOVDQAmr addr:$dst, VR128:$src)>;

  foreach vt = [v4i32, v8i16, v8f16, v16i8] in
    def : Pat<(store (vt VR128:$src), addr:$dst),
              (VMOVDQUmr addr:$dst, VR128:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in { // SSE integer instructions

/// PDI_binop_rm2 - SSE2 packed-integer binary operator whose result type
/// (DstVT) differs from the type of its sources (SrcVT).
///
/// Two records are produced per instantiation:
///   rr - both sources are RC registers; marked isCommutable.
///   rm - the second source is an x86memop operand read through memop_frag.
/// Is2Addr chooses between the two-operand asm string (default) and the
/// three-operand one.
multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         ValueType DstVT, ValueType SrcVT, RegisterClass RC,
                         PatFrag memop_frag, X86MemOperand x86memop,
                         X86FoldableSchedWrite sched, bit Is2Addr = 1> {
  let isCommutable = 1 in {
    def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
                 !if(Is2Addr,
                     OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                     OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set RC:$dst,
                       (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
             Sched<[sched]>;
  }
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86memop:$src2),
               !if(Is2Addr,
                   OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                   OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set RC:$dst,
                     (DstVT (OpNode (SrcVT RC:$src1),
                                    (memop_frag addr:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // ExeDomain = SSEPackedInt

// Packed add / saturating add / multiply instantiations.
// NOTE(review): the bit argument just before the predicate is 1 for every
// entry here; presumably it is the commutable flag of PDI_binop_all, which is
// defined outside this section - confirm against that multiclass.
defm PADDB   : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDW   : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDD   : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
                             SchedWriteVecALU, 1, NoVLX>;
defm PADDQ   : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
                             SchedWriteVecALU, 1, NoVLX>;
defm PADDSB  : PDI_binop_all<0xEC, "paddsb", saddsat, v16i8, v32i8,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDSW  : PDI_binop_all<0xED, "paddsw", saddsat, v8i16, v16i16,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDUSB : PDI_binop_all<0xDC, "paddusb", uaddsat, v16i8, v32i8,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDUSW : PDI_binop_all<0xDD, "paddusw", uaddsat, v8i16, v16i16,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMULLW  : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
                             SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
defm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16,
                             SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
defm PMULHW  : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16,
                             SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
// Packed subtract / saturating subtract instantiations (bit argument 0).
defm PSUBB   : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBW   : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBD   : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
                             SchedWriteVecALU, 0, NoVLX>;
defm PSUBQ   : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
                             SchedWriteVecALU, 0, NoVLX>;
defm PSUBSB  : PDI_binop_all<0xE8, "psubsb", ssubsat, v16i8, v32i8,
                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBSW  : PDI_binop_all<0xE9, "psubsw", ssubsat, v8i16, v16i16,
                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", usubsat, v16i8, v32i8,
                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", usubsat, v8i16, v16i16,
                             SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;

// Packed min / max / average / unsigned-multiply instantiations
// (bit argument 1).
defm PMINUB  : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMINSW  : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMAXUB  : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMAXSW  : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PAVGB   : PDI_binop_all<0xE0, "pavgb", avgceilu, v16i8, v32i8,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PAVGW   : PDI_binop_all<0xE3, "pavgw", avgceilu, v8i16, v16i16,
                             SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMULUDQ : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64,
                             SchedWriteVecIMul, 1, NoVLX>;

// pmaddwd: v8i16 (v16i16 for the Y form) sources, v4i32 (v8i32) result.
// The "v" forms pass Is2Addr = 0; the plain form ties $src1 to $dst.
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  defm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16,
                                VR128, load, i128mem, SchedWriteVecIMul.XMM,
                                0>,
                  VEX, VVVV, WIG;
}

let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  defm VPMADDWDY : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v8i32, v16i16,
                                 VR256, load, i256mem, SchedWriteVecIMul.YMM,
                                 0>,
                   VEX, VVVV, VEX_L, WIG;
}

let Constraints = "$src1 = $dst" in {
  defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16,
                               VR128, memop, i128mem, SchedWriteVecIMul.XMM>;
}

// psadbw: v16i8 (v32i8 for the Y form) sources, v2i64 (v4i64) result.
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8,
                               VR128, load, i128mem, SchedWritePSADBW.XMM, 0>,
                 VEX, VVVV, WIG;
}

let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  defm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8,
                                VR256, load, i256mem, SchedWritePSADBW.YMM, 0>,
                  VEX, VVVV, VEX_L, WIG;
}

let Constraints = "$src1 = $dst" in {
  defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128,
                              memop, i128mem, SchedWritePSADBW.XMM>;
}

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//

/// PDI_binop_rmi - Packed-integer binary operator with three operand forms:
///   rr - second source is a VR128 register (opcode opc, node OpNode);
///   rm - second source is an i128mem operand read through ld_frag
///        (opcode opc, node OpNode);
///   ri - second source is a u8imm immediate (opcode opc2 with format
///        ImmForm, node OpNode2, scheduled with schedImm).
/// The first source and the result use RC / DstVT; the register or memory
/// second source is typed SrcVT. Is2Addr selects the two-operand asm string.
multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
                         string OpcodeStr, SDNode OpNode,
                         SDNode OpNode2, RegisterClass RC,
                         X86FoldableSchedWrite sched,
                         X86FoldableSchedWrite schedImm,
                         ValueType DstVT, ValueType SrcVT,
                         PatFrag ld_frag, bit Is2Addr = 1> {
  // src2 is always 128-bit, independent of RC.
  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, VR128:$src2),
               !if(Is2Addr,
                   OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                   OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set RC:$dst,
                     (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))]>,
           Sched<[sched]>;
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, i128mem:$src2),
               !if(Is2Addr,
                   OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                   OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set RC:$dst,
                     (DstVT (OpNode RC:$src1,
                                    (SrcVT (ld_frag addr:$src2)))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
  def ri : PDIi8<opc2, ImmForm, (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
                 !if(Is2Addr,
                     OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                     OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set RC:$dst,
                       (DstVT (OpNode2 RC:$src1, (i8 timm:$src2))))]>,
           Sched<[schedImm]>;
}

/// PDI_binop_rmi_all - Instantiates PDI_binop_rmi three times:
///   V#NAME   - VR128, "v"-prefixed mnemonic, under [HasAVX, prd];
///   V#NAME#Y - VR256, "v"-prefixed mnemonic, under [HasAVX2, prd];
///   NAME     - VR128, plain mnemonic, with $src1 tied to $dst.
/// The "v" variants fold memory through `load` and pass Is2Addr = 0; the
/// plain variant folds through `memop` and keeps the Is2Addr default.
multiclass PDI_binop_rmi_all<bits<8> opc, bits<8> opc2, Format ImmForm,
                             string OpcodeStr, SDNode OpNode,
                             SDNode OpNode2, ValueType DstVT128,
                             ValueType DstVT256, ValueType SrcVT,
                             X86SchedWriteWidths sched,
                             X86SchedWriteWidths schedImm, Predicate prd> {
  let Predicates = [HasAVX, prd] in {
    defm V#NAME : PDI_binop_rmi<opc, opc2, ImmForm, "v" # OpcodeStr,
                                OpNode, OpNode2, VR128, sched.XMM,
                                schedImm.XMM, DstVT128, SrcVT, load, 0>,
                  VEX, VVVV, WIG;
  }
  let Predicates = [HasAVX2, prd] in {
    defm V#NAME#Y : PDI_binop_rmi<opc, opc2, ImmForm, "v" # OpcodeStr,
                                  OpNode, OpNode2, VR256, sched.YMM,
                                  schedImm.YMM, DstVT256, SrcVT, load, 0>,
                    VEX, VVVV, VEX_L, WIG;
  }
  let Constraints = "$src1 = $dst" in {
    defm NAME : PDI_binop_rmi<opc, opc2, ImmForm, OpcodeStr, OpNode, OpNode2,
                              VR128, sched.XMM, schedImm.XMM, DstVT128, SrcVT,
                              memop>;
  }
}

/// PDI_binop_ri - Packed-integer operator taking one RC register source and
/// a u8imm immediate; emits the single `ri` record. Is2Addr selects the
/// two-operand asm string (default) over the three-operand one.
multiclass PDI_binop_ri<bits<8> opc, Format ImmForm, string OpcodeStr,
                        SDNode OpNode, RegisterClass RC, ValueType VT,
                        X86FoldableSchedWrite sched, bit Is2Addr = 1> {
  def ri : PDIi8<opc, ImmForm, (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
                 !if(Is2Addr,
                     OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                     OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set RC:$dst, (VT (OpNode RC:$src1, (i8 timm:$src2))))]>,
           Sched<[sched]>;
}

/// PDI_binop_ri_all - Instantiates PDI_binop_ri three times:
///   V#NAME   - VR128 / v16i8, "v"-prefixed, under [HasAVX, NoVLX_Or_NoBWI];
///   V#NAME#Y - VR256 / v32i8, "v"-prefixed, under [HasAVX2, NoVLX_Or_NoBWI];
///   NAME     - VR128 / v16i8, plain mnemonic, with $src1 tied to $dst.
multiclass PDI_binop_ri_all<bits<8> opc, Format ImmForm, string OpcodeStr,
                            SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
    defm V#NAME : PDI_binop_ri<opc, ImmForm, "v" # OpcodeStr, OpNode,
                               VR128, v16i8, sched.XMM, 0>,
                  VEX, VVVV, WIG;
  }
  let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
    defm V#NAME#Y : PDI_binop_ri<opc, ImmForm, "v" # OpcodeStr, OpNode,
                                 VR256, v32i8, sched.YMM, 0>,
                    VEX, VVVV, VEX_L, WIG;
  }
  let Constraints = "$src1 = $dst" in {
    defm NAME : PDI_binop_ri<opc, ImmForm, OpcodeStr, OpNode, VR128, v16i8,
                             sched.XMM>;
  }
}

let ExeDomain = SSEPackedInt in {
  // Left shifts: register/memory count form plus immediate form (MRM6r).
  defm PSLLW : PDI_binop_rmi_all<0xF1, 0x71, MRM6r, "psllw",
                                 X86vshl, X86vshli, v8i16, v16i16, v8i16,
                                 SchedWriteVecShift, SchedWriteVecShiftImm,
                                 NoVLX_Or_NoBWI>;
  defm PSLLD : PDI_binop_rmi_all<0xF2, 0x72, MRM6r, "pslld",
                                 X86vshl, X86vshli, v4i32, v8i32, v4i32,
                                 SchedWriteVecShift, SchedWriteVecShiftImm,
                                 NoVLX>;
  defm PSLLQ : PDI_binop_rmi_all<0xF3, 0x73, MRM6r, "psllq",
                                 X86vshl, X86vshli, v2i64, v4i64, v2i64,
                                 SchedWriteVecShift, SchedWriteVecShiftImm,
                                 NoVLX>;

  // Logical right shifts (immediate form uses MRM2r).
  defm PSRLW : PDI_binop_rmi_all<0xD1, 0x71, MRM2r, "psrlw",
                                 X86vsrl, X86vsrli, v8i16, v16i16, v8i16,
                                 SchedWriteVecShift, SchedWriteVecShiftImm,
                                 NoVLX_Or_NoBWI>;
  defm PSRLD : PDI_binop_rmi_all<0xD2, 0x72, MRM2r, "psrld",
                                 X86vsrl, X86vsrli, v4i32, v8i32, v4i32,
                                 SchedWriteVecShift, SchedWriteVecShiftImm,
                                 NoVLX>;
  defm PSRLQ : PDI_binop_rmi_all<0xD3, 0x73, MRM2r, "psrlq",
                                 X86vsrl, X86vsrli, v2i64, v4i64, v2i64,
                                 SchedWriteVecShift, SchedWriteVecShiftImm,
                                 NoVLX>;

  // Arithmetic right shifts (immediate form uses MRM4r).
  defm PSRAW : PDI_binop_rmi_all<0xE1, 0x71, MRM4r, "psraw",
                                 X86vsra, X86vsrai, v8i16, v16i16, v8i16,
                                 SchedWriteVecShift, SchedWriteVecShiftImm,
                                 NoVLX_Or_NoBWI>;
  defm PSRAD : PDI_binop_rmi_all<0xE2, 0x72, MRM4r, "psrad",
                                 X86vsra, X86vsrai, v4i32, v8i32, v4i32,
                                 SchedWriteVecShift, SchedWriteVecShiftImm,
                                 NoVLX>;

  // Immediate-only shifts; both share opcode 0x73 and differ in the format
  // (MRM7r vs. MRM3r).
  defm PSLLDQ : PDI_binop_ri_all<0x73, MRM7r, "pslldq", X86vshldq,
                                 SchedWriteShuffle>;
  defm PSRLDQ : PDI_binop_ri_all<0x73, MRM3r, "psrldq", X86vshrdq,
                                 SchedWriteShuffle>;
} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Comparison Instructions
//===---------------------------------------------------------------------===//

// Equality compares pass 1 and greater-than compares pass 0 for the bit
// argument; every entry uses TruePredicate as the extra predicate.
defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8,
                             SchedWriteVecALU, 1, TruePredicate>;
defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16,
                             SchedWriteVecALU, 1, TruePredicate>;
defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32,
                             SchedWriteVecALU, 1, TruePredicate>;
defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8,
                             SchedWriteVecALU, 0, TruePredicate>;
defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16,
                             SchedWriteVecALU, 0, TruePredicate>;
defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
                             SchedWriteVecALU, 0, TruePredicate>;

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Shuffle Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in {
/// sse2_pshuffle - Shuffle controlled by a u8imm immediate, opcode 0x70.
/// Emits register (ri) and memory (mi) source forms for:
///   V#NAME#ri / V#NAME#mi   - VR128, vt128, under [HasAVX, prd];
///   V#NAME#Yri / V#NAME#Ymi - VR256, vt256, under [HasAVX2, prd];
///   ri / mi                 - VR128, vt128, under [UseSSE2].
/// The "v" forms fold memory through `load`, the plain form through `memop`.
/// All forms use the three-operand asm string.
multiclass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256,
                         SDNode OpNode, X86SchedWriteWidths sched,
                         Predicate prd> {
  let Predicates = [HasAVX, prd] in {
    def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, u8imm:$src2),
                        "v" # OpcodeStr #
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst,
                              (vt128 (OpNode VR128:$src1,
                                             (i8 timm:$src2))))]>,
                    VEX, Sched<[sched.XMM]>, WIG;
    def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
                        (ins i128mem:$src1, u8imm:$src2),
                        "v" # OpcodeStr #
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst,
                              (vt128 (OpNode (load addr:$src1),
                                             (i8 timm:$src2))))]>,
                    VEX, Sched<[sched.XMM.Folded]>, WIG;
  }

  let Predicates = [HasAVX2, prd] in {
    def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst),
                         (ins VR256:$src1, u8imm:$src2),
                         "v" # OpcodeStr #
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set VR256:$dst,
                               (vt256 (OpNode VR256:$src1,
                                              (i8 timm:$src2))))]>,
                     VEX, VEX_L, Sched<[sched.YMM]>, WIG;
    def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
                         (ins i256mem:$src1, u8imm:$src2),
                         "v" # OpcodeStr #
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set VR256:$dst,
                               (vt256 (OpNode (load addr:$src1),
                                              (i8 timm:$src2))))]>,
                     VEX, VEX_L, Sched<[sched.YMM.Folded]>, WIG;
  }

  let Predicates = [UseSSE2] in {
    def ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src1, u8imm:$src2),
                 OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 [(set VR128:$dst,
                       (vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>,
             Sched<[sched.XMM]>;
    def mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
                 (ins i128mem:$src1, u8imm:$src2),
                 OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 [(set VR128:$dst,
                       (vt128 (OpNode (memop addr:$src1),
                                      (i8 timm:$src2))))]>,
             Sched<[sched.XMM.Folded]>;
  }
}
} // ExeDomain = SSEPackedInt

// The three shuffles differ in element type, node, predicate and in the
// trailing prefix class (PD / XS / XD).
defm PSHUFD  : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd,
                             SchedWriteShuffle, NoVLX>, TB, PD;
defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw,
                             SchedWriteShuffle, NoVLX_Or_NoBWI>, TB, XS;
defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw,
                             SchedWriteShuffle, NoVLX_Or_NoBWI>, TB, XD;

//===---------------------------------------------------------------------===//
// Packed Integer Pack Instructions (SSE & AVX)
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in {
/// sse2_pack - Pack operator built on the PDI instruction class. Sources are
/// typed ArgVT and the result OutVT, all in register class RC.
///   rr - both sources in registers;
///   rm - second source is an x86memop operand read through ld_frag.
/// Is2Addr selects the two-operand asm string (default) over the
/// three-operand one.
multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
                     ValueType ArgVT, SDNode OpNode, RegisterClass RC,
                     X86MemOperand x86memop, X86FoldableSchedWrite sched,
                     PatFrag ld_frag, bit Is2Addr = 1> {
  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
               !if(Is2Addr,
                   OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                   OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set RC:$dst,
                     (OutVT (OpNode (ArgVT RC:$src1), RC:$src2)))]>,
           Sched<[sched]>;
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86memop:$src2),
               !if(Is2Addr,
                   OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                   OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set RC:$dst,
                     (OutVT (OpNode (ArgVT RC:$src1),
                                    (ld_frag addr:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

/// sse4_pack - Same shape as sse2_pack, but the records derive from the
/// SS48I instruction class instead of PDI.
multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
                     ValueType ArgVT, SDNode OpNode, RegisterClass RC,
                     X86MemOperand x86memop, X86FoldableSchedWrite sched,
                     PatFrag ld_frag, bit Is2Addr = 1> {
  def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
                 !if(Is2Addr,
                     OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                     OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set RC:$dst,
                       (OutVT (OpNode (ArgVT RC:$src1), RC:$src2)))]>,
           Sched<[sched]>;
  def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
                 (ins RC:$src1, x86memop:$src2),
                 !if(Is2Addr,
                     OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                     OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set RC:$dst,
                       (OutVT (OpNode (ArgVT RC:$src1),
                                      (ld_frag addr:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// VR128 "v" forms: three-operand asm (Is2Addr = 0), memory folded via `load`.
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss,
                             VR128, i128mem, SchedWriteShuffle.XMM, load, 0>,
                   VEX, VVVV, WIG;
  defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss,
                             VR128, i128mem, SchedWriteShuffle.XMM, load, 0>,
                   VEX, VVVV, WIG;

  defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus,
                             VR128, i128mem, SchedWriteShuffle.XMM, load, 0>,
                   VEX, VVVV, WIG;
  defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus,
                             VR128, i128mem, SchedWriteShuffle.XMM, load, 0>,
                   VEX, VVVV, WIG;
}

// VR256 "v" forms (add VEX_L).
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  defm VPACKSSWBY : sse2_pack<0x63, "vpacksswb", v32i8, v16i16, X86Packss,
                              VR256, i256mem, SchedWriteShuffle.YMM, load, 0>,
                    VEX, VVVV, VEX_L, WIG;
  defm VPACKSSDWY : sse2_pack<0x6B, "vpackssdw", v16i16, v8i32, X86Packss,
                              VR256, i256mem, SchedWriteShuffle.YMM, load, 0>,
                    VEX, VVVV, VEX_L, WIG;

  defm VPACKUSWBY : sse2_pack<0x67, "vpackuswb", v32i8, v16i16, X86Packus,
                              VR256, i256mem, SchedWriteShuffle.YMM, load, 0>,
                    VEX, VVVV, VEX_L, WIG;
  defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus,
                              VR256, i256mem, SchedWriteShuffle.YMM, load, 0>,
                    VEX, VVVV, VEX_L, WIG;
}

// Plain forms: $src1 tied to $dst, memory folded via `memop`.
let Constraints = "$src1 = $dst" in {
  defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss, VR128,
                            i128mem, SchedWriteShuffle.XMM, memop>;
  defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss, VR128,
                            i128mem, SchedWriteShuffle.XMM, memop>;

  defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus, VR128,
                            i128mem, SchedWriteShuffle.XMM, memop>;

  defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus, VR128,
                            i128mem, SchedWriteShuffle.XMM, memop>;
}
} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Unpack Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in {
/// sse2_unpack - Unpack operator over a single value type vt in register
/// class RC.
///   rr - both sources in registers;
///   rm - second source is an x86memop operand read through ld_frag.
/// Is2Addr selects the two-operand asm string (default) over the
/// three-operand one.
multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
                       SDNode OpNode, RegisterClass RC, X86MemOperand x86memop,
                       X86FoldableSchedWrite sched, PatFrag ld_frag,
                       bit Is2Addr = 1> {
  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
               !if(Is2Addr,
                   OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                   OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
           Sched<[sched]>;
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86memop:$src2),
               !if(Is2Addr,
                   OpcodeStr # "\t{$src2, $dst|$dst, $src2}",
                   OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set RC:$dst,
                     (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// VR128 "v" forms on v16i8 / v8i16, guarded by NoVLX_Or_NoBWI.
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, VR128,
                                i128mem, SchedWriteShuffle.XMM, load, 0>,
                    VEX, VVVV, WIG;
  defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, VR128,
                                i128mem, SchedWriteShuffle.XMM, load, 0>,
                    VEX, VVVV, WIG;
  defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, VR128,
                                i128mem, SchedWriteShuffle.XMM, load, 0>,
                    VEX, VVVV, WIG;
  defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, VR128,
                                i128mem, SchedWriteShuffle.XMM, load, 0>,
                    VEX, VVVV, WIG;
}

// VR128 "v" forms on v4i32 / v2i64, guarded by NoVLX.
let Predicates = [HasAVX, NoVLX] in {
  defm VPUNPCKLDQ  : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, VR128,
                                 i128mem, SchedWriteShuffle.XMM, load, 0>,
                     VEX, VVVV, WIG;
  defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, VR128,
                                 i128mem, SchedWriteShuffle.XMM, load, 0>,
                     VEX, VVVV, WIG;
  defm VPUNPCKHDQ  : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, VR128,
                                 i128mem, SchedWriteShuffle.XMM, load, 0>,
                     VEX, VVVV, WIG;
  defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, VR128,
                                 i128mem, SchedWriteShuffle.XMM, load, 0>,
                     VEX, VVVV, WIG;
}

// VR256 "v" forms on v32i8 / v16i16, guarded by NoVLX_Or_NoBWI.
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  defm VPUNPCKLBWY : sse2_unpack<0x60, "vpunpcklbw", v32i8, X86Unpckl, VR256,
                                 i256mem, SchedWriteShuffle.YMM, load, 0>,
                     VEX, VVVV, VEX_L, WIG;
  defm VPUNPCKLWDY : sse2_unpack<0x61, "vpunpcklwd", v16i16, X86Unpckl, VR256,
                                 i256mem, SchedWriteShuffle.YMM, load, 0>,
                     VEX, VVVV, VEX_L, WIG;
  defm VPUNPCKHBWY : sse2_unpack<0x68, "vpunpckhbw", v32i8, X86Unpckh, VR256,
                                 i256mem, SchedWriteShuffle.YMM, load, 0>,
                     VEX, VVVV, VEX_L, WIG;
  defm VPUNPCKHWDY : sse2_unpack<0x69, "vpunpckhwd", v16i16, X86Unpckh, VR256,
                                 i256mem, SchedWriteShuffle.YMM, load, 0>,
                     VEX, VVVV, VEX_L, WIG;
}

// VR256 "v" forms on v8i32 / v4i64, guarded by NoVLX.
let Predicates = [HasAVX2, NoVLX] in {
  defm VPUNPCKLDQY  : sse2_unpack<0x62, "vpunpckldq", v8i32, X86Unpckl, VR256,
                                  i256mem, SchedWriteShuffle.YMM, load, 0>,
                      VEX, VVVV, VEX_L, WIG;
  defm VPUNPCKLQDQY : sse2_unpack<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, VR256,
                                  i256mem, SchedWriteShuffle.YMM, load, 0>,
                      VEX, VVVV, VEX_L, WIG;
  defm VPUNPCKHDQY  : sse2_unpack<0x6A, "vpunpckhdq", v8i32, X86Unpckh, VR256,
                                  i256mem, SchedWriteShuffle.YMM, load, 0>,
                      VEX, VVVV, VEX_L, WIG;
  defm VPUNPCKHQDQY : sse2_unpack<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, VR256,
                                  i256mem, SchedWriteShuffle.YMM, load, 0>,
                      VEX, VVVV, VEX_L, WIG;
}

// Plain forms: $src1 tied to $dst, memory folded via `memop`.
let Constraints = "$src1 = $dst" in {
  defm PUNPCKLBW  : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, VR128,
                                i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKLWD  : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, VR128,
                                i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKLDQ  : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, VR128,
                                i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, VR128,
                                i128mem, SchedWriteShuffle.XMM, memop>;

  defm PUNPCKHBW  : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, VR128,
                                i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKHWD  : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, VR128,
                                i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKHDQ  : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, VR128,
                                i128mem, SchedWriteShuffle.XMM, memop>;
  defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, VR128,
                                i128mem, SchedWriteShuffle.XMM, memop>;
}
} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Extract and Insert
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in {
/// sse2_pinsrw - PINSRW register (rr) and memory (rm) forms, opcode 0xC4.
/// Is2Addr selects the assembly string: the two-operand "pinsrw" form when
/// set, the three-operand "vpinsrw" form (explicit $src1) when clear.
multiclass sse2_pinsrw<bit Is2Addr = 1> {
  // Insert the value held in a GR32/GR64 register at the lane selected by the
  // immediate $src3.
  def rr : Ii8<0xC4, MRMSrcReg,
               (outs VR128:$dst), (ins VR128:$src1,
                                       GR32orGR64:$src2, u8imm:$src3),
               !if(Is2Addr,
                   "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                   "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
               [(set VR128:$dst,
                 (X86pinsrw VR128:$src1, GR32orGR64:$src2, timm:$src3))]>,
           Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
  // Same operation with the inserted value folded from a 16-bit memory
  // operand (matched as an extending i16 load).
  def rm : Ii8<0xC4, MRMSrcMem,
               (outs VR128:$dst), (ins VR128:$src1,
                                       i16mem:$src2, u8imm:$src3),
               !if(Is2Addr,
                   "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                   "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
               [(set VR128:$dst,
                 (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
                            timm:$src3))]>,
           Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

// Extract
// VEX-encoded form; only selected when AVX is available and BWI is not.
let Predicates = [HasAVX, NoBWI] in
def VPEXTRWrr : Ii8<0xC5, MRMSrcReg,
                    (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
                    "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
                                            timm:$src2))]>,
                TB, PD, VEX, WIG, Sched<[WriteVecExtract]>;
// Legacy-encoded form.
def PEXTRWrr : PDIi8<0xC5, MRMSrcReg,
                     (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
                     "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
                                             timm:$src2))]>,
               Sched<[WriteVecExtract]>;

// Insert
// Three-operand VEX form: Is2Addr = 0 selects the "vpinsrw" asm string.
let Predicates = [HasAVX, NoBWI] in
defm VPINSRW : sse2_pinsrw<0>, TB, PD, VEX, VVVV, WIG;

// Two-operand legacy form: the destination is tied to $src1.
let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in
defm PINSRW : sse2_pinsrw, TB, PD;

} // ExeDomain = SSEPackedInt

// Always select FP16 instructions if available.
// f16 lowering through PINSRW/PEXTRW on lane 0 of a v8i16.
// The negative AddedComplexity lowers the priority of these patterns relative
// to competing patterns that match the same nodes.
let Predicates = [UseSSE2], AddedComplexity = -10 in {
  // f16 load: insert the 16-bit memory value into lane 0 of an undefined
  // vector, then view the register as FR16.
  def : Pat<(f16 (load addr:$src)),
            (COPY_TO_REGCLASS
              (PINSRWrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16)>;
  // f16 store: extract lane 0 into a GPR and store its low 16 bits.
  def : Pat<(store f16:$src, addr:$dst),
            (MOV16mr addr:$dst,
              (EXTRACT_SUBREG
                (PEXTRWrr (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0),
                sub_16bit))>;
  // f16 -> i16 bitcast: extract lane 0 and take the 16-bit subregister.
  def : Pat<(i16 (bitconvert f16:$src)),
            (EXTRACT_SUBREG
              (PEXTRWrr (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0),
              sub_16bit)>;
  // i16 -> f16 bitcast: widen the GR16 via INSERT_SUBREG and insert it into
  // lane 0 of an undefined vector.
  def : Pat<(f16 (bitconvert i16:$src)),
            (COPY_TO_REGCLASS
              (PINSRWrr (v8i16 (IMPLICIT_DEF)),
                        (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit),
                        0),
              FR16)>;
}

// VEX-encoded equivalents of the load and bitcast patterns above. No f16
// store pattern is defined in this group.
let Predicates = [HasAVX, NoBWI] in {
  def : Pat<(f16 (load addr:$src)),
            (COPY_TO_REGCLASS
              (VPINSRWrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16)>;
  def : Pat<(i16 (bitconvert f16:$src)),
            (EXTRACT_SUBREG
              (VPEXTRWrr (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0),
              sub_16bit)>;
  def : Pat<(f16 (bitconvert i16:$src)),
            (COPY_TO_REGCLASS
              (VPINSRWrr (v8i16 (IMPLICIT_DEF)),
                         (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit),
                         0),
              FR16)>;
}

//===---------------------------------------------------------------------===//
// SSE2 - Packed Mask Creation
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in {

// NOTE(review): the asm strings of the V* records below omit the "v" prefix;
// presumably the VPDI format class prepends it - confirm in
// X86InstrFormats.td.
def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
                       (ins VR128:$src),
                       "pmovmskb\t{$src, $dst|$dst, $src}",
                       [(set GR32orGR64:$dst,
                         (X86movmsk (v16i8 VR128:$src)))]>,
                  Sched<[WriteVecMOVMSK]>, VEX, WIG;

// 256-bit source form.
let Predicates = [HasAVX2] in {
def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
                        (ins VR256:$src),
                        "pmovmskb\t{$src, $dst|$dst, $src}",
                        [(set GR32orGR64:$dst,
                          (X86movmsk (v32i8 VR256:$src)))]>,
                   Sched<[WriteVecMOVMSKY]>, VEX, VEX_L, WIG;
}

def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src),
                     "pmovmskb\t{$src, $dst|$dst, $src}",
                     [(set GR32orGR64:$dst,
                       (X86movmsk (v16i8 VR128:$src)))]>,
                 Sched<[WriteVecMOVMSK]>;

} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// SSE2 - Conditional Store
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in {
// As VEX does not have separate instruction contexts for address size
// overrides, VMASKMOVDQU and VMASKMOVDQU64 would have a decode conflict.
// Prefer VMASKMOVDQU64.
//
// Each record lists RDI or EDI in Uses and passes the same register as the
// third operand of the maskmov intrinsic.
let Uses = [RDI], Predicates = [HasAVX,In64BitMode] in
def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
                         (ins VR128:$src, VR128:$mask),
                         "maskmovdqu\t{$mask, $src|$src, $mask}",
                         [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask,
                                                    RDI)]>,
                    VEX, WIG;
// EDI-addressed VEX form; marked isAsmParserOnly because of the decode
// conflict described above.
let Uses = [EDI], Predicates = [HasAVX], isAsmParserOnly = 1 in
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
                       (ins VR128:$src, VR128:$mask),
                       "maskmovdqu\t{$mask, $src|$src, $mask}",
                       [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask,
                                                  EDI)]>,
                  VEX, WIG;

let Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in
def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
                       "maskmovdqu\t{$mask, $src|$src, $mask}",
                       [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask,
                                                  RDI)]>;
let Uses = [EDI], Predicates = [UseSSE2] in
def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
                     "maskmovdqu\t{$mask, $src|$src, $mask}",
                     [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask,
                                                EDI)]>;

} // ExeDomain = SSEPackedInt, SchedRW

//===---------------------------------------------------------------------===//
// SSE2 - Move Doubleword/Quadword
//===---------------------------------------------------------------------===//

//===---------------------------------------------------------------------===//
// Move Int Doubleword/Quadword to Packed Doubleword/Quadword Int
//
let ExeDomain = SSEPackedInt in {
// VEX-encoded forms.
def VMOVDI2PDIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v4i32 (scalar_to_vector GR32:$src)))]>,
                   VEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVDI2PDIrm : VS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                   VEX, Sched<[WriteVecLoad]>;
def VMOV64toPQIrr : VRS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                          "movq\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst,
                            (v2i64 (scalar_to_vector GR64:$src)))]>,
                    VEX, Sched<[WriteVecMoveFromGpr]>;
// Pattern-less memory form (empty pattern list, ForceDisassemble = 1).
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIrm : VRS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                          "movq\t{$src, $dst|$dst, $src}", []>,
                    VEX, Sched<[WriteVecLoad]>;
// GR64 -> FR64 bitcast.
let isCodeGenOnly = 1 in
def VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                         [(set FR64:$dst, (bitconvert GR64:$src))]>,
                   VEX, Sched<[WriteVecMoveFromGpr]>;

// Legacy-encoded forms.
def MOVDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))]>,
                  Sched<[WriteVecMoveFromGpr]>;
def MOVDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                  Sched<[WriteVecLoad]>;
def MOV64toPQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v2i64 (scalar_to_vector GR64:$src)))]>,
                   Sched<[WriteVecMoveFromGpr]>;
// Pattern-less memory form (empty pattern list, ForceDisassemble = 1).
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def MOV64toPQIrm : RS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                        "movq\t{$src, $dst|$dst, $src}", []>,
                   Sched<[WriteVecLoad]>;
// GR64 -> FR64 bitcast.
let isCodeGenOnly = 1 in
def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
                       "movq\t{$src, $dst|$dst, $src}",
                       [(set FR64:$dst, (bitconvert GR64:$src))]>,
                  Sched<[WriteVecMoveFromGpr]>;
} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// Move Int Doubleword to Single Scalar
//
// GR32 -> FR32 bitcast.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
  def VMOVDI2SSrr : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set FR32:$dst, (bitconvert GR32:$src))]>,
                    VEX, Sched<[WriteVecMoveFromGpr]>;

  def MOVDI2SSrr : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert GR32:$src))]>,
                   Sched<[WriteVecMoveFromGpr]>;

} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int first element to Doubleword Int
//
let ExeDomain = SSEPackedInt in {
// Element 0 of a v4i32 to a GR32 (rr) or to 32-bit memory (mr).
def VMOVPDI2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set GR32:$dst, (extractelt (v4i32 VR128:$src),
                                                     (iPTR 0)))]>,
                   VEX, Sched<[WriteVecMoveToGpr]>;
def VMOVPDI2DImr : VS2I<0x7E, MRMDestMem, (outs),
                        (ins i32mem:$dst, VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(store (i32 (extractelt (v4i32 VR128:$src),
                                                 (iPTR 0))), addr:$dst)]>,
                   VEX, Sched<[WriteVecStore]>;
def MOVPDI2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (extractelt (v4i32 VR128:$src),
                                                   (iPTR 0)))]>,
                  Sched<[WriteVecMoveToGpr]>;
def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(store (i32 (extractelt (v4i32 VR128:$src),
                                               (iPTR 0))), addr:$dst)]>,
                  Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// Move Packed Quadword Int first element to Quadword Int
//
let ExeDomain = SSEPackedInt in {
// Element 0 of a v2i64 to a GR64.
let SchedRW = [WriteVecMoveToGpr] in {
def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
                          "movq\t{$src, $dst|$dst, $src}",
                          [(set GR64:$dst, (extractelt (v2i64 VR128:$src),
                                                       (iPTR 0)))]>,
                    VEX;

def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set GR64:$dst, (extractelt (v2i64 VR128:$src),
                                                     (iPTR 0)))]>;
} // SchedRW

// Pattern-less store forms (empty pattern list, ForceDisassemble = 1).
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64mr : VRS2I<0x7E, MRMDestMem, (outs),
                          (ins i64mem:$dst, VR128:$src),
                          "movq\t{$src, $dst|$dst, $src}", []>,
                    VEX, Sched<[WriteVecStore]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def MOVPQIto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}", []>,
                   Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// Bitcast FR64 <-> GR64
//
// Only the FR64 -> GR64 direction is defined in this group.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
  def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
                           "movq\t{$src, $dst|$dst, $src}",
                           [(set GR64:$dst, (bitconvert FR64:$src))]>,
                     VEX, Sched<[WriteVecMoveToGpr]>;

  def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (bitconvert FR64:$src))]>,
                    Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

//===---------------------------------------------------------------------===//
// Move Scalar Single to Doubleword Int (bitcast FR32 -> GR32)
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
  def VMOVSS2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst, (bitconvert FR32:$src))]>,
                    VEX, Sched<[WriteVecMoveToGpr]>;
  def MOVSS2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (bitconvert FR32:$src))]>,
                   Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

let Predicates = [UseAVX] in {
  // An any-extended GR8 is placed in an undefined GR32 via INSERT_SUBREG and
  // then moved with the 32-bit form.
  def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
            (VMOVDI2PDIrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                              GR8:$src, sub_8bit)))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIrr GR64:$src)>;

  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  // These instructions also write zeros in the high part of a 256-bit register.
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v4i32 (VMOVDI2PDIrm addr:$src)), sub_xmm)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (MOVDI2PDIrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (MOV64toPQIrr GR64:$src)>;
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (MOVDI2PDIrm addr:$src)>;
}

// Before the MC layer of LLVM existed, clang emitted "movd" assembly instead of
// "movq" due to MacOS parsing limitation. In order to parse old assembly, we add
// these aliases.
def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
                (MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
                (MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
// Allow "vmovd" but print "vmovq" since we don't need compatibility for AVX.
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIrr VR128:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64rr GR64:$dst, VR128:$src), 0>;

//===---------------------------------------------------------------------===//
// SSE2 - Move Quadword
//===---------------------------------------------------------------------===//

//===---------------------------------------------------------------------===//
// Move Quadword Int to Packed Quadword Int
//

let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLoad] in {
def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                     "vmovq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                   TB, XS, VEX, Requires<[UseAVX]>, WIG;
def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                    "movq\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst,
                      (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                  TB, XS, Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
} // ExeDomain, SchedRW

//===---------------------------------------------------------------------===//
// Move Packed Quadword Int to Quadword Int
//
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecStore] in {
// Store element 0 of a v2i64 to 64-bit memory.
def VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(store (i64 (extractelt (v2i64 VR128:$src),
                                                 (iPTR 0))), addr:$dst)]>,
                   VEX, WIG;
def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                      "movq\t{$src, $dst|$dst, $src}",
                      [(store (i64 (extractelt (v2i64 VR128:$src),
                                               (iPTR 0))), addr:$dst)]>;
} // ExeDomain, SchedRW

// For disassembler only
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVPQI2QIrr : VS2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}", []>, VEX, WIG;
def MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                      "movq\t{$src, $dst|$dst, $src}", []>;
}

// The ".s" mnemonics map to the 0xD6 MRMDestReg records defined above.
def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIrr VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
                (MOVPQI2QIrr VR128:$dst, VR128:$src), 0>;

let Predicates = [UseAVX] in {
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIrm addr:$src)>;
  // 256-bit result: the 128-bit load is placed in the low half through
  // SUBREG_TO_REG.
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIrm addr:$src)), sub_xmm)>;

  def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
            (VMOVPQI2QImr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(v2i64 (X86vzload64 addr:$src)), (MOVQI2PQIrm addr:$src)>;

  def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
            (MOVPQI2QImr addr:$dst, VR128:$src)>;
}

//===---------------------------------------------------------------------===//
// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
// IA32 document. movq xmm1, xmm2 does clear the high bits.
//
// Register-to-register MOVQ forms. Both select X86vzmovl on v2i64; the VEX
// form requires UseAVX and the legacy form requires UseSSE2.
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
                         TB, XS, VEX, Requires<[UseAVX]>, WIG;
def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
                        TB, XS, Requires<[UseSSE2]>;
} // ExeDomain, SchedRW

// Reuse the integer MOVQ forms for the v2f64 flavour of X86vzmovl.
let Predicates = [UseAVX] in {
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
            (VMOVZPQILo2PQIrr VR128:$src)>;
}
let Predicates = [UseSSE2] in {
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
            (MOVZPQILo2PQIrr VR128:$src)>;
}

// 256-bit X86vzmovl: extract the low xmm subregister, run the 128-bit VMOVQ
// on it, and rebuild the ymm value with SUBREG_TO_REG.
let Predicates = [UseAVX] in {
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIrr
                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIrr
                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)))),
             sub_xmm)>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

/// sse3_replicate_sfp - Defines the rr and rm forms of a unary single-FP
/// replicate instruction. OpNode is the selected node, vt/RC the result type
/// and register class, mem_frag the load fragment used by the rm pattern, and
/// sched the scheduling class (the rm form uses sched.Folded).
multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
                              ValueType vt, RegisterClass RC, PatFrag mem_frag,
                              X86MemOperand x86memop, X86FoldableSchedWrite sched> {
def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set RC:$dst, (vt (OpNode RC:$src)))]>,
              Sched<[sched]>;
def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>,
              Sched<[sched.Folded]>;
}

let Predicates = [HasAVX, NoVLX] in {
  defm VMOVSHDUP  : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
                                       v4f32, VR128, loadv4f32, f128mem,
                                       SchedWriteFShuffle.XMM>, VEX, WIG;
  defm VMOVSLDUP  : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
                                       v4f32, VR128, loadv4f32, f128mem,
                                       SchedWriteFShuffle.XMM>, VEX, WIG;
  defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
                                       v8f32, VR256, loadv8f32, f256mem,
                                       SchedWriteFShuffle.YMM>, VEX, VEX_L, WIG;
  defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
                                       v8f32, VR256, loadv8f32, f256mem,
                                       SchedWriteFShuffle.YMM>, VEX, VEX_L, WIG;
}
defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
                                   memopv4f32, f128mem, SchedWriteFShuffle.XMM>;
defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
                                   memopv4f32, f128mem, SchedWriteFShuffle.XMM>;

// Integer-typed (v4i32/v8i32) uses of the replicate nodes select the same
// instructions as the FP-typed patterns defined by the multiclass.
let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v4i32 (X86Movshdup VR128:$src)),
            (VMOVSHDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movshdup (load addr:$src))),
            (VMOVSHDUPrm addr:$src)>;
  def : Pat<(v4i32 (X86Movsldup VR128:$src)),
            (VMOVSLDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movsldup (load addr:$src))),
            (VMOVSLDUPrm addr:$src)>;
  def : Pat<(v8i32 (X86Movshdup VR256:$src)),
            (VMOVSHDUPYrr VR256:$src)>;
  def : Pat<(v8i32 (X86Movshdup (load addr:$src))),
            (VMOVSHDUPYrm addr:$src)>;
  def : Pat<(v8i32 (X86Movsldup VR256:$src)),
            (VMOVSLDUPYrr VR256:$src)>;
  def : Pat<(v8i32 (X86Movsldup (load addr:$src))),
            (VMOVSLDUPYrm addr:$src)>;
}

let Predicates = [UseSSE3] in {
  def : Pat<(v4i32 (X86Movshdup VR128:$src)),
            (MOVSHDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movshdup (memop addr:$src))),
            (MOVSHDUPrm addr:$src)>;
  def : Pat<(v4i32 (X86Movsldup VR128:$src)),
            (MOVSLDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movsldup (memop addr:$src))),
            (MOVSLDUPrm addr:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Replicate Double FP - MOVDDUP
//===---------------------------------------------------------------------===//

/// sse3_replicate_dfp - 128-bit MOVDDUP rr/rm forms. The rm form takes an
/// f64mem operand and matches a scalar f64 load wrapped in scalar_to_vector.
multiclass sse3_replicate_dfp<string OpcodeStr, X86SchedWriteWidths sched> {
def rr  : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set VR128:$dst, (v2f64 (X86Movddup VR128:$src)))]>,
               Sched<[sched.XMM]>;
def rm  : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set VR128:$dst,
                 (v2f64 (X86Movddup
                         (scalar_to_vector (loadf64 addr:$src)))))]>,
               Sched<[sched.XMM.Folded]>;
}

// FIXME: Merge with above classes when there are patterns for the ymm version
/// sse3_replicate_dfp_y - 256-bit MOVDDUP rr/rm forms; the rm form matches a
/// full v4f64 load from an f256mem operand.
multiclass sse3_replicate_dfp_y<string OpcodeStr, X86SchedWriteWidths sched> {
def rr  : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>,
               Sched<[sched.YMM]>;
def rm  : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set VR256:$dst,
                 (v4f64 (X86Movddup (loadv4f64 addr:$src))))]>,
               Sched<[sched.YMM.Folded]>;
}

let Predicates = [HasAVX, NoVLX] in {
  defm VMOVDDUP  : sse3_replicate_dfp<"vmovddup", SchedWriteFShuffle>,
                                      VEX, WIG;
  defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup", SchedWriteFShuffle>,
                                        VEX, VEX_L, WIG;
}

defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>;


// Fold a zero-extending 64-bit vector load directly into (V)MOVDDUP.
// NOTE(review): the trailing Requires<[HasAVX]> sits inside a
// `let Predicates = [HasAVX, NoVLX]` scope and looks redundant - confirm which
// of the two TableGen applies before removing either.
let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
}

let Predicates = [UseSSE3] in {
  def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
            (MOVDDUPrm addr:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Move Unaligned Integer
//===---------------------------------------------------------------------===//

// (V)LDDQU is selected only from its intrinsics; there is no register form.
let Predicates = [HasAVX] in {
  def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                      "vlddqu\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
                      Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, WIG;
  def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                       "vlddqu\t{$src, $dst|$dst, $src}",
                       [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>,
                       Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, VEX_L, WIG;
} // Predicates

def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "lddqu\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
                   Sched<[SchedWriteVecMoveLS.XMM.RM]>;

//===---------------------------------------------------------------------===//
// SSE3 - Arithmetic
//===---------------------------------------------------------------------===//

/// sse3_addsub - rr/rm forms of ADDSUBPS/PD (opcode 0xD0) selecting X86Addsub.
/// Is2Addr picks the two-operand assembly string when set and the
/// three-operand string otherwise. Both forms read MXCSR and may raise FP
/// exceptions.
multiclass sse3_addsub<string OpcodeStr, ValueType vt, RegisterClass RC,
                       X86MemOperand x86memop, X86FoldableSchedWrite sched,
                       PatFrag ld_frag, bit Is2Addr = 1> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : I<0xD0, MRMSrcReg,
       (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (X86Addsub RC:$src1, RC:$src2)))]>,
       Sched<[sched]>;
  def rm : I<0xD0, MRMSrcMem,
       (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (X86Addsub RC:$src1, (ld_frag addr:$src2))))]>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

let Predicates = [HasAVX] in {
  let ExeDomain = SSEPackedSingle in {
    defm VADDSUBPS : sse3_addsub<"vaddsubps", v4f32, VR128, f128mem,
                                 SchedWriteFAddSizes.PS.XMM, loadv4f32, 0>,
                                 TB, XD, VEX, VVVV, WIG;
    defm VADDSUBPSY : sse3_addsub<"vaddsubps", v8f32, VR256, f256mem,
                                  SchedWriteFAddSizes.PS.YMM, loadv8f32, 0>,
                                  TB, XD, VEX, VVVV, VEX_L, WIG;
  }
  let ExeDomain = SSEPackedDouble in {
    defm VADDSUBPD : sse3_addsub<"vaddsubpd", v2f64, VR128, f128mem,
                                 SchedWriteFAddSizes.PD.XMM, loadv2f64, 0>,
                                 TB, PD, VEX, VVVV, WIG;
    defm VADDSUBPDY : sse3_addsub<"vaddsubpd", v4f64, VR256, f256mem,
                                  SchedWriteFAddSizes.PD.YMM, loadv4f64, 0>,
                                  TB, PD, VEX, VVVV, VEX_L, WIG;
  }
}
let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
  let ExeDomain = SSEPackedSingle in
  defm ADDSUBPS : sse3_addsub<"addsubps", v4f32, VR128, f128mem,
                              SchedWriteFAddSizes.PS.XMM, memopv4f32>, TB, XD;
  let ExeDomain = SSEPackedDouble in
  defm ADDSUBPD : sse3_addsub<"addsubpd", v2f64, VR128, f128mem,
                              SchedWriteFAddSizes.PD.XMM, memopv2f64>, TB, PD;
}

//===---------------------------------------------------------------------===//
// SSE3 Instructions
//===---------------------------------------------------------------------===//

// Horizontal ops
/// S3D_Int - rr/rm forms of a binary FP op built on the S3DI format class.
/// Is2Addr picks the two-operand assembly string when set and the
/// three-operand string otherwise.
multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
                   X86MemOperand x86memop, SDNode OpNode,
                   X86FoldableSchedWrite sched, PatFrag ld_frag,
                   bit Is2Addr = 1> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
      [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
      Sched<[sched]>;

  def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
      [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
      Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
/// S3_Int - Same shape as S3D_Int but built on the S3I format class.
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
                  X86MemOperand x86memop, SDNode OpNode,
                  X86FoldableSchedWrite sched, PatFrag ld_frag,
                  bit Is2Addr = 1> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
      [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
      Sched<[sched]>;

  def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
      [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
      Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

let Predicates = [HasAVX] in {
  let ExeDomain = SSEPackedSingle in {
    defm VHADDPS  : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
                            X86fhadd, WriteFHAdd, loadv4f32, 0>, VEX, VVVV, WIG;
    defm VHSUBPS  : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
                            X86fhsub, WriteFHAdd, loadv4f32, 0>, VEX, VVVV, WIG;
    defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
                            X86fhadd, WriteFHAddY, loadv8f32, 0>, VEX, VVVV, VEX_L, WIG;
    defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
                            X86fhsub, WriteFHAddY, loadv8f32, 0>, VEX, VVVV, VEX_L, WIG;
  }
  let ExeDomain = SSEPackedDouble in {
    defm VHADDPD  : S3_Int<0x7C, "vhaddpd", v2f64, VR128, f128mem,
                           X86fhadd, WriteFHAdd, loadv2f64, 0>, VEX, VVVV, WIG;
    defm VHSUBPD  : S3_Int<0x7D, "vhsubpd", v2f64, VR128, f128mem,
                           X86fhsub, WriteFHAdd, loadv2f64, 0>, VEX, VVVV, WIG;
    defm VHADDPDY : S3_Int<0x7C, "vhaddpd", v4f64, VR256, f256mem,
                           X86fhadd, WriteFHAddY, loadv4f64, 0>, VEX, VVVV, VEX_L, WIG;
    defm VHSUBPDY : S3_Int<0x7D, "vhsubpd", v4f64, VR256, f256mem,
                           X86fhsub, WriteFHAddY, loadv4f64, 0>, VEX, VVVV, VEX_L, WIG;
  }
}

let Constraints = "$src1 = $dst" in {
  let ExeDomain = SSEPackedSingle in {
    defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd,
                          WriteFHAdd, memopv4f32>;
    defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub,
                          WriteFHAdd, memopv4f32>;
  }
  let ExeDomain = SSEPackedDouble in {
    defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd,
                         WriteFHAdd, memopv2f64>;
    defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub,
                         WriteFHAdd, memopv2f64>;
  }
}

//===---------------------------------------------------------------------===//
// SSSE3 - Packed Absolute Instructions
//===---------------------------------------------------------------------===//

/// SS3I_unop_rm - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
/// Defines 128-bit rr/rm forms; ld_frag is the load fragment matched by rm.
multiclass SS3I_unop_rm<bits<8> opc, string OpcodeStr, ValueType vt,
                        SDNode OpNode, X86SchedWriteWidths sched, PatFrag ld_frag> {
  def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst, (vt (OpNode VR128:$src)))]>,
                 Sched<[sched.XMM]>;

  def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
                 (ins i128mem:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst,
                   (vt (OpNode (ld_frag addr:$src))))]>,
                 Sched<[sched.XMM.Folded]>;
}

/// SS3I_unop_rm_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
// 256-bit counterpart of SS3I_unop_rm: defines Yrr/Yrm on VR256. Unlike the
// 128-bit multiclass, the memory form always matches a plain `load`.
multiclass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt,
                          SDNode OpNode, X86SchedWriteWidths sched> {
  def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
                  (ins VR256:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR256:$dst, (vt (OpNode VR256:$src)))]>,
                  Sched<[sched.YMM]>;

  def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
                  (ins i256mem:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR256:$dst,
                    (vt (OpNode (load addr:$src))))]>,
                  Sched<[sched.YMM.Folded]>;
}

// 128-bit VEX forms. The byte/word variants are gated on NoVLX_Or_NoBWI and
// the dword variant on NoVLX.
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  defm VPABSB  : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, SchedWriteVecALU,
                              load>, VEX, WIG;
  defm VPABSW  : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, SchedWriteVecALU,
                              load>, VEX, WIG;
}
let Predicates = [HasAVX, NoVLX] in {
  defm VPABSD  : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, SchedWriteVecALU,
                              load>, VEX, WIG;
}
// 256-bit VEX forms (AVX2). These reuse the VPABSB/VPABSW prefixes; the
// multiclass supplies the Yrr/Yrm suffixes.
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  defm VPABSB  : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs, SchedWriteVecALU>,
                                VEX, VEX_L, WIG;
  defm VPABSW  : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs, SchedWriteVecALU>,
                                VEX, VEX_L, WIG;
}
// 256-bit VPABSD (AVX2); the multiclass supplies the Yrr/Yrm suffixes.
let Predicates = [HasAVX2, NoVLX] in {
  defm VPABSD  : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs, SchedWriteVecALU>,
                                VEX, VEX_L, WIG;
}

// Legacy-encoded forms; the memory variants match `memop` rather than `load`.
defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, SchedWriteVecALU,
                          memop>;
defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, SchedWriteVecALU,
                          memop>;
defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, SchedWriteVecALU,
                          memop>;

//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
//===---------------------------------------------------------------------===//

/// SS3I_binop_rm - Simple SSSE3 bin op
/// Defines rr/rm forms selecting OpNode. DstVT is the result type and OpVT the
/// type applied to $src1. Is2Addr picks the two-operand assembly string when
/// set and the three-operand string otherwise.
multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         ValueType DstVT, ValueType OpVT, RegisterClass RC,
                         PatFrag memop_frag, X86MemOperand x86memop,
                         X86FoldableSchedWrite sched, bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (DstVT (OpNode (OpVT RC:$src1), RC:$src2)))]>,
       Sched<[sched]>;
  def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst,
         (DstVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))]>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}

/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
/// 128-bit rr/rm forms selected from the intrinsic IntId128; ld_frag is the
/// load fragment matched by the rm form.
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
                             Intrinsic IntId128, X86FoldableSchedWrite sched,
                             PatFrag ld_frag, bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1, VR128:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
       Sched<[sched]>;
  def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
       (ins VR128:$src1, i128mem:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst,
         (IntId128 VR128:$src1, (ld_frag addr:$src2)))]>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}

/// SS3I_binop_rm_int_y - 256-bit Yrr/Yrm forms selected from the intrinsic
/// IntId256. Always uses the three-operand assembly string, and the memory
/// form matches a plain `load`.
multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
                               Intrinsic IntId256,
                               X86FoldableSchedWrite sched> {
  let isCommutable = 1 in
  def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
       (ins VR256:$src1, VR256:$src2),
       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
       Sched<[sched]>;
  def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
       (ins VR256:$src1, i256mem:$src2),
       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set VR256:$dst,
         (IntId256 VR256:$src1, (load addr:$src2)))]>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// 128-bit VEX forms gated on NoVLX_Or_NoBWI.
let ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
  defm VPSHUFB    : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8,
                                  VR128, load, i128mem,
                                  SchedWriteVarShuffle.XMM, 0>, VEX, VVVV, WIG;
  defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16,
                                  v16i8, VR128, load, i128mem,
                                  SchedWriteVecIMul.XMM, 0>, VEX, VVVV, WIG;
}
defm VPMULHRSW    : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, v8i16,
                                  VR128, load, i128mem,
                                  SchedWriteVecIMul.XMM, 0>, VEX, VVVV, WIG;
}

// 128-bit VEX forms that only need HasAVX.
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
  defm VPHADDW    : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128,
                                  load, i128mem,
                                  SchedWritePHAdd.XMM, 0>, VEX, VVVV, WIG;
  defm VPHADDD    : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128,
                                  load, i128mem,
                                  SchedWritePHAdd.XMM, 0>, VEX, VVVV, WIG;
  defm VPHSUBW    : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128,
                                  load, i128mem,
                                  SchedWritePHAdd.XMM, 0>, VEX, VVVV, WIG;
  defm VPHSUBD    : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
                                  load, i128mem,
                                  SchedWritePHAdd.XMM, 0>, VEX, VVVV, WIG;
  defm VPSIGNB    : SS3I_binop_rm_int<0x08, "vpsignb",
                                      int_x86_ssse3_psign_b_128,
                                      SchedWriteVecALU.XMM, load, 0>, VEX, VVVV, WIG;
  defm VPSIGNW    : SS3I_binop_rm_int<0x09, "vpsignw",
                                      int_x86_ssse3_psign_w_128,
                                      SchedWriteVecALU.XMM, load, 0>, VEX, VVVV, WIG;
  defm VPSIGND    : SS3I_binop_rm_int<0x0A, "vpsignd",
                                      int_x86_ssse3_psign_d_128,
                                      SchedWriteVecALU.XMM, load, 0>, VEX, VVVV, WIG;
  defm VPHADDSW   : SS3I_binop_rm_int<0x03, "vphaddsw",
                                      int_x86_ssse3_phadd_sw_128,
                                      SchedWritePHAdd.XMM, load, 0>, VEX, VVVV, WIG;
  defm VPHSUBSW   : SS3I_binop_rm_int<0x07, "vphsubsw",
                                      int_x86_ssse3_phsub_sw_128,
                                      SchedWritePHAdd.XMM, load, 0>, VEX, VVVV, WIG;
}
}

// 256-bit VEX forms (AVX2) gated on NoVLX_Or_NoBWI.
let ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
  defm VPSHUFBY   : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8,
                                  VR256, load, i256mem,
                                  SchedWriteVarShuffle.YMM, 0>, VEX, VVVV, VEX_L, WIG;
  defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16,
                                   v32i8, VR256, load, i256mem,
                                   SchedWriteVecIMul.YMM, 0>, VEX, VVVV, VEX_L, WIG;
}
defm VPMULHRSWY   : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16,
                                  VR256, load, i256mem,
                                  SchedWriteVecIMul.YMM, 0>, VEX, VVVV, VEX_L, WIG;
}

// 256-bit VEX forms that only need HasAVX2. The intrinsic-based entries reuse
// the 128-bit prefixes; SS3I_binop_rm_int_y supplies the Yrr/Yrm suffixes.
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
  defm VPHADDWY   : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16,
                                  VR256, load, i256mem,
                                  SchedWritePHAdd.YMM, 0>, VEX, VVVV, VEX_L, WIG;
  defm VPHADDDY   : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256,
                                  load, i256mem,
                                  SchedWritePHAdd.YMM, 0>, VEX, VVVV, VEX_L, WIG;
  defm VPHSUBWY   : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16,
                                  VR256, load, i256mem,
                                  SchedWritePHAdd.YMM, 0>, VEX, VVVV, VEX_L, WIG;
  defm VPHSUBDY   : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
                                  load, i256mem,
                                  SchedWritePHAdd.YMM, 0>, VEX, VVVV, VEX_L, WIG;
  defm VPSIGNB   : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
                                       SchedWriteVecALU.YMM>, VEX, VVVV, VEX_L, WIG;
  defm VPSIGNW   : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w,
                                       SchedWriteVecALU.YMM>, VEX, VVVV, VEX_L, WIG;
  defm VPSIGND   : SS3I_binop_rm_int_y<0x0A, "vpsignd", int_x86_avx2_psign_d,
                                       SchedWriteVecALU.YMM>, VEX, VVVV, VEX_L, WIG;
  defm VPHADDSW  : SS3I_binop_rm_int_y<0x03, "vphaddsw",
                                       int_x86_avx2_phadd_sw,
                                       SchedWritePHAdd.YMM>, VEX, VVVV, VEX_L, WIG;
  defm VPHSUBSW  : SS3I_binop_rm_int_y<0x07, "vphsubsw",
                                       int_x86_avx2_phsub_sw,
                                       SchedWritePHAdd.YMM>, VEX, VVVV, VEX_L, WIG;
}
}

// None of these have i8 immediate fields.
// Legacy-encoded (non-VEX) SSSE3 binary operations. Constraints ties $src1 to
// $dst, so these use the two-operand asm form (the multiclasses' default).
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
  defm PHADDW    : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, v8i16, VR128,
                                 memop, i128mem, SchedWritePHAdd.XMM>;
  defm PHADDD    : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, v4i32, VR128,
                                 memop, i128mem, SchedWritePHAdd.XMM>;
  defm PHSUBW    : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, v8i16, VR128,
                                 memop, i128mem, SchedWritePHAdd.XMM>;
  defm PHSUBD    : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, v4i32, VR128,
                                 memop, i128mem, SchedWritePHAdd.XMM>;
  defm PSIGNB    : SS3I_binop_rm_int<0x08, "psignb", int_x86_ssse3_psign_b_128,
                                     SchedWriteVecALU.XMM, memop>;
  defm PSIGNW    : SS3I_binop_rm_int<0x09, "psignw", int_x86_ssse3_psign_w_128,
                                     SchedWriteVecALU.XMM, memop>;
  defm PSIGND    : SS3I_binop_rm_int<0x0A, "psignd", int_x86_ssse3_psign_d_128,
                                     SchedWriteVecALU.XMM, memop>;
  defm PSHUFB    : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, v16i8, VR128,
                                 memop, i128mem, SchedWriteVarShuffle.XMM>;
  defm PHADDSW   : SS3I_binop_rm_int<0x03, "phaddsw",
                                     int_x86_ssse3_phadd_sw_128,
                                     SchedWritePHAdd.XMM, memop>;
  defm PHSUBSW   : SS3I_binop_rm_int<0x07, "phsubsw",
                                     int_x86_ssse3_phsub_sw_128,
                                     SchedWritePHAdd.XMM, memop>;
  defm PMADDUBSW : SS3I_binop_rm<0x04, "pmaddubsw", X86vpmaddubsw, v8i16,
                                 v16i8, VR128, memop, i128mem,
                                 SchedWriteVecIMul.XMM>;
}
// PMULHRSW sits outside the isCommutable = 0 scope above.
defm PMULHRSW    : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, v8i16,
                                 VR128, memop, i128mem, SchedWriteVecIMul.XMM>;
}

//===---------------------------------------------------------------------===//
// SSSE3 - Packed Align Instruction Patterns
//===---------------------------------------------------------------------===//

/// ssse3_palignr - Register (rri) and memory (rmi) forms of opcode 0x0F with a
/// u8 immediate, both selected from X86PAlignr on value type VT.
///   memop_frag - load fragment matched for the folded memory operand.
///   Is2Addr    - 1 selects the two-operand asm string (destination tied to
///                $src1 by the instantiation), 0 the three-operand string.
multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
                         PatFrag memop_frag, X86MemOperand x86memop,
                         X86FoldableSchedWrite sched, bit Is2Addr = 1> {
  let hasSideEffects = 0 in {
  def rri : SS3AI<0x0F, MRMSrcReg, (outs RC:$dst),
      (ins RC:$src1, RC:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set RC:$dst, (VT (X86PAlignr RC:$src1, RC:$src2, (i8 timm:$src3))))]>,
      Sched<[sched]>;
  let mayLoad = 1 in
  def rmi : SS3AI<0x0F, MRMSrcMem, (outs RC:$dst),
      (ins RC:$src1, x86memop:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set RC:$dst, (VT (X86PAlignr RC:$src1,
                                     (memop_frag addr:$src2),
                                     (i8 timm:$src3))))]>,
      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
  defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, load, i128mem,
                                SchedWriteShuffle.XMM, 0>, VEX, VVVV, WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
  defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, load, i256mem,
                                 SchedWriteShuffle.YMM, 0>,
                                 VEX, VVVV, VEX_L, WIG;
let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
  defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memop, i128mem,
                               SchedWriteShuffle.XMM>;

//===---------------------------------------------------------------------===//
// SSE3 - Thread synchronization (MONITOR / MWAIT)
//===---------------------------------------------------------------------===//

// All three records are gated on HasSSE3. MONITOR has separate 32-bit and
// 64-bit records because the implicit address register differs (EAX vs. RAX).
let SchedRW = [WriteSystem] in {
let Uses = [EAX, ECX, EDX] in
def MONITOR32rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
                   TB, Requires<[HasSSE3, Not64BitMode]>;
let Uses = [RAX, ECX, EDX] in
def MONITOR64rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
                   TB, Requires<[HasSSE3, In64BitMode]>;

let Uses = [ECX, EAX] in
def MWAITrr   : I<0x01, MRM_C9, (outs), (ins), "mwait",
                  [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
} // SchedRW

// Aliases that accept the implicit register operands spelled out explicitly.
def : InstAlias<"mwait\t{%eax, %ecx|ecx, eax}", (MWAITrr)>, Requires<[Not64BitMode]>;
def : InstAlias<"mwait\t{%rax, %rcx|rcx, rax}", (MWAITrr)>, Requires<[In64BitMode]>;

def : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITOR32rrr)>,
      Requires<[Not64BitMode]>;
def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITOR64rrr)>,
      Requires<[In64BitMode]>;

//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Move with Sign/Zero Extend
// NOTE: Any Extend is promoted to Zero Extend in X86ISelDAGToDAG.cpp
//===----------------------------------------------------------------------===//

/// SS41I_pmovx_rrrm - Register (rr) and memory (rm) forms of one packed-extend
/// instruction. Both are defined with an empty pattern list; the selection
/// patterns are written as standalone Pat records elsewhere in this file.
multiclass SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
                            RegisterClass OutRC, RegisterClass InRC,
                            X86FoldableSchedWrite sched> {
  def rr : SS48I<opc, MRMSrcReg, (outs OutRC:$dst), (ins InRC:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
                 Sched<[sched]>;

  def rm : SS48I<opc, MRMSrcMem, (outs OutRC:$dst), (ins MemOp:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
                 Sched<[sched.Folded]>;
}

/// SS41I_pmovx_rm_all - Expands one extend opcode into three encodings:
///   NAME     - legacy form, VR128 -> VR128.
///   V#NAME   - VEX form, VR128 -> VR128, gated on [HasAVX, prd].
///   V#NAME#Y - VEX.L form, VR128 -> VR256 using MemYOp, gated on
///              [HasAVX2, prd].
multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,
                              X86MemOperand MemOp, X86MemOperand MemYOp,
                              Predicate prd> {
  defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128,
                               SchedWriteShuffle.XMM>;
  let Predicates = [HasAVX, prd] in
    defm V#NAME   : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
                                     VR128, VR128, SchedWriteVecExtend.XMM>,
                                     VEX, WIG;
  let Predicates = [HasAVX2, prd] in
    defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
                                     VR256, VR128, SchedWriteVecExtend.YMM>,
                                     VEX, VEX_L, WIG;
}

/// SS41I_pmovx_rm - Emits the sign-extend (PMOVSX) and zero-extend (PMOVZX)
/// families for one element-width pair. The PMOVZX opcode is the PMOVSX
/// opcode plus 0x10.
multiclass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
                          X86MemOperand MemYOp, Predicate prd> {
  defm PMOVSX#NAME : SS41I_pmovx_rm_all<opc, !strconcat("pmovsx", OpcodeStr),
                                        MemOp, MemYOp, prd>;
  defm PMOVZX#NAME : SS41I_pmovx_rm_all<!add(opc, 0x10),
                                        !strconcat("pmovzx", OpcodeStr),
                                        MemOp, MemYOp, prd>;
}

defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem, NoVLX_Or_NoBWI>;
defm WD : SS41I_pmovx_rm<0x23, "wd", i64mem, i128mem, NoVLX>;
defm DQ : SS41I_pmovx_rm<0x25, "dq", i64mem, i128mem, NoVLX>;

defm BD : SS41I_pmovx_rm<0x21, "bd", i32mem, i64mem, NoVLX>;
defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem, NoVLX>;

defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem, NoVLX>;

// AVX2 Patterns
/// SS41I_pmovx_avx2_patterns - Selection patterns for the 256-bit (Y)
/// packed-extend instructions.
///   OpcPrefix - instruction name prefix; the record is looked up as
///               OpcPrefix # <BW|BD|BQ|WD|WQ|DQ> # Y<rr|rm>.
///   ExtTy     - prefix pasted onto "extloadvi8/16/32" to pick the
///               extending-load fragment.
///   ExtOp     - extend node used where source and result vectors have the
///               same element count (v16i8->v16i16, v8i16->v8i32,
///               v4i32->v4i64).
///   InVecOp   - extend node used where the result has fewer elements than
///               the 128-bit source vector (BD, BQ, WQ).
multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
                                     SDNode ExtOp, SDNode InVecOp> {
  // Register-Register patterns
  let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
  }
  let Predicates = [HasAVX2, NoVLX] in {
  def : Pat<(v8i32 (InVecOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BDYrr) VR128:$src)>;
  def : Pat<(v4i64 (InVecOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BQYrr) VR128:$src)>;

  def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
            (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
  def : Pat<(v4i64 (InVecOp (v8i16 VR128:$src))),
            (!cast<I>(OpcPrefix#WQYrr) VR128:$src)>;

  def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
            (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
  }

  // Simple Register-Memory patterns
  let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;

  def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
  }

  let Predicates = [HasAVX2, NoVLX] in {
  def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
  def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;

  def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
  def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;

  def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
  }

  // AVX2 Register-Memory patterns
  // Fold a load feeding the extend: full 128-bit vector loads for the
  // same-element-count cases, and scalar/zero-extending loads (bitcast to the
  // narrow element type) for the in-vector cases.
  let Predicates = [HasAVX2, NoVLX] in {
  def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;

  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
  }
}

defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", sext, sext_invec>;
defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", zext, zext_invec>;

// SSE4.1/AVX patterns.
51350b57cec5SDimitry Andricmulticlass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy, 51360b57cec5SDimitry Andric SDNode ExtOp> { 51370b57cec5SDimitry Andric let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { 51380b57cec5SDimitry Andric def : Pat<(v8i16 (ExtOp (v16i8 VR128:$src))), 51390b57cec5SDimitry Andric (!cast<I>(OpcPrefix#BWrr) VR128:$src)>; 51400b57cec5SDimitry Andric } 51410b57cec5SDimitry Andric let Predicates = [HasAVX, NoVLX] in { 51420b57cec5SDimitry Andric def : Pat<(v4i32 (ExtOp (v16i8 VR128:$src))), 51430b57cec5SDimitry Andric (!cast<I>(OpcPrefix#BDrr) VR128:$src)>; 51440b57cec5SDimitry Andric def : Pat<(v2i64 (ExtOp (v16i8 VR128:$src))), 51450b57cec5SDimitry Andric (!cast<I>(OpcPrefix#BQrr) VR128:$src)>; 51460b57cec5SDimitry Andric 51470b57cec5SDimitry Andric def : Pat<(v4i32 (ExtOp (v8i16 VR128:$src))), 51480b57cec5SDimitry Andric (!cast<I>(OpcPrefix#WDrr) VR128:$src)>; 51490b57cec5SDimitry Andric def : Pat<(v2i64 (ExtOp (v8i16 VR128:$src))), 51500b57cec5SDimitry Andric (!cast<I>(OpcPrefix#WQrr) VR128:$src)>; 51510b57cec5SDimitry Andric 51520b57cec5SDimitry Andric def : Pat<(v2i64 (ExtOp (v4i32 VR128:$src))), 51530b57cec5SDimitry Andric (!cast<I>(OpcPrefix#DQrr) VR128:$src)>; 51540b57cec5SDimitry Andric } 51550b57cec5SDimitry Andric let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { 51560b57cec5SDimitry Andric def : Pat<(v8i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)), 51570b57cec5SDimitry Andric (!cast<I>(OpcPrefix#BWrm) addr:$src)>; 51580b57cec5SDimitry Andric } 51590b57cec5SDimitry Andric let Predicates = [HasAVX, NoVLX] in { 51600b57cec5SDimitry Andric def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)), 51610b57cec5SDimitry Andric (!cast<I>(OpcPrefix#BDrm) addr:$src)>; 51620b57cec5SDimitry Andric def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)), 51630b57cec5SDimitry Andric (!cast<I>(OpcPrefix#BQrm) addr:$src)>; 51640b57cec5SDimitry Andric 51650b57cec5SDimitry Andric def : Pat<(v4i32 
(!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)), 51660b57cec5SDimitry Andric (!cast<I>(OpcPrefix#WDrm) addr:$src)>; 51670b57cec5SDimitry Andric def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)), 51680b57cec5SDimitry Andric (!cast<I>(OpcPrefix#WQrm) addr:$src)>; 51690b57cec5SDimitry Andric 51700b57cec5SDimitry Andric def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)), 51710b57cec5SDimitry Andric (!cast<I>(OpcPrefix#DQrm) addr:$src)>; 51720b57cec5SDimitry Andric } 51730b57cec5SDimitry Andric let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { 51740b57cec5SDimitry Andric def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), 51750b57cec5SDimitry Andric (!cast<I>(OpcPrefix#BWrm) addr:$src)>; 51760b57cec5SDimitry Andric def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), 51770b57cec5SDimitry Andric (!cast<I>(OpcPrefix#BWrm) addr:$src)>; 51780b57cec5SDimitry Andric def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))), 51790b57cec5SDimitry Andric (!cast<I>(OpcPrefix#BWrm) addr:$src)>; 51800b57cec5SDimitry Andric def : Pat<(v8i16 (ExtOp (loadv16i8 addr:$src))), 51810b57cec5SDimitry Andric (!cast<I>(OpcPrefix#BWrm) addr:$src)>; 51820b57cec5SDimitry Andric } 51830b57cec5SDimitry Andric let Predicates = [HasAVX, NoVLX] in { 51840b57cec5SDimitry Andric def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), 51850b57cec5SDimitry Andric (!cast<I>(OpcPrefix#BDrm) addr:$src)>; 51860b57cec5SDimitry Andric def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))), 51870b57cec5SDimitry Andric (!cast<I>(OpcPrefix#BDrm) addr:$src)>; 51880b57cec5SDimitry Andric def : Pat<(v4i32 (ExtOp (loadv16i8 addr:$src))), 51890b57cec5SDimitry Andric (!cast<I>(OpcPrefix#BDrm) addr:$src)>; 51900b57cec5SDimitry Andric 51910b57cec5SDimitry Andric def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))), 51920b57cec5SDimitry 
Andric (!cast<I>(OpcPrefix#BQrm) addr:$src)>; 51930b57cec5SDimitry Andric def : Pat<(v2i64 (ExtOp (loadv16i8 addr:$src))), 51940b57cec5SDimitry Andric (!cast<I>(OpcPrefix#BQrm) addr:$src)>; 51950b57cec5SDimitry Andric 51960b57cec5SDimitry Andric def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), 51970b57cec5SDimitry Andric (!cast<I>(OpcPrefix#WDrm) addr:$src)>; 51980b57cec5SDimitry Andric def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), 51990b57cec5SDimitry Andric (!cast<I>(OpcPrefix#WDrm) addr:$src)>; 52000b57cec5SDimitry Andric def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))), 52010b57cec5SDimitry Andric (!cast<I>(OpcPrefix#WDrm) addr:$src)>; 52020b57cec5SDimitry Andric def : Pat<(v4i32 (ExtOp (loadv8i16 addr:$src))), 52030b57cec5SDimitry Andric (!cast<I>(OpcPrefix#WDrm) addr:$src)>; 52040b57cec5SDimitry Andric 52050b57cec5SDimitry Andric def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), 52060b57cec5SDimitry Andric (!cast<I>(OpcPrefix#WQrm) addr:$src)>; 52070b57cec5SDimitry Andric def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))), 52080b57cec5SDimitry Andric (!cast<I>(OpcPrefix#WQrm) addr:$src)>; 52090b57cec5SDimitry Andric def : Pat<(v2i64 (ExtOp (loadv8i16 addr:$src))), 52100b57cec5SDimitry Andric (!cast<I>(OpcPrefix#WQrm) addr:$src)>; 52110b57cec5SDimitry Andric 52120b57cec5SDimitry Andric def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), 52130b57cec5SDimitry Andric (!cast<I>(OpcPrefix#DQrm) addr:$src)>; 52140b57cec5SDimitry Andric def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), 52150b57cec5SDimitry Andric (!cast<I>(OpcPrefix#DQrm) addr:$src)>; 52160b57cec5SDimitry Andric def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), 52170b57cec5SDimitry Andric (!cast<I>(OpcPrefix#DQrm) addr:$src)>; 52180b57cec5SDimitry Andric def : 
Pat<(v2i64 (ExtOp (loadv4i32 addr:$src))), 52190b57cec5SDimitry Andric (!cast<I>(OpcPrefix#DQrm) addr:$src)>; 52200b57cec5SDimitry Andric } 52210b57cec5SDimitry Andric} 52220b57cec5SDimitry Andric 52230b57cec5SDimitry Andricdefm : SS41I_pmovx_patterns<"VPMOVSX", "s", sext_invec>; 52240b57cec5SDimitry Andricdefm : SS41I_pmovx_patterns<"VPMOVZX", "z", zext_invec>; 52250b57cec5SDimitry Andric 52260b57cec5SDimitry Andriclet Predicates = [UseSSE41] in { 52270b57cec5SDimitry Andric defm : SS41I_pmovx_patterns<"PMOVSX", "s", sext_invec>; 52280b57cec5SDimitry Andric defm : SS41I_pmovx_patterns<"PMOVZX", "z", zext_invec>; 52290b57cec5SDimitry Andric} 52300b57cec5SDimitry Andric 52310b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 52320b57cec5SDimitry Andric// SSE4.1 - Extract Instructions 52330b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 52340b57cec5SDimitry Andric 52350b57cec5SDimitry Andric/// SS41I_binop_ext8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem 52360b57cec5SDimitry Andricmulticlass SS41I_extract8<bits<8> opc, string OpcodeStr> { 52370b57cec5SDimitry Andric def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst), 52380b57cec5SDimitry Andric (ins VR128:$src1, u8imm:$src2), 52390b57cec5SDimitry Andric !strconcat(OpcodeStr, 52400b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 52410b57cec5SDimitry Andric [(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1), 5242e8d8bef9SDimitry Andric timm:$src2))]>, 52430b57cec5SDimitry Andric Sched<[WriteVecExtract]>; 52440b57cec5SDimitry Andric let hasSideEffects = 0, mayStore = 1 in 52450b57cec5SDimitry Andric def mr : SS4AIi8<opc, MRMDestMem, (outs), 52460b57cec5SDimitry Andric (ins i8mem:$dst, VR128:$src1, u8imm:$src2), 52470b57cec5SDimitry Andric !strconcat(OpcodeStr, 52480b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 5249e8d8bef9SDimitry Andric [(store (i8 (trunc 
(X86pextrb (v16i8 VR128:$src1), timm:$src2))), 52500b57cec5SDimitry Andric addr:$dst)]>, Sched<[WriteVecExtractSt]>; 52510b57cec5SDimitry Andric} 52520b57cec5SDimitry Andric 52530b57cec5SDimitry Andriclet Predicates = [HasAVX, NoBWI] in 525406c3fb27SDimitry Andric defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX, WIG; 52550b57cec5SDimitry Andric 52560b57cec5SDimitry Andricdefm PEXTRB : SS41I_extract8<0x14, "pextrb">; 52570b57cec5SDimitry Andric 52580b57cec5SDimitry Andric 52590b57cec5SDimitry Andric/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination 52600b57cec5SDimitry Andricmulticlass SS41I_extract16<bits<8> opc, string OpcodeStr> { 52610b57cec5SDimitry Andric let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in 52620b57cec5SDimitry Andric def rr_REV : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst), 52630b57cec5SDimitry Andric (ins VR128:$src1, u8imm:$src2), 52640b57cec5SDimitry Andric !strconcat(OpcodeStr, 52650b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, 526606c3fb27SDimitry Andric Sched<[WriteVecExtract]>; 52670b57cec5SDimitry Andric 52680b57cec5SDimitry Andric let hasSideEffects = 0, mayStore = 1 in 52690b57cec5SDimitry Andric def mr : SS4AIi8<opc, MRMDestMem, (outs), 52700b57cec5SDimitry Andric (ins i16mem:$dst, VR128:$src1, u8imm:$src2), 52710b57cec5SDimitry Andric !strconcat(OpcodeStr, 52720b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 5273e8d8bef9SDimitry Andric [(store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), timm:$src2))), 52740b57cec5SDimitry Andric addr:$dst)]>, Sched<[WriteVecExtractSt]>; 52750b57cec5SDimitry Andric} 52760b57cec5SDimitry Andric 52770b57cec5SDimitry Andriclet Predicates = [HasAVX, NoBWI] in 527806c3fb27SDimitry Andric defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX, WIG; 52790b57cec5SDimitry Andric 52800b57cec5SDimitry Andricdefm PEXTRW : SS41I_extract16<0x15, "pextrw">; 52810b57cec5SDimitry Andric 528281ad6265SDimitry Andriclet Predicates = 
[UseSSE41] in 528381ad6265SDimitry Andric def : Pat<(store f16:$src, addr:$dst), (PEXTRWmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>; 528481ad6265SDimitry Andric 528581ad6265SDimitry Andriclet Predicates = [HasAVX, NoBWI] in 528681ad6265SDimitry Andric def : Pat<(store f16:$src, addr:$dst), (VPEXTRWmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>; 528781ad6265SDimitry Andric 52880b57cec5SDimitry Andric 52890b57cec5SDimitry Andric/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination 52900b57cec5SDimitry Andricmulticlass SS41I_extract32<bits<8> opc, string OpcodeStr> { 52910b57cec5SDimitry Andric def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst), 52920b57cec5SDimitry Andric (ins VR128:$src1, u8imm:$src2), 52930b57cec5SDimitry Andric !strconcat(OpcodeStr, 52940b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 52950b57cec5SDimitry Andric [(set GR32:$dst, 52960b57cec5SDimitry Andric (extractelt (v4i32 VR128:$src1), imm:$src2))]>, 52970b57cec5SDimitry Andric Sched<[WriteVecExtract]>; 52980b57cec5SDimitry Andric def mr : SS4AIi8<opc, MRMDestMem, (outs), 52990b57cec5SDimitry Andric (ins i32mem:$dst, VR128:$src1, u8imm:$src2), 53000b57cec5SDimitry Andric !strconcat(OpcodeStr, 53010b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 53020b57cec5SDimitry Andric [(store (extractelt (v4i32 VR128:$src1), imm:$src2), 53030b57cec5SDimitry Andric addr:$dst)]>, Sched<[WriteVecExtractSt]>; 53040b57cec5SDimitry Andric} 53050b57cec5SDimitry Andric 53060b57cec5SDimitry Andriclet Predicates = [HasAVX, NoDQI] in 53070b57cec5SDimitry Andric defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX; 53080b57cec5SDimitry Andric 53090b57cec5SDimitry Andricdefm PEXTRD : SS41I_extract32<0x16, "pextrd">; 53100b57cec5SDimitry Andric 53110b57cec5SDimitry Andric/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination 53120b57cec5SDimitry Andricmulticlass SS41I_extract64<bits<8> opc, string 
OpcodeStr> { 53130b57cec5SDimitry Andric def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst), 53140b57cec5SDimitry Andric (ins VR128:$src1, u8imm:$src2), 53150b57cec5SDimitry Andric !strconcat(OpcodeStr, 53160b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 53170b57cec5SDimitry Andric [(set GR64:$dst, 53180b57cec5SDimitry Andric (extractelt (v2i64 VR128:$src1), imm:$src2))]>, 53190b57cec5SDimitry Andric Sched<[WriteVecExtract]>; 53200b57cec5SDimitry Andric def mr : SS4AIi8<opc, MRMDestMem, (outs), 53210b57cec5SDimitry Andric (ins i64mem:$dst, VR128:$src1, u8imm:$src2), 53220b57cec5SDimitry Andric !strconcat(OpcodeStr, 53230b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 53240b57cec5SDimitry Andric [(store (extractelt (v2i64 VR128:$src1), imm:$src2), 53250b57cec5SDimitry Andric addr:$dst)]>, Sched<[WriteVecExtractSt]>; 53260b57cec5SDimitry Andric} 53270b57cec5SDimitry Andric 53280b57cec5SDimitry Andriclet Predicates = [HasAVX, NoDQI] in 532906c3fb27SDimitry Andric defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, REX_W; 53300b57cec5SDimitry Andric 53310b57cec5SDimitry Andricdefm PEXTRQ : SS41I_extract64<0x16, "pextrq">, REX_W; 53320b57cec5SDimitry Andric 53330b57cec5SDimitry Andric/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory 53340b57cec5SDimitry Andric/// destination 53350b57cec5SDimitry Andricmulticlass SS41I_extractf32<bits<8> opc, string OpcodeStr> { 53360b57cec5SDimitry Andric def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst), 53370b57cec5SDimitry Andric (ins VR128:$src1, u8imm:$src2), 53380b57cec5SDimitry Andric !strconcat(OpcodeStr, 53390b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 53400b57cec5SDimitry Andric [(set GR32orGR64:$dst, 53410b57cec5SDimitry Andric (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>, 53420b57cec5SDimitry Andric Sched<[WriteVecExtract]>; 53430b57cec5SDimitry Andric def mr : SS4AIi8<opc, MRMDestMem, (outs), 
53440b57cec5SDimitry Andric (ins f32mem:$dst, VR128:$src1, u8imm:$src2), 53450b57cec5SDimitry Andric !strconcat(OpcodeStr, 53460b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 53470b57cec5SDimitry Andric [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2), 53480b57cec5SDimitry Andric addr:$dst)]>, Sched<[WriteVecExtractSt]>; 53490b57cec5SDimitry Andric} 53500b57cec5SDimitry Andric 53510b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in { 53520b57cec5SDimitry Andric let Predicates = [UseAVX] in 535306c3fb27SDimitry Andric defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX, WIG; 53540b57cec5SDimitry Andric defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">; 53550b57cec5SDimitry Andric} 53560b57cec5SDimitry Andric 53570b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 53580b57cec5SDimitry Andric// SSE4.1 - Insert Instructions 53590b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 53600b57cec5SDimitry Andric 53610b57cec5SDimitry Andricmulticlass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> { 53620b57cec5SDimitry Andric def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), 53630b57cec5SDimitry Andric (ins VR128:$src1, GR32orGR64:$src2, u8imm:$src3), 53640b57cec5SDimitry Andric !if(Is2Addr, 53650b57cec5SDimitry Andric !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 53660b57cec5SDimitry Andric !strconcat(asm, 53670b57cec5SDimitry Andric "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 53680b57cec5SDimitry Andric [(set VR128:$dst, 5369e8d8bef9SDimitry Andric (X86pinsrb VR128:$src1, GR32orGR64:$src2, timm:$src3))]>, 53700b57cec5SDimitry Andric Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>; 53710b57cec5SDimitry Andric def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), 53720b57cec5SDimitry Andric (ins VR128:$src1, i8mem:$src2, u8imm:$src3), 53730b57cec5SDimitry Andric !if(Is2Addr, 
53740b57cec5SDimitry Andric !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 53750b57cec5SDimitry Andric !strconcat(asm, 53760b57cec5SDimitry Andric "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 53770b57cec5SDimitry Andric [(set VR128:$dst, 5378e8d8bef9SDimitry Andric (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), timm:$src3))]>, 53790b57cec5SDimitry Andric Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; 53800b57cec5SDimitry Andric} 53810b57cec5SDimitry Andric 5382bdd1243dSDimitry Andriclet Predicates = [HasAVX, NoBWI] in { 5383cb14a3feSDimitry Andric defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX, VVVV, WIG; 5384bdd1243dSDimitry Andric def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3), 5385bdd1243dSDimitry Andric (VPINSRBrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)), 5386bdd1243dSDimitry Andric GR8:$src2, sub_8bit), timm:$src3)>; 5387bdd1243dSDimitry Andric} 5388bdd1243dSDimitry Andric 53890b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in 53900b57cec5SDimitry Andric defm PINSRB : SS41I_insert8<0x20, "pinsrb">; 53910b57cec5SDimitry Andric 53920b57cec5SDimitry Andricmulticlass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> { 53930b57cec5SDimitry Andric def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), 53940b57cec5SDimitry Andric (ins VR128:$src1, GR32:$src2, u8imm:$src3), 53950b57cec5SDimitry Andric !if(Is2Addr, 53960b57cec5SDimitry Andric !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 53970b57cec5SDimitry Andric !strconcat(asm, 53980b57cec5SDimitry Andric "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 53990b57cec5SDimitry Andric [(set VR128:$dst, 54000b57cec5SDimitry Andric (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>, 54010b57cec5SDimitry Andric Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>; 54020b57cec5SDimitry Andric def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), 54030b57cec5SDimitry Andric (ins VR128:$src1, 
i32mem:$src2, u8imm:$src3), 54040b57cec5SDimitry Andric !if(Is2Addr, 54050b57cec5SDimitry Andric !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 54060b57cec5SDimitry Andric !strconcat(asm, 54070b57cec5SDimitry Andric "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 54080b57cec5SDimitry Andric [(set VR128:$dst, 54090b57cec5SDimitry Andric (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), imm:$src3)))]>, 54100b57cec5SDimitry Andric Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; 54110b57cec5SDimitry Andric} 54120b57cec5SDimitry Andric 54130b57cec5SDimitry Andriclet Predicates = [HasAVX, NoDQI] in 5414cb14a3feSDimitry Andric defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX, VVVV; 54150b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in 54160b57cec5SDimitry Andric defm PINSRD : SS41I_insert32<0x22, "pinsrd">; 54170b57cec5SDimitry Andric 54180b57cec5SDimitry Andricmulticlass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> { 54190b57cec5SDimitry Andric def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), 54200b57cec5SDimitry Andric (ins VR128:$src1, GR64:$src2, u8imm:$src3), 54210b57cec5SDimitry Andric !if(Is2Addr, 54220b57cec5SDimitry Andric !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 54230b57cec5SDimitry Andric !strconcat(asm, 54240b57cec5SDimitry Andric "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 54250b57cec5SDimitry Andric [(set VR128:$dst, 54260b57cec5SDimitry Andric (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>, 54270b57cec5SDimitry Andric Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>; 54280b57cec5SDimitry Andric def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), 54290b57cec5SDimitry Andric (ins VR128:$src1, i64mem:$src2, u8imm:$src3), 54300b57cec5SDimitry Andric !if(Is2Addr, 54310b57cec5SDimitry Andric !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 54320b57cec5SDimitry Andric !strconcat(asm, 54330b57cec5SDimitry Andric "\t{$src3, $src2, 
$src1, $dst|$dst, $src1, $src2, $src3}")), 54340b57cec5SDimitry Andric [(set VR128:$dst, 54350b57cec5SDimitry Andric (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), imm:$src3)))]>, 54360b57cec5SDimitry Andric Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; 54370b57cec5SDimitry Andric} 54380b57cec5SDimitry Andric 54390b57cec5SDimitry Andriclet Predicates = [HasAVX, NoDQI] in 5440cb14a3feSDimitry Andric defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX, VVVV, REX_W; 54410b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in 54420b57cec5SDimitry Andric defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W; 54430b57cec5SDimitry Andric 54440b57cec5SDimitry Andric// insertps has a few different modes, there's the first two here below which 54450b57cec5SDimitry Andric// are optimized inserts that won't zero arbitrary elements in the destination 54460b57cec5SDimitry Andric// vector. The next one matches the intrinsic and could zero arbitrary elements 54470b57cec5SDimitry Andric// in the target vector. 
/// SS41I_insertf32 - INSERTPS register (rr) and memory (rm) forms. Both forms
/// select the X86insertps node with a target-immediate control byte; the
/// memory form folds a scalar f32 load that has been widened with
/// scalar_to_vector. Is2Addr chooses between the two-operand assembly string
/// and the three-operand one.
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
  // Register source. Only this form is marked commutable.
  let isCommutable = 1 in
  def rr : SS4AIi8<
      opc, MRMSrcReg,
      (outs VR128:$dst),
      (ins VR128:$src1, VR128:$src2, u8imm:$src3),
      !if(Is2Addr,
          asm # "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
          asm # "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
      [(set VR128:$dst,
            (X86insertps VR128:$src1, VR128:$src2, timm:$src3))]>,
    Sched<[SchedWriteFShuffle.XMM]>;

  // Memory source: a 32-bit load feeding the inserted element.
  def rm : SS4AIi8<
      opc, MRMSrcMem,
      (outs VR128:$dst),
      (ins VR128:$src1, f32mem:$src2, u8imm:$src3),
      !if(Is2Addr,
          asm # "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
          asm # "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
      [(set VR128:$dst,
            (X86insertps VR128:$src1,
                         (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                         timm:$src3))]>,
    Sched<[SchedWriteFShuffle.XMM.Folded,
           SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

let ExeDomain = SSEPackedSingle in {
  // VEX-encoded three-operand form.
  let Predicates = [UseAVX] in
  defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>,
                   VEX, VVVV, WIG;

  // Legacy two-operand form: the destination is tied to $src1.
  let Constraints = "$src1 = $dst" in
  defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Round Instructions
//===----------------------------------------------------------------------===//

/// sse41_fp_unop_p - Packed FP unary operation taking an immediate control
/// byte. Emits a register form (ri) and a memory form (mi); the memory form
/// loads its source through mem_frag. Both read MXCSR and may raise FP
/// exceptions.
multiclass sse41_fp_unop_p<bits<8> opc, string OpcodeStr,
                           X86MemOperand x86memop, RegisterClass RC,
                           ValueType VT, PatFrag mem_frag,
                           SDPatternOperator OpNode,
                           X86FoldableSchedWrite sched> {
  let Uses = [MXCSR], mayRaiseFPException = 1 in {
    // Vector operation, register source.
    def ri : SS4AIi8<
        opc, MRMSrcReg,
        (outs RC:$dst),
        (ins RC:$src1, i32u8imm:$src2),
        OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        [(set RC:$dst, (VT (OpNode RC:$src1, timm:$src2)))]>,
      Sched<[sched]>;

    // Vector operation, memory source.
    def mi : SS4AIi8<
        opc, MRMSrcMem,
        (outs RC:$dst),
        (ins x86memop:$src1, i32u8imm:$src2),
        OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        [(set RC:$dst,
              (VT (OpNode (mem_frag addr:$src1), timm:$src2)))]>,
      Sched<[sched.Folded]>;
  } // Uses = [MXCSR], mayRaiseFPException = 1
}

/// avx_fp_unop_rm - Three-operand scalar forms on FR32/FR64 with an immediate.
/// The records carry no selection pattern and are isCodeGenOnly; the Pat
/// records further down select them explicitly.
multiclass avx_fp_unop_rm<bits<8> opcss, bits<8> opcsd,
                          string OpcodeStr, X86FoldableSchedWrite sched> {
  let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in {
    def SSri : SS4AIi8<
        opcss, MRMSrcReg,
        (outs FR32:$dst),
        (ins FR32:$src1, FR32:$src2, i32u8imm:$src3),
        OpcodeStr #
          "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        []>,
      Sched<[sched]>;

    let mayLoad = 1 in
    def SSmi : SS4AIi8<
        opcss, MRMSrcMem,
        (outs FR32:$dst),
        (ins FR32:$src1, f32mem:$src2, i32u8imm:$src3),
        OpcodeStr #
          "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        []>,
      Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1

  let ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in {
    def SDri : SS4AIi8<
        opcsd, MRMSrcReg,
        (outs FR64:$dst),
        (ins FR64:$src1, FR64:$src2, i32u8imm:$src3),
        OpcodeStr #
          "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        []>,
      Sched<[sched]>;

    let mayLoad = 1 in
    def SDmi : SS4AIi8<
        opcsd, MRMSrcMem,
        (outs FR64:$dst),
        (ins FR64:$src1, f64mem:$src2, i32u8imm:$src3),
        OpcodeStr #
          "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        []>,
      Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1
}

/// sse41_fp_unop_s - Two-operand scalar forms on FR32/FR64 with an immediate.
/// Pattern-less and isCodeGenOnly like avx_fp_unop_rm, but these records set
/// Uses = [MXCSR] and mayRaiseFPException themselves.
multiclass sse41_fp_unop_s<bits<8> opcss, bits<8> opcsd,
                           string OpcodeStr, X86FoldableSchedWrite sched> {
  let Uses = [MXCSR], mayRaiseFPException = 1 in {
    let ExeDomain = SSEPackedSingle, hasSideEffects = 0,
        isCodeGenOnly = 1 in {
      def SSri : SS4AIi8<
          opcss, MRMSrcReg,
          (outs FR32:$dst),
          (ins FR32:$src1, i32u8imm:$src2),
          OpcodeStr # "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>,
        Sched<[sched]>;

      let mayLoad = 1 in
      def SSmi : SS4AIi8<
          opcss, MRMSrcMem,
          (outs FR32:$dst),
          (ins f32mem:$src1, i32u8imm:$src2),
          OpcodeStr # "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
    } // ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1

    let ExeDomain = SSEPackedDouble, hasSideEffects = 0,
        isCodeGenOnly = 1 in {
      def SDri : SS4AIi8<
          opcsd, MRMSrcReg,
          (outs FR64:$dst),
          (ins FR64:$src1, i32u8imm:$src2),
          OpcodeStr # "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>,
        Sched<[sched]>;

      let mayLoad = 1 in
      def SDmi : SS4AIi8<
          opcsd, MRMSrcMem,
          (outs FR64:$dst),
          (ins f64mem:$src1, i32u8imm:$src2),
          OpcodeStr # "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
    } // ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1
  } // Uses = [MXCSR], mayRaiseFPException = 1
}

/// sse41_fp_unop_s_int - Scalar forms operating on whole VR128 registers
/// (the *_Int records). Register forms select OpNode at type VT32/VT64; memory
/// forms fold sse_load_f32/sse_load_f64. Is2Addr chooses between the
/// two-operand and three-operand assembly strings.
multiclass sse41_fp_unop_s_int<bits<8> opcss, bits<8> opcsd,
                               string OpcodeStr, X86FoldableSchedWrite sched,
                               ValueType VT32, ValueType VT64,
                               SDNode OpNode, bit Is2Addr = 1> {
  let Uses = [MXCSR], mayRaiseFPException = 1 in {
    let ExeDomain = SSEPackedSingle in {
      def SSri_Int : SS4AIi8<
          opcss, MRMSrcReg,
          (outs VR128:$dst),
          (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
          !if(Is2Addr,
              OpcodeStr # "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}",
              OpcodeStr #
                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
          [(set VR128:$dst,
                (VT32 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>,
        Sched<[sched]>;

      def SSmi_Int : SS4AIi8<
          opcss, MRMSrcMem,
          (outs VR128:$dst),
          (ins VR128:$src1, ssmem:$src2, i32u8imm:$src3),
          !if(Is2Addr,
              OpcodeStr # "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}",
              OpcodeStr #
                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
          [(set VR128:$dst,
                (OpNode VR128:$src1, (sse_load_f32 addr:$src2),
                        timm:$src3))]>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
    } // ExeDomain = SSEPackedSingle

    let ExeDomain = SSEPackedDouble in {
      def SDri_Int : SS4AIi8<
          opcsd, MRMSrcReg,
          (outs VR128:$dst),
          (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
          !if(Is2Addr,
              OpcodeStr # "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
              OpcodeStr #
                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
          [(set VR128:$dst,
                (VT64 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>,
        Sched<[sched]>;

      def SDmi_Int : SS4AIi8<
          opcsd, MRMSrcMem,
          (outs VR128:$dst),
          (ins VR128:$src1, sdmem:$src2, i32u8imm:$src3),
          !if(Is2Addr,
              OpcodeStr # "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
              OpcodeStr #
                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
          [(set VR128:$dst,
                (OpNode VR128:$src1, (sse_load_f64 addr:$src2),
                        timm:$src3))]>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
    } // ExeDomain = SSEPackedDouble
  } // Uses = [MXCSR], mayRaiseFPException = 1
}

// FP round - roundss, roundps, roundsd, roundpd

// VEX-encoded packed forms, 128-bit and 256-bit.
let Predicates = [HasAVX, NoVLX] in {
  let ExeDomain = SSEPackedSingle, Uses = [MXCSR],
      mayRaiseFPException = 1 in {
    defm VROUNDPS  : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
                                     loadv4f32, X86any_VRndScale,
                                     SchedWriteFRnd.XMM>,
                     VEX, WIG;
    defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32,
                                     loadv8f32, X86any_VRndScale,
                                     SchedWriteFRnd.YMM>,
                     VEX, VEX_L, WIG;
  }

  let ExeDomain = SSEPackedDouble, Uses = [MXCSR],
      mayRaiseFPException = 1 in {
    defm VROUNDPD  : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
                                     loadv2f64, X86any_VRndScale,
                                     SchedWriteFRnd.XMM>,
                     VEX, WIG;
    defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64,
                                     loadv4f64, X86any_VRndScale,
                                     SchedWriteFRnd.YMM>,
                     VEX, VEX_L, WIG;
  }
}

// VEX-encoded scalar forms: the VR128 (*_Int) records and the FR32/FR64
// records share the VROUND prefix.
let Predicates = [UseAVX] in {
  defm VROUND : sse41_fp_unop_s_int<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl,
                                    v4f32, v2f64, X86RndScales, 0>,
                VEX, VVVV, VEX_LIG, WIG, SIMD_EXC;
  defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>,
                VEX, VVVV, VEX_LIG, WIG, SIMD_EXC;
}

// Scalar FR32/FR64 rounding selects the three-operand VEX forms, passing
// IMPLICIT_DEF as the first source operand.
let Predicates = [UseAVX] in {
  def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2),
            (VROUNDSSri (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>;
  def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2),
            (VROUNDSDri (f64 (IMPLICIT_DEF)), FR64:$src1, timm:$src2)>;
}

// The load-folding variants are only selected under OptForSize.
let Predicates = [UseAVX, OptForSize] in {
  def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2),
            (VROUNDSSmi (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
  def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2),
            (VROUNDSDmi (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
}

// Legacy SSE4.1 packed forms.
let ExeDomain = SSEPackedSingle in
defm ROUNDPS : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32,
                               memopv4f32, X86any_VRndScale,
                               SchedWriteFRnd.XMM>;
let ExeDomain = SSEPackedDouble in
defm ROUNDPD : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64,
                               memopv2f64, X86any_VRndScale,
                               SchedWriteFRnd.XMM>;

// Legacy SSE4.1 scalar forms on FR32/FR64.
defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl>;

// Legacy SSE4.1 scalar forms on VR128; the destination is tied to $src1.
let Constraints = "$src1 = $dst" in
defm ROUND : sse41_fp_unop_s_int<0x0A, 0x0B, "round", SchedWriteFRnd.Scl,
                                 v4f32, v2f64, X86RndScales>;

let Predicates = [UseSSE41] in {
  def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2),
            (ROUNDSSri FR32:$src1, timm:$src2)>;
  def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2),
            (ROUNDSDri FR64:$src1, timm:$src2)>;
}

// As with the VEX forms, load folding is only selected under OptForSize.
let Predicates = [UseSSE41, OptForSize] in {
  def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2),
            (ROUNDSSmi addr:$src1, timm:$src2)>;
  def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2),
            (ROUNDSDmi addr:$src1, timm:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Bit Test
//===----------------------------------------------------------------------===//

// ptest may have its operands swapped only when nothing but the Z flag is
// consumed. If the C flag is used, commuting would change which operand is
// inverted.
def X86ptest_commutable : PatFrag<(ops node:$src1, node:$src2),
                                  (X86ptest node:$src1, node:$src2), [{
  return onlyUsesZeroFlag(SDValue(Node, 0));
}]>;

// ptest instruction we'll lower to this in X86ISelLowering primarily from
// the Intel intrinsic that corresponds to this.
// VEX-encoded ptest, 128-bit and 256-bit. These produce no register result;
// they only define EFLAGS.
let Defs = [EFLAGS], Predicates = [HasAVX] in {
  def VPTESTrr : SS48I<
      0x17, MRMSrcReg,
      (outs),
      (ins VR128:$src1, VR128:$src2),
      "vptest\t{$src2, $src1|$src1, $src2}",
      [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
    Sched<[SchedWriteVecTest.XMM]>, VEX, WIG;
  def VPTESTrm : SS48I<
      0x17, MRMSrcMem,
      (outs),
      (ins VR128:$src1, f128mem:$src2),
      "vptest\t{$src2, $src1|$src1, $src2}",
      [(set EFLAGS, (X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
    Sched<[SchedWriteVecTest.XMM.Folded,
           SchedWriteVecTest.XMM.ReadAfterFold]>,
    VEX, WIG;

  def VPTESTYrr : SS48I<
      0x17, MRMSrcReg,
      (outs),
      (ins VR256:$src1, VR256:$src2),
      "vptest\t{$src2, $src1|$src1, $src2}",
      [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
    Sched<[SchedWriteVecTest.YMM]>, VEX, VEX_L, WIG;
  def VPTESTYrm : SS48I<
      0x17, MRMSrcMem,
      (outs),
      (ins VR256:$src1, i256mem:$src2),
      "vptest\t{$src2, $src1|$src1, $src2}",
      [(set EFLAGS, (X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
    Sched<[SchedWriteVecTest.YMM.Folded,
           SchedWriteVecTest.YMM.ReadAfterFold]>,
    VEX, VEX_L, WIG;
}

// Legacy SSE4.1 ptest; the memory form uses memopv2i64 instead of loadv2i64.
let Defs = [EFLAGS] in {
  def PTESTrr : SS48I<
      0x17, MRMSrcReg,
      (outs),
      (ins VR128:$src1, VR128:$src2),
      "ptest\t{$src2, $src1|$src1, $src2}",
      [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
    Sched<[SchedWriteVecTest.XMM]>;
  def PTESTrm : SS48I<
      0x17, MRMSrcMem,
      (outs),
      (ins VR128:$src1, f128mem:$src2),
      "ptest\t{$src2, $src1|$src1, $src2}",
      [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
    Sched<[SchedWriteVecTest.XMM.Folded,
           SchedWriteVecTest.XMM.ReadAfterFold]>;
}

// When the load is the first ptest operand and the X86ptest_commutable
// predicate holds, swap the operands so the load can still be folded.
let Predicates = [HasAVX] in {
  def : Pat<(X86ptest_commutable (loadv2i64 addr:$src2), VR128:$src1),
            (VPTESTrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86ptest_commutable (loadv4i64 addr:$src2), VR256:$src1),
            (VPTESTYrm VR256:$src1, addr:$src2)>;
}
let Predicates = [UseSSE41] in {
  def : Pat<(X86ptest_commutable (memopv2i64 addr:$src2), VR128:$src1),
            (PTESTrm VR128:$src1, addr:$src2)>;
}

// The bit test instructions below are AVX only.
/// avx_bittest - Register (rr) and memory (rm) forms of a VEX-encoded test
/// that selects X86testp and writes EFLAGS.
multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop, PatFrag mem_frag, ValueType vt,
                       X86FoldableSchedWrite sched> {
  def rr : SS48I<
      opc, MRMSrcReg,
      (outs),
      (ins RC:$src1, RC:$src2),
      OpcodeStr # "\t{$src2, $src1|$src1, $src2}",
      [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>,
    Sched<[sched]>, VEX;
  def rm : SS48I<
      opc, MRMSrcMem,
      (outs),
      (ins RC:$src1, x86memop:$src2),
      OpcodeStr # "\t{$src2, $src1|$src1, $src2}",
      [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
    Sched<[sched.Folded, sched.ReadAfterFold]>, VEX;
}

// testps/testpd may have their operands swapped only when nothing but the Z
// flag is consumed. If the C flag is used, commuting would change which
// operand is inverted.
def X86testp_commutable : PatFrag<(ops node:$src1, node:$src2),
                                  (X86testp node:$src1, node:$src2), [{
  return onlyUsesZeroFlag(SDValue(Node, 0));
}]>;

let Defs = [EFLAGS], Predicates = [HasAVX] in {
  let ExeDomain = SSEPackedSingle in {
    defm VTESTPS  : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32,
                                v4f32, SchedWriteFTest.XMM>;
    defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32,
                                v8f32, SchedWriteFTest.YMM>,
                    VEX_L;
  }
  let ExeDomain = SSEPackedDouble in {
    defm VTESTPD  : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64,
                                v2f64, SchedWriteFTest.XMM>;
    defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64,
                                v4f64, SchedWriteFTest.YMM>,
                    VEX_L;
  }
}

// Commuted-load folding for vtestps/vtestpd, guarded by X86testp_commutable.
let Predicates = [HasAVX] in {
  def : Pat<(X86testp_commutable (loadv4f32 addr:$src2), VR128:$src),
            (VTESTPSrm VR128:$src, addr:$src2)>;
  def : Pat<(X86testp_commutable (loadv8f32 addr:$src2), VR256:$src),
            (VTESTPSYrm VR256:$src, addr:$src2)>;

  def : Pat<(X86testp_commutable (loadv2f64 addr:$src2), VR128:$src),
            (VTESTPDrm VR128:$src, addr:$src2)>;
  def : Pat<(X86testp_commutable (loadv4f64 addr:$src2), VR256:$src),
            (VTESTPDYrm VR256:$src, addr:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Misc Instructions
//===----------------------------------------------------------------------===//

let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
  // Opcode 0xB8 forms; these are the only POPCNT records that select ctpop.
  defm POPCNT16 : Lzcnt<0xB8, "popcnt", ctpop, Xi16,
                        WritePOPCNT, WritePOPCNT.Folded>,
                  OpSize16, XS;
  defm POPCNT32 : Lzcnt<0xB8, "popcnt", ctpop, Xi32,
                        WritePOPCNT, WritePOPCNT.Folded>,
                  OpSize32, XS;
  defm POPCNT64 : Lzcnt<0xB8, "popcnt", ctpop, Xi64,
                        WritePOPCNT, WritePOPCNT.Folded>,
                  XS;

  // Opcode 0x88 forms with the "_EVEX" suffix; pattern-less (null_frag).
  defm POPCNT16 : Lzcnt<0x88, "popcnt", null_frag, Xi16,
                        WritePOPCNT, WritePOPCNT.Folded, "_EVEX">,
                  PL, PD;
  defm POPCNT32 : Lzcnt<0x88, "popcnt", null_frag, Xi32,
                        WritePOPCNT, WritePOPCNT.Folded, "_EVEX">,
                  PL;
  defm POPCNT64 : Lzcnt<0x88, "popcnt", null_frag, Xi64,
                        WritePOPCNT, WritePOPCNT.Folded, "_EVEX">,
                  PL;
}

// Opcode 0x88 forms with the "_NF" suffix; pattern-less, and defined outside
// the Defs = [EFLAGS] / HasPOPCNT scope above.
defm POPCNT16 : Lzcnt<0x88, "popcnt", null_frag, Xi16,
                      WritePOPCNT, WritePOPCNT.Folded, "_NF">,
                NF, PD;
defm POPCNT32 : Lzcnt<0x88, "popcnt", null_frag, Xi32,
                      WritePOPCNT, WritePOPCNT.Folded, "_NF">,
                NF;
defm POPCNT64 : Lzcnt<0x88, "popcnt", null_frag, Xi64,
                      WritePOPCNT, WritePOPCNT.Folded, "_NF">,
                NF;

/// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
/// Emits a register form (rr) and a memory form (rm) that loads through
/// ld_frag.
multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
                                 SDNode OpNode, PatFrag ld_frag,
                                 X86FoldableSchedWrite Sched> {
  def rr : SS48I<
      opc, MRMSrcReg,
      (outs VR128:$dst),
      (ins VR128:$src),
      OpcodeStr # "\t{$src, $dst|$dst, $src}",
      [(set VR128:$dst, (v8i16 (OpNode (v8i16 VR128:$src))))]>,
    Sched<[Sched]>;
  def rm : SS48I<
      opc, MRMSrcMem,
      (outs VR128:$dst),
      (ins i128mem:$src),
      OpcodeStr # "\t{$src, $dst|$dst, $src}",
      [(set VR128:$dst, (v8i16 (OpNode (ld_frag addr:$src))))]>,
    Sched<[Sched.Folded]>;
}

// Both PHMINPOSUW encodings are scheduled with the WritePHMINPOS class.
let Predicates = [HasAVX] in
defm VPHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "vphminposuw",
                                         X86phminpos, load,
                                         WritePHMINPOS>,
                   VEX, WIG;
defm PHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "phminposuw",
                                        X86phminpos, memop,
                                        WritePHMINPOS>;

/// SS48I_binop_rm - Simple SSE41 binary operator.
// Parameters: opc is the opcode byte; OpNode/OpVT are the selected DAG node
// and its vector type; RC/x86memop are the register class and memory operand;
// memop_frag is the load fragment folded by the rm form. Is2Addr chooses
// between the two-operand and the three-operand assembly string.
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, RegisterClass RC,
                          PatFrag memop_frag, X86MemOperand x86memop,
                          X86FoldableSchedWrite sched, bit Is2Addr = 1> {
  // Register-register form; only this form is marked commutable.
  let isCommutable = 1 in
  def rr : SS48I<
      opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
      !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
      [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
    Sched<[sched]>;

  // Register-memory form; the second source is loaded through memop_frag.
  def rm : SS48I<
      opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
      !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
      [(set RC:$dst,
            (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// 128-bit AVX forms of the dword min/max and pmuldq (three-operand syntax).
let Predicates = [HasAVX, NoVLX] in {
  defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128, load,
                                i128mem, SchedWriteVecALU.XMM, 0>,
                 VEX, VVVV, WIG;
  defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128, load,
                                i128mem, SchedWriteVecALU.XMM, 0>,
                 VEX, VVVV, WIG;
  defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128, load,
                                i128mem, SchedWriteVecALU.XMM, 0>,
                 VEX, VVVV, WIG;
  defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128, load,
                                i128mem, SchedWriteVecALU.XMM, 0>,
                 VEX, VVVV, WIG;
  defm VPMULDQ : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v2i64, VR128, load,
                                i128mem, SchedWriteVecIMul.XMM, 0>,
                 VEX, VVVV, WIG;
}

// 128-bit AVX forms of the byte/word min/max.
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128, load,
                                i128mem, SchedWriteVecALU.XMM, 0>,
                 VEX, VVVV, WIG;
  defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128, load,
                                i128mem, SchedWriteVecALU.XMM, 0>,
                 VEX, VVVV, WIG;
  defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128, load,
                                i128mem, SchedWriteVecALU.XMM, 0>,
                 VEX, VVVV, WIG;
  defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128, load,
                                i128mem, SchedWriteVecALU.XMM, 0>,
                 VEX, VVVV, WIG;
}

// 256-bit AVX2 forms of the dword min/max and pmuldq.
let Predicates = [HasAVX2, NoVLX] in {
  defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256, load,
                                 i256mem, SchedWriteVecALU.YMM, 0>,
                  VEX, VVVV, VEX_L, WIG;
  defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256, load,
                                 i256mem, SchedWriteVecALU.YMM, 0>,
                  VEX, VVVV, VEX_L, WIG;
  defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256, load,
                                 i256mem, SchedWriteVecALU.YMM, 0>,
                  VEX, VVVV, VEX_L, WIG;
  defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256, load,
                                 i256mem, SchedWriteVecALU.YMM, 0>,
                  VEX, VVVV, VEX_L, WIG;
  defm VPMULDQY : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v4i64, VR256, load,
                                 i256mem, SchedWriteVecIMul.YMM, 0>,
                  VEX, VVVV, VEX_L, WIG;
}

// 256-bit AVX2 forms of the byte/word min/max.
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256, load,
                                 i256mem, SchedWriteVecALU.YMM, 0>,
                  VEX, VVVV, VEX_L, WIG;
  defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256, load,
                                 i256mem, SchedWriteVecALU.YMM, 0>,
                  VEX, VVVV, VEX_L, WIG;
  defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256, load,
                                 i256mem, SchedWriteVecALU.YMM, 0>,
                  VEX, VVVV, VEX_L, WIG;
  defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256, load,
                                 i256mem, SchedWriteVecALU.YMM, 0>,
                  VEX, VVVV, VEX_L, WIG;
}

// Legacy SSE4.1 forms: two-operand syntax, destination tied to $src1.
let Constraints = "$src1 = $dst" in {
  defm PMINSB : SS48I_binop_rm<0x38, "pminsb", smin, v16i8, VR128, memop,
                               i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMINSD : SS48I_binop_rm<0x39, "pminsd", smin, v4i32, VR128, memop,
                               i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMINUD : SS48I_binop_rm<0x3B, "pminud", umin, v4i32, VR128, memop,
                               i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", umin, v8i16, VR128, memop,
                               i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", smax, v16i8, VR128, memop,
                               i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", smax, v4i32, VR128, memop,
                               i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", umax, v4i32, VR128, memop,
                               i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", umax, v8i16, VR128, memop,
                               i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMULDQ : SS48I_binop_rm<0x28, "pmuldq", X86pmuldq, v2i64, VR128, memop,
                               i128mem, SchedWriteVecIMul.XMM, 1>;
}

// pmulld / pcmpeqq: 128-bit AVX forms.
let Predicates = [HasAVX, NoVLX] in {
  defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128, load,
                                i128mem, SchedWritePMULLD.XMM, 0>,
                 VEX, VVVV, WIG;
}
let Predicates = [HasAVX] in {
  defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
                                 load, i128mem, SchedWriteVecALU.XMM, 0>,
                  VEX, VVVV, WIG;
}

// pmulld / pcmpeqq: 256-bit AVX2 forms.
let Predicates = [HasAVX2, NoVLX] in {
  defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256, load,
                                 i256mem, SchedWritePMULLD.YMM, 0>,
                  VEX, VVVV, VEX_L, WIG;
}
let Predicates = [HasAVX2] in {
  defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                   VEX, VVVV, VEX_L, WIG;
}

// pmulld / pcmpeqq: legacy SSE4.1 forms, destination tied to $src1.
let Constraints = "$src1 = $dst" in {
  defm PMULLD  : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128, memop,
                                i128mem, SchedWritePMULLD.XMM, 1>;
  defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128, memop,
                                i128mem, SchedWriteVecALU.XMM, 1>;
}

/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate,
/// selected from an intrinsic (IntId) rather than an SDNode.
multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
                               RegisterClass RC, PatFrag memop_frag,
                               X86MemOperand x86memop, bit Is2Addr,
                               X86FoldableSchedWrite sched> {
  // Register form. Marked commutable here; several users override this with
  // an enclosing "let isCommutable = 0".
  let isCommutable = 1 in
  def rri : SS4AIi8<
      opc, MRMSrcReg, (outs RC:$dst),
      (ins RC:$src1, RC:$src2, u8imm:$src3),
      !if(Is2Addr,
          !strconcat(OpcodeStr,
                     "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
          !strconcat(OpcodeStr,
                     "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set RC:$dst, (IntId RC:$src1, RC:$src2, timm:$src3))]>,
    Sched<[sched]>;

  // Memory form; the second source is loaded through memop_frag.
  def rmi : SS4AIi8<
      opc, MRMSrcMem, (outs RC:$dst),
      (ins RC:$src1, x86memop:$src2, u8imm:$src3),
      !if(Is2Addr,
          !strconcat(OpcodeStr,
                     "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
          !strconcat(OpcodeStr,
                     "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set RC:$dst,
            (IntId RC:$src1, (memop_frag addr:$src2), timm:$src3))]>,
    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

/// SS41I_binop_rmi - SSE 4.1 binary operator with 8-bit immediate, selected
/// from an SDNode (OpNode) producing a value of type OpVT.
multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           ValueType OpVT, RegisterClass RC,
                           PatFrag memop_frag, X86MemOperand x86memop,
                           bit Is2Addr, X86FoldableSchedWrite sched> {
  // Register form.
  let isCommutable = 1 in
  def rri : SS4AIi8<
      opc, MRMSrcReg, (outs RC:$dst),
      (ins RC:$src1, RC:$src2, u8imm:$src3),
      !if(Is2Addr,
          !strconcat(OpcodeStr,
                     "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
          !strconcat(OpcodeStr,
                     "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
    Sched<[sched]>;

  // Memory form; the second source is loaded through memop_frag.
  def rmi : SS4AIi8<
      opc, MRMSrcMem, (outs RC:$dst),
      (ins RC:$src1, x86memop:$src2, u8imm:$src3),
      !if(Is2Addr,
          !strconcat(OpcodeStr,
                     "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
          !strconcat(OpcodeStr,
                     "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set RC:$dst,
            (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>,
    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Invert the low 2 bits of a blend immediate (used when the two blend sources
// are swapped).
def BlendCommuteImm2 : SDNodeXForm<timm, [{
  const uint8_t LaneBits = N->getZExtValue() & 0x03;
  return getI8Imm(LaneBits ^ 0x03, SDLoc(N));
}]>;

// Invert the low 4 bits of a blend immediate.
def BlendCommuteImm4 : SDNodeXForm<timm, [{
  const uint8_t LaneBits = N->getZExtValue() & 0x0f;
  return getI8Imm(LaneBits ^ 0x0f, SDLoc(N));
}]>;

// Invert all 8 bits of a blend immediate.
def BlendCommuteImm8 : SDNodeXForm<timm, [{
  const uint8_t LaneBits = N->getZExtValue() & 0xff;
  return getI8Imm(LaneBits ^ 0xff, SDLoc(N));
}]>;

// Turn a 4-bit blendi immediate to 8-bit for use with pblendw.
def BlendScaleImm4 : SDNodeXForm<timm, [{
  const uint8_t Mask4 = N->getZExtValue();
  uint8_t Scaled = 0;
  // Input bit i becomes the 2-bit group at position 2*i.
  for (unsigned Bit = 0; Bit < 4; ++Bit)
    if ((Mask4 >> Bit) & 1)
      Scaled |= 0x3 << (Bit * 2);
  return getI8Imm(Scaled, SDLoc(N));
}]>;

// Turn a 2-bit blendi immediate to 8-bit for use with pblendw.
def BlendScaleImm2 : SDNodeXForm<timm, [{
  const uint8_t Mask2 = N->getZExtValue();
  uint8_t Scaled = 0;
  // Input bit i becomes nibble i of the result.
  for (unsigned Bit = 0; Bit < 2; ++Bit)
    if ((Mask2 >> Bit) & 1)
      Scaled |= 0xf << (Bit * 4);
  return getI8Imm(Scaled, SDLoc(N));
}]>;

// Turn a 2-bit blendi immediate to 4-bit for use with pblendd.
def BlendScaleImm2to4 : SDNodeXForm<timm, [{
  const uint8_t Mask2 = N->getZExtValue();
  uint8_t Scaled = 0;
  // Input bit i becomes the 2-bit group at position 2*i.
  for (unsigned Bit = 0; Bit < 2; ++Bit)
    if ((Mask2 >> Bit) & 1)
      Scaled |= 0x3 << (Bit * 2);
  return getI8Imm(Scaled, SDLoc(N));
}]>;

// Turn a 4-bit blendi immediate to 8-bit for use with pblendw and invert it.
def BlendScaleCommuteImm4 : SDNodeXForm<timm, [{
  const uint8_t Mask4 = N->getZExtValue();
  uint8_t Scaled = 0;
  // Widen first (bit i -> 2-bit group i), then flip all eight result bits.
  for (unsigned Bit = 0; Bit < 4; ++Bit)
    if ((Mask4 >> Bit) & 1)
      Scaled |= 0x3 << (Bit * 2);
  return getI8Imm(Scaled ^ 0xff, SDLoc(N));
}]>;

// Turn a 2-bit blendi immediate to 8-bit for use with pblendw and invert it.
def BlendScaleCommuteImm2 : SDNodeXForm<timm, [{
  const uint8_t Mask2 = N->getZExtValue();
  uint8_t Scaled = 0;
  // Widen first (bit i -> nibble i), then flip all eight result bits.
  for (unsigned Bit = 0; Bit < 2; ++Bit)
    if ((Mask2 >> Bit) & 1)
      Scaled |= 0xf << (Bit * 4);
  return getI8Imm(Scaled ^ 0xff, SDLoc(N));
}]>;

// Turn a 2-bit blendi immediate to 4-bit for use with pblendd and invert it.
def BlendScaleCommuteImm2to4 : SDNodeXForm<timm, [{
  const uint8_t Mask2 = N->getZExtValue();
  uint8_t Scaled = 0;
  // Widen first (bit i -> 2-bit group i), then flip the four result bits.
  for (unsigned Bit = 0; Bit < 2; ++Bit)
    if ((Mask2 >> Bit) & 1)
      Scaled |= 0x3 << (Bit * 2);
  return getI8Imm(Scaled ^ 0xf, SDLoc(N));
}]>;

// AVX 128-bit mpsadbw / dpps / dppd and 256-bit dpps.
let Predicates = [HasAVX] in {
  // Overrides the isCommutable = 1 set inside the multiclass.
  let isCommutable = 0 in
  defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
                                      VR128, load, i128mem, 0,
                                      SchedWriteMPSAD.XMM>,
                  VEX, VVVV, WIG;

  // The dot-product instructions read MXCSR and may raise FP exceptions.
  let Uses = [MXCSR], mayRaiseFPException = 1 in {
    let ExeDomain = SSEPackedSingle in
    defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
                                     VR128, load, f128mem, 0,
                                     SchedWriteDPPS.XMM>,
                 VEX, VVVV, WIG;
    let ExeDomain = SSEPackedDouble in
    defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
                                     VR128, load, f128mem, 0,
                                     SchedWriteDPPD.XMM>,
                 VEX, VVVV, WIG;
    let ExeDomain = SSEPackedSingle in
    defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
                                      VR256, load, i256mem, 0,
                                      SchedWriteDPPS.YMM>,
                  VEX, VVVV, VEX_L, WIG;
  }
}

// AVX2 256-bit mpsadbw.
let Predicates = [HasAVX2], isCommutable = 0 in
defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
                                     VR256, load, i256mem, 0,
                                     SchedWriteMPSAD.YMM>,
                 VEX, VVVV, VEX_L, WIG;

// Legacy SSE4.1 forms: two-operand syntax, destination tied to $src1.
let Constraints = "$src1 = $dst" in {
  let isCommutable = 0 in
  defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
                                     VR128, memop, i128mem, 1,
                                     SchedWriteMPSAD.XMM>;

  let ExeDomain = SSEPackedSingle in
  defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
                                  VR128, memop, f128mem, 1,
                                  SchedWriteDPPS.XMM>,
              SIMD_EXC;
  let ExeDomain = SSEPackedDouble in
  defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
                                  VR128, memop, f128mem, 1,
                                  SchedWriteDPPD.XMM>,
              SIMD_EXC;
}

/// SS41I_blend_rmi - SSE 4.1 blend with 8-bit immediate.
/// In addition to the rri/rmi instructions this emits a pattern that folds a
/// load appearing as the first blend source, rewriting the immediate with
/// commuteXForm.
multiclass SS41I_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           ValueType OpVT, RegisterClass RC,
                           PatFrag memop_frag, X86MemOperand x86memop,
                           bit Is2Addr, Domain d, X86FoldableSchedWrite sched,
                           SDNodeXForm commuteXForm> {
  // The two-address form ties the destination to $src1.
  let ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in {
    let isCommutable = 1 in
    def rri : SS4AIi8<
        opc, MRMSrcReg, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                       "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                       "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
      Sched<[sched]>;

    def rmi : SS4AIi8<
        opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                       "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                       "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst,
              (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>,
      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Pattern to commute if load is in first source.
  def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, timm:$src3)),
            (!cast<Instruction>(NAME#"rmi")
                 RC:$src1, addr:$src2, (commuteXForm timm:$src3))>;
}

// AVX immediate blends. The commute transform matches the number of lanes
// the immediate controls.
let Predicates = [HasAVX] in {
  defm VBLENDPS  : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v4f32, VR128,
                                   load, f128mem, 0, SSEPackedSingle,
                                   SchedWriteFBlend.XMM, BlendCommuteImm4>,
                   VEX, VVVV, WIG;
  defm VBLENDPSY : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v8f32, VR256,
                                   load, f256mem, 0, SSEPackedSingle,
                                   SchedWriteFBlend.YMM, BlendCommuteImm8>,
                   VEX, VVVV, VEX_L, WIG;
  defm VBLENDPD  : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v2f64, VR128,
                                   load, f128mem, 0, SSEPackedDouble,
                                   SchedWriteFBlend.XMM, BlendCommuteImm2>,
                   VEX, VVVV, WIG;
  defm VBLENDPDY : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v4f64, VR256,
                                   load, f256mem, 0, SSEPackedDouble,
                                   SchedWriteFBlend.YMM, BlendCommuteImm4>,
                   VEX, VVVV, VEX_L, WIG;
  defm VPBLENDW  : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v8i16, VR128,
                                   load, i128mem, 0, SSEPackedInt,
                                   SchedWriteBlend.XMM, BlendCommuteImm8>,
                   VEX, VVVV, WIG;
}

let Predicates = [HasAVX2] in {
  defm VPBLENDWY : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v16i16, VR256,
                                   load, i256mem, 0, SSEPackedInt,
                                   SchedWriteBlend.YMM, BlendCommuteImm8>,
                   VEX, VVVV, VEX_L, WIG;
}

// Emulate vXi32/vXi64 blends with vXf32/vXf64 or pblendw.
// ExecutionDomainFixPass will cleanup domains later on.
let Predicates = [HasAVX1Only] in {
  // v4i64 -> vblendpd (ymm).
  def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3),
            (VBLENDPDYrri VR256:$src1, VR256:$src2, timm:$src3)>;
  def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3),
            (VBLENDPDYrmi VR256:$src1, addr:$src2, timm:$src3)>;
  def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3),
            (VBLENDPDYrmi VR256:$src1, addr:$src2,
                          (BlendCommuteImm4 timm:$src3))>;

  // Use pblendw for 128-bit integer to keep it in the integer domain and
  // prevent it from becoming movsd via commuting under optsize.
  def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
            (VPBLENDWrri VR128:$src1, VR128:$src2,
                         (BlendScaleImm2 timm:$src3))>;
  def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3),
            (VPBLENDWrmi VR128:$src1, addr:$src2,
                         (BlendScaleImm2 timm:$src3))>;
  def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3),
            (VPBLENDWrmi VR128:$src1, addr:$src2,
                         (BlendScaleCommuteImm2 timm:$src3))>;

  // v8i32 -> vblendps (ymm).
  def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), timm:$src3),
            (VBLENDPSYrri VR256:$src1, VR256:$src2, timm:$src3)>;
  def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), timm:$src3),
            (VBLENDPSYrmi VR256:$src1, addr:$src2, timm:$src3)>;
  def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, timm:$src3),
            (VBLENDPSYrmi VR256:$src1, addr:$src2,
                          (BlendCommuteImm8 timm:$src3))>;

  // Use pblendw for 128-bit integer to keep it in the integer domain and
  // prevent it from becoming movss via commuting under optsize.
  def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3),
            (VPBLENDWrri VR128:$src1, VR128:$src2,
                         (BlendScaleImm4 timm:$src3))>;
  def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), timm:$src3),
            (VPBLENDWrmi VR128:$src1, addr:$src2,
                         (BlendScaleImm4 timm:$src3))>;
  def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, timm:$src3),
            (VPBLENDWrmi VR128:$src1, addr:$src2,
                         (BlendScaleCommuteImm4 timm:$src3))>;
}

// Legacy SSE4.1 immediate blends (two-address).
defm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32, VR128, memop,
                               f128mem, 1, SSEPackedSingle,
                               SchedWriteFBlend.XMM, BlendCommuteImm4>;
defm BLENDPD : SS41I_blend_rmi<0x0D, "blendpd", X86Blendi, v2f64, VR128, memop,
                               f128mem, 1, SSEPackedDouble,
                               SchedWriteFBlend.XMM, BlendCommuteImm2>;
defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16, VR128, memop,
                               i128mem, 1, SSEPackedInt,
                               SchedWriteBlend.XMM, BlendCommuteImm8>;

let Predicates = [UseSSE41] in {
  // Use pblendw for 128-bit integer to keep it in the integer domain and
  // prevent it from becoming movss/movsd via commuting under optsize.
  def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
            (PBLENDWrri VR128:$src1, VR128:$src2,
                        (BlendScaleImm2 timm:$src3))>;
  def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), timm:$src3),
            (PBLENDWrmi VR128:$src1, addr:$src2,
                        (BlendScaleImm2 timm:$src3))>;
  def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, timm:$src3),
            (PBLENDWrmi VR128:$src1, addr:$src2,
                        (BlendScaleCommuteImm2 timm:$src3))>;

  def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3),
            (PBLENDWrri VR128:$src1, VR128:$src2,
                        (BlendScaleImm4 timm:$src3))>;
  def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), timm:$src3),
            (PBLENDWrmi VR128:$src1, addr:$src2,
                        (BlendScaleImm4 timm:$src3))>;
  def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, timm:$src3),
            (PBLENDWrmi VR128:$src1, addr:$src2,
                        (BlendScaleCommuteImm4 timm:$src3))>;
}

// For insertion into the zero index (low half) of a 256-bit vector, it is
// more efficient to generate a blend with immediate instead of an insert*128.
let Predicates = [HasAVX] in {
  // Register destination: the 128-bit value is placed in the low half of an
  // otherwise undefined 256-bit register and blended into $src1.
  def : Pat<(insert_subvector (v4f64 VR256:$src1), (v2f64 VR128:$src2),
                              (iPTR 0)),
            (VBLENDPDYrri
                 VR256:$src1,
                 (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src2, sub_xmm),
                 0x3)>;
  def : Pat<(insert_subvector (v8f32 VR256:$src1), (v4f32 VR128:$src2),
                              (iPTR 0)),
            (VBLENDPSYrri
                 VR256:$src1,
                 (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src2, sub_xmm),
                 0xf)>;

  // Loaded destination: the operands are swapped so the load can be folded,
  // and the immediate selects the upper lanes from memory instead.
  def : Pat<(insert_subvector (loadv4f64 addr:$src2), (v2f64 VR128:$src1),
                              (iPTR 0)),
            (VBLENDPDYrmi
                 (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src1, sub_xmm),
                 addr:$src2, 0xc)>;
  def : Pat<(insert_subvector (loadv8f32 addr:$src2), (v4f32 VR128:$src1),
                              (iPTR 0)),
            (VBLENDPSYrmi
                 (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src1, sub_xmm),
                 addr:$src2, 0xf0)>;
}

/// SS41I_quaternary_avx - AVX SSE 4.1 with 4 operands.
/// The selection pattern passes the operands to OpNode in reverse order
/// ($src3, $src2, $src1) relative to the instruction's input list.
multiclass SS41I_quaternary_avx<bits<8> opc, string OpcodeStr,
                                RegisterClass RC, X86MemOperand x86memop,
                                ValueType VT, PatFrag mem_frag, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  // All-register form.
  def rrr : Ii8Reg<
      opc, MRMSrcReg, (outs RC:$dst),
      (ins RC:$src1, RC:$src2, RC:$src3),
      !strconcat(OpcodeStr,
                 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
      [(set RC:$dst, (VT (OpNode RC:$src3, RC:$src2, RC:$src1)))],
      SSEPackedInt>,
    TA, PD, VEX, VVVV, Sched<[sched]>;

  // Form with $src2 loaded through mem_frag.
  def rmr : Ii8Reg<
      opc, MRMSrcMem, (outs RC:$dst),
      (ins RC:$src1, x86memop:$src2, RC:$src3),
      !strconcat(OpcodeStr,
                 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
      [(set RC:$dst,
            (OpNode RC:$src3, (mem_frag addr:$src2), RC:$src1))],
      SSEPackedInt>,
    TA, PD, VEX, VVVV,
    Sched<[sched.Folded, sched.ReadAfterFold,
           // x86memop:$src2
           ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
           // RC:$src3
           sched.ReadAfterFold]>;
}

// AVX variable blends.
let Predicates = [HasAVX] in {
  let ExeDomain = SSEPackedDouble in {
    defm VBLENDVPD  : SS41I_quaternary_avx<0x4B, "vblendvpd", VR128, f128mem,
                                           v2f64, loadv2f64, X86Blendv,
                                           SchedWriteFVarBlend.XMM>;
    defm VBLENDVPDY : SS41I_quaternary_avx<0x4B, "vblendvpd", VR256, f256mem,
                                           v4f64, loadv4f64, X86Blendv,
                                           SchedWriteFVarBlend.YMM>,
                      VEX_L;
  } // ExeDomain = SSEPackedDouble
  let ExeDomain = SSEPackedSingle in {
    defm VBLENDVPS  : SS41I_quaternary_avx<0x4A, "vblendvps", VR128, f128mem,
                                           v4f32, loadv4f32, X86Blendv,
                                           SchedWriteFVarBlend.XMM>;
    defm VBLENDVPSY : SS41I_quaternary_avx<0x4A, "vblendvps", VR256, f256mem,
                                           v8f32, loadv8f32, X86Blendv,
                                           SchedWriteFVarBlend.YMM>,
                      VEX_L;
  } // ExeDomain = SSEPackedSingle
  defm VPBLENDVB : SS41I_quaternary_avx<0x4C, "vpblendvb", VR128, i128mem,
                                        v16i8, loadv16i8, X86Blendv,
                                        SchedWriteVarBlend.XMM>;
}

let Predicates = [HasAVX2] in {
  defm VPBLENDVBY : SS41I_quaternary_avx<0x4C, "vpblendvb", VR256, i256mem,
                                         v32i8, loadv32i8, X86Blendv,
                                         SchedWriteVarBlend.YMM>,
                    VEX_L;
}

// Select 32/64-bit integer variable blends with the FP blend instructions.
let Predicates = [HasAVX] in {
  def : Pat<(v4i32 (X86Blendv (v4i32 VR128:$mask),
                              (v4i32 VR128:$src1), (v4i32 VR128:$src2))),
            (VBLENDVPSrrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v2i64 (X86Blendv (v2i64 VR128:$mask),
                              (v2i64 VR128:$src1), (v2i64 VR128:$src2))),
            (VBLENDVPDrrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v8i32 (X86Blendv (v8i32 VR256:$mask),
                              (v8i32 VR256:$src1), (v8i32 VR256:$src2))),
            (VBLENDVPSYrrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v4i64 (X86Blendv (v4i64 VR256:$mask),
                              (v4i64 VR256:$src1), (v4i64 VR256:$src2))),
            (VBLENDVPDYrrr VR256:$src2, VR256:$src1, VR256:$mask)>;
}

// Prefer a movss or movsd over a blendps when optimizing for size. These were
// changed to use blends because blends have better throughput on sandybridge
// and haswell, but movs[s/d] are 1-2 byte shorter instructions. Hence the
// blend patterns below apply only under OptForSpeed.
let Predicates = [HasAVX, OptForSpeed] in {
  // Zero-extending move of the low element.
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (VBLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (VPBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;

  // movss as blendps; the immediate is inverted when the load is the first
  // operand.
  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
            (VBLENDPSrri VR128:$src1, VR128:$src2, (i8 1))>;
  def : Pat<(v4f32 (X86Movss VR128:$src1, (loadv4f32 addr:$src2))),
            (VBLENDPSrmi VR128:$src1, addr:$src2, (i8 1))>;
  def : Pat<(v4f32 (X86Movss (loadv4f32 addr:$src2), VR128:$src1)),
            (VBLENDPSrmi VR128:$src1, addr:$src2, (i8 0xe))>;

  // movsd as blendpd; same treatment of the commuted load.
  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
            (VBLENDPDrri VR128:$src1, VR128:$src2, (i8 1))>;
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (loadv2f64 addr:$src2))),
            (VBLENDPDrmi VR128:$src1, addr:$src2, (i8 1))>;
  def : Pat<(v2f64 (X86Movsd (loadv2f64 addr:$src2), VR128:$src1)),
            (VBLENDPDrmi VR128:$src1, addr:$src2, (i8 2))>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
            (SUBREG_TO_REG
                 (i32 0),
                 (v4f32 (VBLENDPSrri
                             (v4f32 (V_SET0)),
                             (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src),
                                                    sub_xmm)),
                             (i8 1))),
                 sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
            (SUBREG_TO_REG
                 (i32 0),
                 (v4i32 (VPBLENDWrri
                             (v4i32 (V_SET0)),
                             (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src),
                                                    sub_xmm)),
                             (i8 3))),
                 sub_xmm)>;
}

// Prefer a movss or movsd over a blendps when optimizing for size. these were
// changed to use blends because blends have better throughput on sandybridge
// and haswell, but movs[s/d] are 1-2 byte shorter instructions.
let Predicates = [UseSSE41, OptForSpeed] in {
  // With SSE41 we can use blends for these patterns.
64260b57cec5SDimitry Andric def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), 64270b57cec5SDimitry Andric (BLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>; 64280b57cec5SDimitry Andric def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), 64290b57cec5SDimitry Andric (PBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>; 64300b57cec5SDimitry Andric 64310b57cec5SDimitry Andric def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), 64320b57cec5SDimitry Andric (BLENDPSrri VR128:$src1, VR128:$src2, (i8 1))>; 64330b57cec5SDimitry Andric def : Pat<(v4f32 (X86Movss VR128:$src1, (memopv4f32 addr:$src2))), 64340b57cec5SDimitry Andric (BLENDPSrmi VR128:$src1, addr:$src2, (i8 1))>; 64350b57cec5SDimitry Andric def : Pat<(v4f32 (X86Movss (memopv4f32 addr:$src2), VR128:$src1)), 64360b57cec5SDimitry Andric (BLENDPSrmi VR128:$src1, addr:$src2, (i8 0xe))>; 64370b57cec5SDimitry Andric 64380b57cec5SDimitry Andric def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), 64390b57cec5SDimitry Andric (BLENDPDrri VR128:$src1, VR128:$src2, (i8 1))>; 64400b57cec5SDimitry Andric def : Pat<(v2f64 (X86Movsd VR128:$src1, (memopv2f64 addr:$src2))), 64410b57cec5SDimitry Andric (BLENDPDrmi VR128:$src1, addr:$src2, (i8 1))>; 64420b57cec5SDimitry Andric def : Pat<(v2f64 (X86Movsd (memopv2f64 addr:$src2), VR128:$src1)), 64430b57cec5SDimitry Andric (BLENDPDrmi VR128:$src1, addr:$src2, (i8 2))>; 64440b57cec5SDimitry Andric} 64450b57cec5SDimitry Andric 64460b57cec5SDimitry Andric 64470b57cec5SDimitry Andric/// SS41I_ternary - SSE 4.1 ternary operator 64480b57cec5SDimitry Andriclet Uses = [XMM0], Constraints = "$src1 = $dst" in { 64490b57cec5SDimitry Andric multiclass SS41I_ternary<bits<8> opc, string OpcodeStr, ValueType VT, 64500b57cec5SDimitry Andric PatFrag mem_frag, X86MemOperand x86memop, 64510b57cec5SDimitry Andric SDNode OpNode, X86FoldableSchedWrite sched> { 64520b57cec5SDimitry Andric def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst), 64530b57cec5SDimitry Andric (ins VR128:$src1, VR128:$src2), 
64540b57cec5SDimitry Andric !strconcat(OpcodeStr, 64550b57cec5SDimitry Andric "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"), 64560b57cec5SDimitry Andric [(set VR128:$dst, 64570b57cec5SDimitry Andric (VT (OpNode XMM0, VR128:$src2, VR128:$src1)))]>, 64580b57cec5SDimitry Andric Sched<[sched]>; 64590b57cec5SDimitry Andric 64600b57cec5SDimitry Andric def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst), 64610b57cec5SDimitry Andric (ins VR128:$src1, x86memop:$src2), 64620b57cec5SDimitry Andric !strconcat(OpcodeStr, 64630b57cec5SDimitry Andric "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"), 64640b57cec5SDimitry Andric [(set VR128:$dst, 64650b57cec5SDimitry Andric (OpNode XMM0, (mem_frag addr:$src2), VR128:$src1))]>, 64660b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 64670b57cec5SDimitry Andric } 64680b57cec5SDimitry Andric} 64690b57cec5SDimitry Andric 64700b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in 64710b57cec5SDimitry Andricdefm BLENDVPD : SS41I_ternary<0x15, "blendvpd", v2f64, memopv2f64, f128mem, 64720b57cec5SDimitry Andric X86Blendv, SchedWriteFVarBlend.XMM>; 64730b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in 64740b57cec5SDimitry Andricdefm BLENDVPS : SS41I_ternary<0x14, "blendvps", v4f32, memopv4f32, f128mem, 64750b57cec5SDimitry Andric X86Blendv, SchedWriteFVarBlend.XMM>; 64760b57cec5SDimitry Andricdefm PBLENDVB : SS41I_ternary<0x10, "pblendvb", v16i8, memopv16i8, i128mem, 64770b57cec5SDimitry Andric X86Blendv, SchedWriteVarBlend.XMM>; 64780b57cec5SDimitry Andric 64790b57cec5SDimitry Andric// Aliases with the implicit xmm0 argument 64800b57cec5SDimitry Andricdef : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}", 64810b57cec5SDimitry Andric (BLENDVPDrr0 VR128:$dst, VR128:$src2), 0>; 64820b57cec5SDimitry Andricdef : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}", 64830b57cec5SDimitry Andric (BLENDVPDrm0 VR128:$dst, f128mem:$src2), 0>; 64840b57cec5SDimitry Andricdef : InstAlias<"blendvps\t{$src2, $dst|$dst, $src2}", 
64850b57cec5SDimitry Andric (BLENDVPSrr0 VR128:$dst, VR128:$src2), 0>; 64860b57cec5SDimitry Andricdef : InstAlias<"blendvps\t{$src2, $dst|$dst, $src2}", 64870b57cec5SDimitry Andric (BLENDVPSrm0 VR128:$dst, f128mem:$src2), 0>; 64880b57cec5SDimitry Andricdef : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}", 64890b57cec5SDimitry Andric (PBLENDVBrr0 VR128:$dst, VR128:$src2), 0>; 64900b57cec5SDimitry Andricdef : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}", 64910b57cec5SDimitry Andric (PBLENDVBrm0 VR128:$dst, i128mem:$src2), 0>; 64920b57cec5SDimitry Andric 64930b57cec5SDimitry Andriclet Predicates = [UseSSE41] in { 64940b57cec5SDimitry Andric def : Pat<(v4i32 (X86Blendv (v4i32 XMM0), (v4i32 VR128:$src1), 64950b57cec5SDimitry Andric (v4i32 VR128:$src2))), 64960b57cec5SDimitry Andric (BLENDVPSrr0 VR128:$src2, VR128:$src1)>; 64970b57cec5SDimitry Andric def : Pat<(v2i64 (X86Blendv (v2i64 XMM0), (v2i64 VR128:$src1), 64980b57cec5SDimitry Andric (v2i64 VR128:$src2))), 64990b57cec5SDimitry Andric (BLENDVPDrr0 VR128:$src2, VR128:$src1)>; 65000b57cec5SDimitry Andric} 65010b57cec5SDimitry Andric 65020b57cec5SDimitry Andriclet AddedComplexity = 400 in { // Prefer non-temporal versions 65030b57cec5SDimitry Andric 65040b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in 65050b57cec5SDimitry Andricdef VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 65060b57cec5SDimitry Andric "vmovntdqa\t{$src, $dst|$dst, $src}", []>, 650706c3fb27SDimitry Andric Sched<[SchedWriteVecMoveLSNT.XMM.RM]>, VEX, WIG; 65080b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in 65090b57cec5SDimitry Andricdef VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), 65100b57cec5SDimitry Andric "vmovntdqa\t{$src, $dst|$dst, $src}", []>, 651106c3fb27SDimitry Andric Sched<[SchedWriteVecMoveLSNT.YMM.RM]>, VEX, VEX_L, WIG; 65120b57cec5SDimitry Andricdef MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 65130b57cec5SDimitry Andric 
"movntdqa\t{$src, $dst|$dst, $src}", []>, 65140b57cec5SDimitry Andric Sched<[SchedWriteVecMoveLSNT.XMM.RM]>; 65150b57cec5SDimitry Andric 65160b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in { 65170b57cec5SDimitry Andric def : Pat<(v8f32 (alignednontemporalload addr:$src)), 65180b57cec5SDimitry Andric (VMOVNTDQAYrm addr:$src)>; 65190b57cec5SDimitry Andric def : Pat<(v4f64 (alignednontemporalload addr:$src)), 65200b57cec5SDimitry Andric (VMOVNTDQAYrm addr:$src)>; 65210b57cec5SDimitry Andric def : Pat<(v4i64 (alignednontemporalload addr:$src)), 65220b57cec5SDimitry Andric (VMOVNTDQAYrm addr:$src)>; 65230b57cec5SDimitry Andric def : Pat<(v8i32 (alignednontemporalload addr:$src)), 65240b57cec5SDimitry Andric (VMOVNTDQAYrm addr:$src)>; 65250b57cec5SDimitry Andric def : Pat<(v16i16 (alignednontemporalload addr:$src)), 65260b57cec5SDimitry Andric (VMOVNTDQAYrm addr:$src)>; 6527fcaf7f86SDimitry Andric def : Pat<(v16f16 (alignednontemporalload addr:$src)), 6528fcaf7f86SDimitry Andric (VMOVNTDQAYrm addr:$src)>; 65290b57cec5SDimitry Andric def : Pat<(v32i8 (alignednontemporalload addr:$src)), 65300b57cec5SDimitry Andric (VMOVNTDQAYrm addr:$src)>; 65310b57cec5SDimitry Andric} 65320b57cec5SDimitry Andric 65330b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 65340b57cec5SDimitry Andric def : Pat<(v4f32 (alignednontemporalload addr:$src)), 65350b57cec5SDimitry Andric (VMOVNTDQArm addr:$src)>; 65360b57cec5SDimitry Andric def : Pat<(v2f64 (alignednontemporalload addr:$src)), 65370b57cec5SDimitry Andric (VMOVNTDQArm addr:$src)>; 65380b57cec5SDimitry Andric def : Pat<(v2i64 (alignednontemporalload addr:$src)), 65390b57cec5SDimitry Andric (VMOVNTDQArm addr:$src)>; 65400b57cec5SDimitry Andric def : Pat<(v4i32 (alignednontemporalload addr:$src)), 65410b57cec5SDimitry Andric (VMOVNTDQArm addr:$src)>; 65420b57cec5SDimitry Andric def : Pat<(v8i16 (alignednontemporalload addr:$src)), 65430b57cec5SDimitry Andric (VMOVNTDQArm addr:$src)>; 6544fcaf7f86SDimitry Andric def : 
Pat<(v8f16 (alignednontemporalload addr:$src)), 6545fcaf7f86SDimitry Andric (VMOVNTDQArm addr:$src)>; 65460b57cec5SDimitry Andric def : Pat<(v16i8 (alignednontemporalload addr:$src)), 65470b57cec5SDimitry Andric (VMOVNTDQArm addr:$src)>; 65480b57cec5SDimitry Andric} 65490b57cec5SDimitry Andric 65500b57cec5SDimitry Andriclet Predicates = [UseSSE41] in { 65510b57cec5SDimitry Andric def : Pat<(v4f32 (alignednontemporalload addr:$src)), 65520b57cec5SDimitry Andric (MOVNTDQArm addr:$src)>; 65530b57cec5SDimitry Andric def : Pat<(v2f64 (alignednontemporalload addr:$src)), 65540b57cec5SDimitry Andric (MOVNTDQArm addr:$src)>; 65550b57cec5SDimitry Andric def : Pat<(v2i64 (alignednontemporalload addr:$src)), 65560b57cec5SDimitry Andric (MOVNTDQArm addr:$src)>; 65570b57cec5SDimitry Andric def : Pat<(v4i32 (alignednontemporalload addr:$src)), 65580b57cec5SDimitry Andric (MOVNTDQArm addr:$src)>; 65590b57cec5SDimitry Andric def : Pat<(v8i16 (alignednontemporalload addr:$src)), 65600b57cec5SDimitry Andric (MOVNTDQArm addr:$src)>; 6561fcaf7f86SDimitry Andric def : Pat<(v8f16 (alignednontemporalload addr:$src)), 6562fcaf7f86SDimitry Andric (MOVNTDQArm addr:$src)>; 65630b57cec5SDimitry Andric def : Pat<(v16i8 (alignednontemporalload addr:$src)), 65640b57cec5SDimitry Andric (MOVNTDQArm addr:$src)>; 65650b57cec5SDimitry Andric} 65660b57cec5SDimitry Andric 65670b57cec5SDimitry Andric} // AddedComplexity 65680b57cec5SDimitry Andric 65690b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 65700b57cec5SDimitry Andric// SSE4.2 - Compare Instructions 65710b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 65720b57cec5SDimitry Andric 65730b57cec5SDimitry Andric/// SS42I_binop_rm - Simple SSE 4.2 binary operator 65740b57cec5SDimitry Andricmulticlass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 65750b57cec5SDimitry Andric ValueType OpVT, RegisterClass RC, PatFrag memop_frag, 
65760b57cec5SDimitry Andric X86MemOperand x86memop, X86FoldableSchedWrite sched, 65770b57cec5SDimitry Andric bit Is2Addr = 1> { 65780b57cec5SDimitry Andric def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst), 65790b57cec5SDimitry Andric (ins RC:$src1, RC:$src2), 65800b57cec5SDimitry Andric !if(Is2Addr, 65810b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 65820b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 65830b57cec5SDimitry Andric [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, 65840b57cec5SDimitry Andric Sched<[sched]>; 65850b57cec5SDimitry Andric def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst), 65860b57cec5SDimitry Andric (ins RC:$src1, x86memop:$src2), 65870b57cec5SDimitry Andric !if(Is2Addr, 65880b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 65890b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 65900b57cec5SDimitry Andric [(set RC:$dst, 65910b57cec5SDimitry Andric (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, 65920b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 65930b57cec5SDimitry Andric} 65940b57cec5SDimitry Andric 65950b57cec5SDimitry Andriclet Predicates = [HasAVX] in 65960b57cec5SDimitry Andric defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128, 65970b57cec5SDimitry Andric load, i128mem, SchedWriteVecALU.XMM, 0>, 6598cb14a3feSDimitry Andric VEX, VVVV, WIG; 65990b57cec5SDimitry Andric 66000b57cec5SDimitry Andriclet Predicates = [HasAVX2] in 66010b57cec5SDimitry Andric defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256, 66020b57cec5SDimitry Andric load, i256mem, SchedWriteVecALU.YMM, 0>, 6603cb14a3feSDimitry Andric VEX, VVVV, VEX_L, WIG; 66040b57cec5SDimitry Andric 66050b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in 66060b57cec5SDimitry Andric defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128, 
66070b57cec5SDimitry Andric memop, i128mem, SchedWriteVecALU.XMM>; 66080b57cec5SDimitry Andric 66090b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 66100b57cec5SDimitry Andric// SSE4.2 - String/text Processing Instructions 66110b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 66120b57cec5SDimitry Andric 66130b57cec5SDimitry Andricmulticlass pcmpistrm_SS42AI<string asm> { 6614*0fca6ea1SDimitry Andric def rri : SS42AI<0x62, MRMSrcReg, (outs), 66150b57cec5SDimitry Andric (ins VR128:$src1, VR128:$src2, u8imm:$src3), 66160b57cec5SDimitry Andric !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), 66170b57cec5SDimitry Andric []>, Sched<[WritePCmpIStrM]>; 66180b57cec5SDimitry Andric let mayLoad = 1 in 6619*0fca6ea1SDimitry Andric def rmi :SS42AI<0x62, MRMSrcMem, (outs), 66200b57cec5SDimitry Andric (ins VR128:$src1, i128mem:$src2, u8imm:$src3), 66210b57cec5SDimitry Andric !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), 66220b57cec5SDimitry Andric []>, Sched<[WritePCmpIStrM.Folded, WritePCmpIStrM.ReadAfterFold]>; 66230b57cec5SDimitry Andric} 66240b57cec5SDimitry Andric 66250b57cec5SDimitry Andriclet Defs = [XMM0, EFLAGS], hasSideEffects = 0 in { 66260b57cec5SDimitry Andric let Predicates = [HasAVX] in 662706c3fb27SDimitry Andric defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX, WIG; 66280b57cec5SDimitry Andric defm PCMPISTRM : pcmpistrm_SS42AI<"pcmpistrm"> ; 66290b57cec5SDimitry Andric} 66300b57cec5SDimitry Andric 66310b57cec5SDimitry Andricmulticlass SS42AI_pcmpestrm<string asm> { 6632*0fca6ea1SDimitry Andric def rri : SS42AI<0x60, MRMSrcReg, (outs), 66330b57cec5SDimitry Andric (ins VR128:$src1, VR128:$src3, u8imm:$src5), 66340b57cec5SDimitry Andric !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), 66350b57cec5SDimitry Andric []>, Sched<[WritePCmpEStrM]>; 66360b57cec5SDimitry Andric let mayLoad = 1 in 
6637*0fca6ea1SDimitry Andric def rmi : SS42AI<0x60, MRMSrcMem, (outs), 66380b57cec5SDimitry Andric (ins VR128:$src1, i128mem:$src3, u8imm:$src5), 66390b57cec5SDimitry Andric !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), 66400b57cec5SDimitry Andric []>, Sched<[WritePCmpEStrM.Folded, WritePCmpEStrM.ReadAfterFold]>; 66410b57cec5SDimitry Andric} 66420b57cec5SDimitry Andric 66430b57cec5SDimitry Andriclet Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in { 66440b57cec5SDimitry Andric let Predicates = [HasAVX] in 664506c3fb27SDimitry Andric defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX, WIG; 66460b57cec5SDimitry Andric defm PCMPESTRM : SS42AI_pcmpestrm<"pcmpestrm">; 66470b57cec5SDimitry Andric} 66480b57cec5SDimitry Andric 66490b57cec5SDimitry Andricmulticlass SS42AI_pcmpistri<string asm> { 6650*0fca6ea1SDimitry Andric def rri : SS42AI<0x63, MRMSrcReg, (outs), 66510b57cec5SDimitry Andric (ins VR128:$src1, VR128:$src2, u8imm:$src3), 66520b57cec5SDimitry Andric !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), 66530b57cec5SDimitry Andric []>, Sched<[WritePCmpIStrI]>; 66540b57cec5SDimitry Andric let mayLoad = 1 in 6655*0fca6ea1SDimitry Andric def rmi : SS42AI<0x63, MRMSrcMem, (outs), 66560b57cec5SDimitry Andric (ins VR128:$src1, i128mem:$src2, u8imm:$src3), 66570b57cec5SDimitry Andric !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), 66580b57cec5SDimitry Andric []>, Sched<[WritePCmpIStrI.Folded, WritePCmpIStrI.ReadAfterFold]>; 66590b57cec5SDimitry Andric} 66600b57cec5SDimitry Andric 66610b57cec5SDimitry Andriclet Defs = [ECX, EFLAGS], hasSideEffects = 0 in { 66620b57cec5SDimitry Andric let Predicates = [HasAVX] in 666306c3fb27SDimitry Andric defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX, WIG; 66640b57cec5SDimitry Andric defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">; 66650b57cec5SDimitry Andric} 66660b57cec5SDimitry Andric 66670b57cec5SDimitry Andricmulticlass SS42AI_pcmpestri<string asm> { 
6668*0fca6ea1SDimitry Andric def rri : SS42AI<0x61, MRMSrcReg, (outs), 66690b57cec5SDimitry Andric (ins VR128:$src1, VR128:$src3, u8imm:$src5), 66700b57cec5SDimitry Andric !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), 66710b57cec5SDimitry Andric []>, Sched<[WritePCmpEStrI]>; 66720b57cec5SDimitry Andric let mayLoad = 1 in 6673*0fca6ea1SDimitry Andric def rmi : SS42AI<0x61, MRMSrcMem, (outs), 66740b57cec5SDimitry Andric (ins VR128:$src1, i128mem:$src3, u8imm:$src5), 66750b57cec5SDimitry Andric !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), 66760b57cec5SDimitry Andric []>, Sched<[WritePCmpEStrI.Folded, WritePCmpEStrI.ReadAfterFold]>; 66770b57cec5SDimitry Andric} 66780b57cec5SDimitry Andric 66790b57cec5SDimitry Andriclet Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in { 66800b57cec5SDimitry Andric let Predicates = [HasAVX] in 668106c3fb27SDimitry Andric defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX, WIG; 66820b57cec5SDimitry Andric defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">; 66830b57cec5SDimitry Andric} 66840b57cec5SDimitry Andric 66850b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 66860b57cec5SDimitry Andric// SSE4.2 - CRC Instructions 66870b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 66880b57cec5SDimitry Andric 6689647cbc5dSDimitry Andric// NOTE: 'HasCRC32' is used as CRC32 instructions are GPR only and not directly 6690647cbc5dSDimitry Andric// controlled by the SSE42 flag. 
6691647cbc5dSDimitry Andric// 66920b57cec5SDimitry Andric// No CRC instructions have AVX equivalents 66930b57cec5SDimitry Andric 6694647cbc5dSDimitry Andricclass Crc32r<X86TypeInfo t, RegisterClass rc, SDPatternOperator node> 6695647cbc5dSDimitry Andric : ITy<0xF1, MRMSrcReg, t, (outs rc:$dst), (ins rc:$src1, t.RegClass:$src2), 6696647cbc5dSDimitry Andric "crc32", binop_args, [(set rc:$dst, (node rc:$src1, t.RegClass:$src2))]>, 66977a6dacacSDimitry Andric Sched<[WriteCRC32]> { 6698647cbc5dSDimitry Andric let Constraints = "$src1 = $dst"; 66990b57cec5SDimitry Andric} 6700647cbc5dSDimitry Andric 6701647cbc5dSDimitry Andricclass Crc32m<X86TypeInfo t, RegisterClass rc, SDPatternOperator node> 6702647cbc5dSDimitry Andric : ITy<0xF1, MRMSrcMem, t, (outs rc:$dst), (ins rc:$src1, t.MemOperand:$src2), 6703647cbc5dSDimitry Andric "crc32", binop_args, [(set rc:$dst, (node rc:$src1, (load addr:$src2)))]>, 67047a6dacacSDimitry Andric Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]> { 6705647cbc5dSDimitry Andric let Constraints = "$src1 = $dst"; 6706647cbc5dSDimitry Andric} 6707647cbc5dSDimitry Andric 6708647cbc5dSDimitry Andriclet Predicates = [HasCRC32, NoEGPR], OpMap = T8, OpPrefix = XD in { 6709647cbc5dSDimitry Andric def CRC32r32r8 : Crc32r<Xi8, GR32, int_x86_sse42_crc32_32_8>; 6710647cbc5dSDimitry Andric def CRC32r32m8 : Crc32m<Xi8, GR32, int_x86_sse42_crc32_32_8>; 6711647cbc5dSDimitry Andric def CRC32r32r16 : Crc32r<Xi16, GR32, int_x86_sse42_crc32_32_16>, OpSize16; 6712647cbc5dSDimitry Andric def CRC32r32m16 : Crc32m<Xi16, GR32, int_x86_sse42_crc32_32_16>, OpSize16; 6713647cbc5dSDimitry Andric def CRC32r32r32 : Crc32r<Xi32, GR32, int_x86_sse42_crc32_32_32>, OpSize32; 6714647cbc5dSDimitry Andric def CRC32r32m32 : Crc32m<Xi32, GR32, int_x86_sse42_crc32_32_32>, OpSize32; 6715647cbc5dSDimitry Andric def CRC32r64r64 : Crc32r<Xi64, GR64, int_x86_sse42_crc32_64_64>; 6716647cbc5dSDimitry Andric def CRC32r64m64 : Crc32m<Xi64, GR64, int_x86_sse42_crc32_64_64>; 
6717647cbc5dSDimitry Andric def CRC32r64r8 : Crc32r<Xi8, GR64, null_frag>, REX_W; 6718647cbc5dSDimitry Andric let mayLoad = 1 in 6719647cbc5dSDimitry Andric def CRC32r64m8 : Crc32m<Xi8, GR64, null_frag>, REX_W; 6720647cbc5dSDimitry Andric} 6721647cbc5dSDimitry Andric 6722647cbc5dSDimitry Andriclet Predicates = [HasCRC32, HasEGPR, In64BitMode], OpMap = T_MAP4, OpEnc = EncEVEX in { 6723647cbc5dSDimitry Andric def CRC32r32r8_EVEX : Crc32r<Xi8, GR32, int_x86_sse42_crc32_32_8>; 6724647cbc5dSDimitry Andric def CRC32r32m8_EVEX : Crc32m<Xi8, GR32, int_x86_sse42_crc32_32_8>; 6725647cbc5dSDimitry Andric def CRC32r32r16_EVEX : Crc32r<Xi16, GR32, int_x86_sse42_crc32_32_16>, PD; 6726647cbc5dSDimitry Andric def CRC32r32m16_EVEX : Crc32m<Xi16, GR32, int_x86_sse42_crc32_32_16>, PD; 6727647cbc5dSDimitry Andric def CRC32r32r32_EVEX : Crc32r<Xi32, GR32, int_x86_sse42_crc32_32_32>; 6728647cbc5dSDimitry Andric def CRC32r32m32_EVEX : Crc32m<Xi32, GR32, int_x86_sse42_crc32_32_32>; 6729647cbc5dSDimitry Andric def CRC32r64r64_EVEX : Crc32r<Xi64, GR64, int_x86_sse42_crc32_64_64>; 6730647cbc5dSDimitry Andric def CRC32r64m64_EVEX : Crc32m<Xi64, GR64, int_x86_sse42_crc32_64_64>; 6731647cbc5dSDimitry Andric def CRC32r64r8_EVEX : Crc32r<Xi8, GR64, null_frag>, REX_W; 6732647cbc5dSDimitry Andric let mayLoad = 1 in 6733647cbc5dSDimitry Andric def CRC32r64m8_EVEX : Crc32m<Xi8, GR64, null_frag>, REX_W; 67340b57cec5SDimitry Andric} 67350b57cec5SDimitry Andric 67360b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 67370b57cec5SDimitry Andric// SHA-NI Instructions 67380b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 67390b57cec5SDimitry Andric 67400b57cec5SDimitry Andric// FIXME: Is there a better scheduler class for SHA than WriteVecIMul? 
67410b57cec5SDimitry Andricmulticlass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId, 6742*0fca6ea1SDimitry Andric X86FoldableSchedWrite sched, bit UsesXMM0 = 0> { 6743*0fca6ea1SDimitry Andric def rr : I<Opc, MRMSrcReg, (outs VR128:$dst), 67440b57cec5SDimitry Andric (ins VR128:$src1, VR128:$src2), 67450b57cec5SDimitry Andric !if(UsesXMM0, 67460b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"), 67470b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")), 67480b57cec5SDimitry Andric [!if(UsesXMM0, 67490b57cec5SDimitry Andric (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)), 67500b57cec5SDimitry Andric (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>, 6751cb14a3feSDimitry Andric T8, Sched<[sched]>; 67520b57cec5SDimitry Andric 6753*0fca6ea1SDimitry Andric def rm : I<Opc, MRMSrcMem, (outs VR128:$dst), 67540b57cec5SDimitry Andric (ins VR128:$src1, i128mem:$src2), 67550b57cec5SDimitry Andric !if(UsesXMM0, 67560b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"), 67570b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")), 67580b57cec5SDimitry Andric [!if(UsesXMM0, 67590b57cec5SDimitry Andric (set VR128:$dst, (IntId VR128:$src1, 67600b57cec5SDimitry Andric (memop addr:$src2), XMM0)), 67610b57cec5SDimitry Andric (set VR128:$dst, (IntId VR128:$src1, 6762cb14a3feSDimitry Andric (memop addr:$src2))))]>, T8, 67630b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 67640b57cec5SDimitry Andric} 67650b57cec5SDimitry Andric 6766*0fca6ea1SDimitry Andriclet Constraints = "$src1 = $dst", Predicates = [HasSHA] in { 67670b57cec5SDimitry Andric def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst), 67680b57cec5SDimitry Andric (ins VR128:$src1, VR128:$src2, u8imm:$src3), 67690b57cec5SDimitry Andric "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", 67700b57cec5SDimitry Andric [(set VR128:$dst, 67710b57cec5SDimitry Andric 
(int_x86_sha1rnds4 VR128:$src1, VR128:$src2, 6772cb14a3feSDimitry Andric (i8 timm:$src3)))]>, TA, 67730b57cec5SDimitry Andric Sched<[SchedWriteVecIMul.XMM]>; 67740b57cec5SDimitry Andric def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst), 67750b57cec5SDimitry Andric (ins VR128:$src1, i128mem:$src2, u8imm:$src3), 67760b57cec5SDimitry Andric "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", 67770b57cec5SDimitry Andric [(set VR128:$dst, 67780b57cec5SDimitry Andric (int_x86_sha1rnds4 VR128:$src1, 67790b57cec5SDimitry Andric (memop addr:$src2), 6780cb14a3feSDimitry Andric (i8 timm:$src3)))]>, TA, 67810b57cec5SDimitry Andric Sched<[SchedWriteVecIMul.XMM.Folded, 67820b57cec5SDimitry Andric SchedWriteVecIMul.XMM.ReadAfterFold]>; 67830b57cec5SDimitry Andric 67840b57cec5SDimitry Andric defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte, 67850b57cec5SDimitry Andric SchedWriteVecIMul.XMM>; 67860b57cec5SDimitry Andric defm SHA1MSG1 : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1, 67870b57cec5SDimitry Andric SchedWriteVecIMul.XMM>; 67880b57cec5SDimitry Andric defm SHA1MSG2 : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2, 67890b57cec5SDimitry Andric SchedWriteVecIMul.XMM>; 67900b57cec5SDimitry Andric 67910b57cec5SDimitry Andric let Uses=[XMM0] in 67920b57cec5SDimitry Andric defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2, 6793*0fca6ea1SDimitry Andric SchedWriteVecIMul.XMM, 1>; 67940b57cec5SDimitry Andric 67950b57cec5SDimitry Andric defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1, 67960b57cec5SDimitry Andric SchedWriteVecIMul.XMM>; 67970b57cec5SDimitry Andric defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2, 67980b57cec5SDimitry Andric SchedWriteVecIMul.XMM>; 67990b57cec5SDimitry Andric} 68000b57cec5SDimitry Andric 68010b57cec5SDimitry Andric 68020b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 68030b57cec5SDimitry Andric// AES-NI 
// Instructions
//===----------------------------------------------------------------------===//

/// AESI_binop_rm_int - Two-source AES instruction selected from an intrinsic,
/// emitted in a register-register (rr) and a register-memory (rm) form.
///
///   opc       - opcode byte shared by both forms.
///   OpcodeStr - mnemonic; the operand list is appended based on Is2Addr.
///   IntId     - intrinsic matched by the selection patterns.
///   ld_frag   - load fragment wrapped around $src2 in the rm pattern.
///   Is2Addr   - 1 selects the "$src2, $dst" operand string, 0 selects the
///               "$src2, $src1, $dst" operand string.
///   RC, MemOp - register class and memory operand used for the operands.
multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
                             Intrinsic IntId, PatFrag ld_frag,
                             bit Is2Addr = 0, RegisterClass RC = VR128,
                             X86MemOperand MemOp = i128mem> {
  // Both forms hand an empty asm string to AES8I; the string that is actually
  // used comes from this AsmString override.
  let AsmString =
        !strconcat(OpcodeStr,
                   !if(Is2Addr,
                       "\t{$src2, $dst|$dst, $src2}",
                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}")) in {
    def rr : AES8I<opc, MRMSrcReg,
                   (outs RC:$dst), (ins RC:$src1, RC:$src2), "",
                   [(set RC:$dst, (IntId RC:$src1, RC:$src2))]>,
             Sched<[WriteAESDecEnc]>;
    def rm : AES8I<opc, MRMSrcMem,
                   (outs RC:$dst), (ins RC:$src1, MemOp:$src2), "",
                   [(set RC:$dst, (IntId RC:$src1, (ld_frag addr:$src2)))]>,
             Sched<[WriteAESDecEnc.Folded, WriteAESDecEnc.ReadAfterFold]>;
  }
}

// Perform One Round of an AES Encryption/Decryption Flow

// VEX-encoded forms on VR128 (multiclass defaults), three-operand syntax.
let Predicates = [HasAVX, NoVLX_Or_NoVAES, HasAES] in {
  defm VAESENC     : AESI_binop_rm_int<0xDC, "vaesenc",
                                       int_x86_aesni_aesenc, load>,
                     VEX, VVVV, WIG;
  defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
                                       int_x86_aesni_aesenclast, load>,
                     VEX, VVVV, WIG;
  defm VAESDEC     : AESI_binop_rm_int<0xDE, "vaesdec",
                                       int_x86_aesni_aesdec, load>,
                     VEX, VVVV, WIG;
  defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
                                       int_x86_aesni_aesdeclast, load>,
                     VEX, VVVV, WIG;
}

// VEX_L forms on VR256 / i256mem using the *_256 intrinsics.
let Predicates = [NoVLX, HasVAES] in {
  defm VAESENCY     : AESI_binop_rm_int<0xDC, "vaesenc",
                                        int_x86_aesni_aesenc_256, load, 0,
                                        VR256, i256mem>,
                      VEX, VVVV, VEX_L, WIG;
  defm VAESENCLASTY : AESI_binop_rm_int<0xDD, "vaesenclast",
                                        int_x86_aesni_aesenclast_256, load, 0,
                                        VR256, i256mem>,
                      VEX, VVVV, VEX_L, WIG;
  defm VAESDECY     : AESI_binop_rm_int<0xDE, "vaesdec",
                                        int_x86_aesni_aesdec_256, load, 0,
                                        VR256, i256mem>,
                      VEX, VVVV, VEX_L, WIG;
  defm VAESDECLASTY : AESI_binop_rm_int<0xDF, "vaesdeclast",
                                        int_x86_aesni_aesdeclast_256, load, 0,
                                        VR256, i256mem>,
                      VEX, VVVV, VEX_L, WIG;
}

// Non-VEX forms: $src1 is tied to $dst, two-operand syntax (Is2Addr = 1),
// and the memory form uses the memop fragment.
let Constraints = "$src1 = $dst" in {
  defm AESENC     : AESI_binop_rm_int<0xDC, "aesenc",
                                      int_x86_aesni_aesenc, memop, 1>;
  defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
                                      int_x86_aesni_aesenclast, memop, 1>;
  defm AESDEC     : AESI_binop_rm_int<0xDE, "aesdec",
                                      int_x86_aesni_aesdec, memop, 1>;
  defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
                                      int_x86_aesni_aesdeclast, memop, 1>;
}

// Perform the AES InvMixColumn Transformation
let Predicates = [HasAVX, HasAES] in {
  def VAESIMCrr : AES8I<0xDB, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1),
                        "vaesimc\t{$src1, $dst|$dst, $src1}",
                        [(set VR128:$dst,
                              (int_x86_aesni_aesimc VR128:$src1))]>,
                  Sched<[WriteAESIMC]>, VEX, WIG;
  def VAESIMCrm : AES8I<0xDB, MRMSrcMem,
                        (outs VR128:$dst), (ins i128mem:$src1),
                        "vaesimc\t{$src1, $dst|$dst, $src1}",
                        [(set VR128:$dst,
                              (int_x86_aesni_aesimc (load addr:$src1)))]>,
                  Sched<[WriteAESIMC.Folded]>, VEX, WIG;
}
def AESIMCrr : AES8I<0xDB, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1),
                     "aesimc\t{$src1, $dst|$dst, $src1}",
                     [(set VR128:$dst,
                           (int_x86_aesni_aesimc VR128:$src1))]>,
               Sched<[WriteAESIMC]>;
def AESIMCrm : AES8I<0xDB, MRMSrcMem,
                     (outs VR128:$dst), (ins i128mem:$src1),
                     "aesimc\t{$src1, $dst|$dst, $src1}",
                     [(set VR128:$dst,
                           (int_x86_aesni_aesimc (memop addr:$src1)))]>,
               Sched<[WriteAESIMC.Folded]>;

// AES Round Key Generation Assist
let Predicates = [HasAVX, HasAES] in {
  def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg,
      (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
            (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
      Sched<[WriteAESKeyGen]>, VEX, WIG;
  def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem,
      (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2),
      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
            (int_x86_aesni_aeskeygenassist (load addr:$src1), timm:$src2))]>,
      Sched<[WriteAESKeyGen.Folded]>, VEX, WIG;
}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg,
    (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
    "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    [(set VR128:$dst,
          (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
    Sched<[WriteAESKeyGen]>;
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem,
    (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2),
    "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    [(set VR128:$dst,
          (int_x86_aesni_aeskeygenassist (memop addr:$src1), timm:$src2))]>,
    Sched<[WriteAESKeyGen.Folded]>;

//===----------------------------------------------------------------------===//
// PCLMUL Instructions
//===----------------------------------------------------------------------===//

// Immediate transform to help with commuting.
// Exchanges the two 4-bit halves of the 8-bit immediate, i.e. computes
// (Imm >> 4) | (Imm << 4) truncated to 8 bits. Applied when the two sources
// are swapped so that a load in the first operand can be folded.
def PCLMULCommuteImm : SDNodeXForm<timm, [{
  uint8_t Imm = N->getZExtValue();
  uint8_t Swapped = (uint8_t)((Imm >> 4) | (Imm << 4));
  return getI8Imm(Swapped, SDLoc(N));
}]>;

// SSE carry-less Multiplication instructions
let Predicates = [NoAVX, HasPCLMUL] in {
  // $src1 is tied to $dst, so it does not appear in the asm string.
  let Constraints = "$src1 = $dst" in {
    let isCommutable = 1 in
    def PCLMULQDQrri : PCLMULIi8<0x44, MRMSrcReg,
        (outs VR128:$dst),
        (ins VR128:$src1, VR128:$src2, u8imm:$src3),
        "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
        [(set VR128:$dst,
              (int_x86_pclmulqdq VR128:$src1, VR128:$src2, timm:$src3))]>,
        Sched<[WriteCLMul]>;

    def PCLMULQDQrmi : PCLMULIi8<0x44, MRMSrcMem,
        (outs VR128:$dst),
        (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
        "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
        [(set VR128:$dst,
              (int_x86_pclmulqdq VR128:$src1, (memop addr:$src2),
                                 timm:$src3))]>,
        Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"

  // Load in the first intrinsic operand: select the rmi form with the sources
  // exchanged and the immediate rewritten by PCLMULCommuteImm.
  def : Pat<(int_x86_pclmulqdq (memop addr:$src2), VR128:$src1,
                               (i8 timm:$src3)),
            (PCLMULQDQrmi VR128:$src1, addr:$src2,
                          (PCLMULCommuteImm timm:$src3))>;
} // Predicates = [NoAVX, HasPCLMUL]

// SSE aliases
// One register alias and one memory alias per (HI, LO) combination. The
// mnemonic is "pclmul" # HI # LO # "dq"; the implied immediate has bit 0 set
// when HI is "hq" and bit 4 set when LO is "hq".
foreach HI = ["hq","lq"] in
foreach LO = ["hq","lq"] in {
  def : InstAlias<"pclmul" # HI # LO # "dq\t{$src, $dst|$dst, $src}",
                  (PCLMULQDQrri VR128:$dst, VR128:$src,
                                !add(!shl(!eq(LO,"hq"),4),!eq(HI,"hq"))),
                  0>;
  def : InstAlias<"pclmul" # HI # LO # "dq\t{$src, $dst|$dst, $src}",
                  (PCLMULQDQrmi VR128:$dst, i128mem:$src,
                                !add(!shl(!eq(LO,"hq"),4),!eq(HI,"hq"))),
                  0>;
}

// AVX carry-less Multiplication instructions

/// vpclmulqdq - "vpclmulqdq" in register (rri) and memory (rmi) forms over
/// register class RC, plus a pattern for a load in the first operand.
///
///   RC     - register class of $dst, $src1 and the register $src2.
///   MemOp  - memory operand type of $src2 in the rmi form.
///   LdFrag - load fragment wrapped around the memory source in the patterns.
///   IntId  - intrinsic being selected.
multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
                      PatFrag LdFrag, Intrinsic IntId> {
  let isCommutable = 1 in
  def rri : PCLMULIi8<0x44, MRMSrcReg,
      (outs RC:$dst),
      (ins RC:$src1, RC:$src2, u8imm:$src3),
      "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set RC:$dst,
            (IntId RC:$src1, RC:$src2, timm:$src3))]>,
      Sched<[WriteCLMul]>;

  def rmi : PCLMULIi8<0x44, MRMSrcMem,
      (outs RC:$dst),
      (ins RC:$src1, MemOp:$src2, u8imm:$src3),
      "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set RC:$dst,
            (IntId RC:$src1, (LdFrag addr:$src2), timm:$src3))]>,
      Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;

  // We can commute a load in the first operand by swapping the sources and
  // rotating the immediate.
  def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 timm:$src3)),
            (!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
                                           (PCLMULCommuteImm timm:$src3))>;
}

let Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in
defm VPCLMULQDQ  : vpclmulqdq<VR128, i128mem, load, int_x86_pclmulqdq>,
                   VEX, VVVV, WIG;

let Predicates = [NoVLX, HasVPCLMULQDQ] in
defm VPCLMULQDQY : vpclmulqdq<VR256, i256mem, load, int_x86_pclmulqdq_256>,
                   VEX, VVVV, VEX_L, WIG;

/// vpclmulqdq_aliases_impl - Register and memory "vpclmul<Hi><Lo>dq" aliases
/// for the instructions named InstStr#"rri" and InstStr#"rmi". The implied
/// immediate has bit 0 set when Hi is "hq" and bit 4 set when Lo is "hq".
multiclass vpclmulqdq_aliases_impl<string InstStr, RegisterClass RC,
                                   X86MemOperand MemOp, string Hi, string Lo> {
  def : InstAlias<"vpclmul"#Hi#Lo#"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(InstStr # "rri")
                       RC:$dst, RC:$src1, RC:$src2,
                       !add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))),
                  0>;
  def : InstAlias<"vpclmul"#Hi#Lo#"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(InstStr # "rmi")
                       RC:$dst, RC:$src1, MemOp:$src2,
                       !add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))),
                  0>;
}

/// vpclmulqdq_aliases - Instantiates vpclmulqdq_aliases_impl for all four
/// "hq"/"lq" combinations.
multiclass vpclmulqdq_aliases<string InstStr, RegisterClass RC,
                              X86MemOperand MemOp> {
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "hq", "hq">;
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "hq", "lq">;
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "lq", "hq">;
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "lq", "lq">;
}

// AVX aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQ", VR128, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQY", VR256, i256mem>;

//===----------------------------------------------------------------------===//
// SSE4A Instructions
//===----------------------------------------------------------------------===//

let Predicates = [HasSSE4A] in {

// EXTRQ / INSERTQ: $src is tied to $dst; the *I forms take two 8-bit
// immediates ($len, $idx), the others take a second register ($mask).
let ExeDomain = SSEPackedInt, Constraints = "$src = $dst" in {
def EXTRQI : Ii8<0x78, MRMXr,
                 (outs VR128:$dst),
                 (ins VR128:$src, u8imm:$len, u8imm:$idx),
                 "extrq\t{$idx, $len, $src|$src, $len, $idx}",
                 [(set VR128:$dst,
                       (X86extrqi VR128:$src, timm:$len, timm:$idx))]>,
             TB, PD, Sched<[SchedWriteVecALU.XMM]>;
def EXTRQ  : I<0x79, MRMSrcReg,
               (outs VR128:$dst),
               (ins VR128:$src, VR128:$mask),
               "extrq\t{$mask, $src|$src, $mask}",
               [(set VR128:$dst,
                     (int_x86_sse4a_extrq VR128:$src, VR128:$mask))]>,
             TB, PD, Sched<[SchedWriteVecALU.XMM]>;

def INSERTQI : Ii8<0x78, MRMSrcReg,
                   (outs VR128:$dst),
                   (ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx),
                   "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
                   [(set VR128:$dst,
                         (X86insertqi VR128:$src, VR128:$src2,
                                      timm:$len, timm:$idx))]>,
               TB, XD, Sched<[SchedWriteVecALU.XMM]>;
def INSERTQ  : I<0x79, MRMSrcReg,
                 (outs VR128:$dst),
                 (ins VR128:$src, VR128:$mask),
                 "insertq\t{$mask, $src|$src, $mask}",
                 [(set VR128:$dst,
                       (int_x86_sse4a_insertq VR128:$src, VR128:$mask))]>,
               TB, XD, Sched<[SchedWriteVecALU.XMM]>;
} // ExeDomain = SSEPackedInt, Constraints = "$src = $dst"

// Non-temporal (unaligned) scalar stores.
let AddedComplexity = 400 in { // Prefer non-temporal versions
// The instructions carry no pattern of their own; they are selected through
// the nontemporalstore patterns that follow.
let hasSideEffects = 0, mayStore = 1,
    SchedRW = [SchedWriteFMoveLSNT.Scl.MR] in {
def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
                "movntss\t{$src, $dst|$dst, $src}", []>,
              TB, XS;

def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                "movntsd\t{$src, $dst|$dst, $src}", []>,
              TB, XD;
} // SchedRW

// The scalar FR32/FR64 value is copied into VR128 because the store
// instructions take a VR128 source operand.
def : Pat<(nontemporalstore FR32:$src, addr:$dst),
          (MOVNTSS addr:$dst, (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;

def : Pat<(nontemporalstore FR64:$src, addr:$dst),
          (MOVNTSD addr:$dst, (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;

} // AddedComplexity
} // HasSSE4A
//===----------------------------------------------------------------------===//
// AVX Instructions
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VBROADCAST - Load from memory and broadcast to all elements of the
//              destination operand
//

/// avx_broadcast_rm - Memory-source broadcast instruction.
///
///   opc, OpcodeStr - opcode byte and mnemonic.
///   RC             - destination register class.
///   x86memop       - memory operand type of $src.
///   VT             - result value type of the selection pattern.
///   bcast_frag     - broadcast-load fragment matched on addr:$src.
///   Sched          - scheduling write attached to the instruction.
class avx_broadcast_rm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop, ValueType VT,
                       PatFrag bcast_frag, SchedWrite Sched> :
  AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
        OpcodeStr # "\t{$src, $dst|$dst, $src}",
        [(set RC:$dst, (VT (bcast_frag addr:$src)))]>,
  Sched<[Sched]>, VEX {
  let isReMaterializable = 1;
}

// AVX2 adds register forms

/// avx2_broadcast_rr - Register-source broadcast: X86VBroadcast of a VR128
/// source viewed as OpVT, producing ResVT in register class RC.
class avx2_broadcast_rr<bits<8> opc, string OpcodeStr, RegisterClass RC,
                        ValueType ResVT, ValueType OpVT, SchedWrite Sched> :
  AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
         OpcodeStr # "\t{$src, $dst|$dst, $src}",
         [(set RC:$dst, (ResVT (X86VBroadcast (OpVT VR128:$src))))]>,
  Sched<[Sched]>, VEX;

// Memory forms.
let ExeDomain = SSEPackedSingle, Predicates = [HasAVX, NoVLX] in {
  def VBROADCASTSSrm  : avx_broadcast_rm<0x18, "vbroadcastss", VR128,
                                         f32mem, v4f32, X86VBroadcastld32,
                                         SchedWriteFShuffle.XMM.Folded>;
  def VBROADCASTSSYrm : avx_broadcast_rm<0x18, "vbroadcastss", VR256,
                                         f32mem, v8f32, X86VBroadcastld32,
                                         SchedWriteFShuffle.XMM.Folded>,
                        VEX_L;
}
let ExeDomain = SSEPackedDouble, Predicates = [HasAVX, NoVLX] in
def VBROADCASTSDYrm : avx_broadcast_rm<0x19, "vbroadcastsd", VR256,
                                       f64mem, v4f64, X86VBroadcastld64,
                                       SchedWriteFShuffle.XMM.Folded>,
                      VEX_L;

// Register forms.
let ExeDomain = SSEPackedSingle, Predicates = [HasAVX2, NoVLX] in {
  def VBROADCASTSSrr  : avx2_broadcast_rr<0x18, "vbroadcastss", VR128,
                                          v4f32, v4f32,
                                          SchedWriteFShuffle.XMM>;
  def VBROADCASTSSYrr : avx2_broadcast_rr<0x18, "vbroadcastss", VR256,
                                          v8f32, v4f32, WriteFShuffle256>,
                        VEX_L;
}
let ExeDomain = SSEPackedDouble, Predicates = [HasAVX2, NoVLX] in
def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256,
                                        v4f64, v2f64, WriteFShuffle256>,
                      VEX_L;

//===----------------------------------------------------------------------===//
// VBROADCAST*128 - Load from memory and broadcast 128-bit vector to both
//                  halves of a 256-bit vector.
//
// Both instructions are defined without a pattern; the 128-bit broadcast
// loads are selected through the X86SubVBroadcastld128 patterns that follow.
let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX2] in
def VBROADCASTI128rm : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
                             (ins i128mem:$src),
                             "vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
                       Sched<[WriteShuffleLd]>, VEX, VEX_L;

let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX],
    ExeDomain = SSEPackedSingle in
def VBROADCASTF128rm : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst),
                             (ins f128mem:$src),
                             "vbroadcastf128\t{$src, $dst|$dst, $src}", []>,
                       Sched<[SchedWriteFShuffle.XMM.Folded]>, VEX, VEX_L;

let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128rm addr:$src)>;
def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128rm addr:$src)>;
// NOTE: We're using FP instructions here, but execution domain fixing can
// convert to integer when profitable.
def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128rm addr:$src)>;
def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128rm addr:$src)>;
def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128rm addr:$src)>;
}

let Predicates = [HasAVXNECONVERT, NoVLX] in
  def : Pat<(v16bf16 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTF128rm addr:$src)>;

//===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
//

// Defined without patterns; selection goes through vperm2x128_lowering.
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR256:$src2, u8imm:$src3),
          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
          VEX, VVVV, VEX_L, Sched<[WriteFShuffle256]>;
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
          VEX, VVVV, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
}

// Immediate transform to help with commuting.
// XORs the immediate with 0x22 (flips bit 1 and bit 5).
def Perm2XCommuteImm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N));
}]>;

/// vperm2x128_lowering - Selects X86VPerm2x128 of type VT to the rr / rm
/// instructions named by InstrStr. A load in the first operand is folded by
/// exchanging the sources and rewriting the immediate with Perm2XCommuteImm.
multiclass vperm2x128_lowering<string InstrStr, ValueType VT, PatFrag memop_frag> {
  def : Pat<(VT (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))),
            (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR256:$src2, timm:$imm)>;
  def : Pat<(VT (X86VPerm2x128 VR256:$src1, (memop_frag addr:$src2), (i8 timm:$imm))),
            (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2, timm:$imm)>;
  // Pattern with load in other operand.
  def : Pat<(VT (X86VPerm2x128 (memop_frag addr:$src2), VR256:$src1, (i8 timm:$imm))),
            (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
                                             (Perm2XCommuteImm timm:$imm))>;
}

let Predicates = [HasAVX] in {
  defm : vperm2x128_lowering<"VPERM2F128", v4f64, loadv4f64>;
  defm : vperm2x128_lowering<"VPERM2F128", v8f32, loadv8f32>;
}

let Predicates = [HasAVX1Only] in {
  defm : vperm2x128_lowering<"VPERM2F128", v4i64,  loadv4i64>;
  defm : vperm2x128_lowering<"VPERM2F128", v8i32,  loadv8i32>;
  defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>;
  defm : vperm2x128_lowering<"VPERM2F128", v16f16, loadv16f16>;
  defm : vperm2x128_lowering<"VPERM2F128", v32i8,  loadv32i8>;
}

//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//
// Defined without patterns; selection goes through vinsert_lowering.
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR128:$src2, u8imm:$src3),
          "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, Sched<[WriteFShuffle256]>, VEX, VVVV, VEX_L;
let mayLoad = 1 in
def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, f128mem:$src2, u8imm:$src3),
          "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>, VEX, VVVV, VEX_L;
}

// To create a 256-bit all ones value, we should produce VCMPTRUEPS
// with YMM register containing zero.
// FIXME: Avoid producing vxorps to clear the fake inputs.
let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 immAllOnesV), (VCMPPSYrri (AVX_SET0), (AVX_SET0), 0xf)>;
}

/// vinsert_lowering - Selection patterns for inserting a 128-bit From vector
/// into a 256-bit To vector.
///
///   InstrStr       - name prefix of the insert instruction (rr / rm forms).
///   PermStr        - name prefix of the perm2x128 instruction whose rm form
///                    is used when the To vector itself comes from a load.
///   From, To       - 128-bit source type and 256-bit destination type.
///   frommemop_frag - load fragment for the From vector.
///   tomemop_frag   - load fragment for the To vector.
multiclass vinsert_lowering<string InstrStr, string PermStr,
                            ValueType From, ValueType To,
                            PatFrag frommemop_frag, PatFrag tomemop_frag> {
  def : Pat<(vinsert128_insert:$ins (To VR256:$src1), (From VR128:$src2),
                                   (iPTR imm)),
            (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR128:$src2,
                                       (INSERT_get_vinsert128_imm VR256:$ins))>;
  def : Pat<(vinsert128_insert:$ins (To VR256:$src1),
                                    (From (frommemop_frag addr:$src2)),
                                    (iPTR imm)),
            (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
                                             (INSERT_get_vinsert128_imm VR256:$ins))>;
  // Folding "To" vector - convert to perm2x128 and commute inputs.
  def : Pat<(vinsert128_insert:$ins (To (tomemop_frag addr:$src1)),
                                    (From VR128:$src2),
                                    (iPTR imm)),
            (!cast<Instruction>(PermStr#rm)
              (INSERT_SUBREG (To (IMPLICIT_DEF)), VR128:$src2, sub_xmm),
              addr:$src1, (INSERT_get_vperm2x128_commutedimm VR256:$ins))>;
}

let Predicates = [HasAVX, NoVLX] in {
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v4f32, v8f32, loadv4f32, loadv8f32>;
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v2f64, v4f64, loadv2f64, loadv4f64>;
}

// One instantiation per (From, To) type pair.
let Predicates = [HasAVX1Only] in {
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v2i64, v4i64,  loadv2i64, loadv4i64>;
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v4i32, v8i32,  loadv4i32, loadv8i32>;
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v8i16, v16i16, loadv8i16, loadv16i16>;
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v8f16, v16f16, loadv8f16, loadv16f16>;
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v16i8, v32i8,  loadv16i8, loadv32i8>;
}

//===----------------------------------------------------------------------===//
// VEXTRACTF128 - Extract packed floating-point values
//
// Defined without patterns; selection goes through vextract_lowering.
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
          (ins VR256:$src1, u8imm:$src2),
          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, Sched<[WriteFShuffle256]>, VEX, VEX_L;
let mayStore = 1 in
def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
          (ins f128mem:$dst, VR256:$src1, u8imm:$src2),
          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, Sched<[WriteFStoreX]>, VEX, VEX_L;
}

/// vextract_lowering - Selection patterns for extracting a 128-bit To vector
/// from a 256-bit From vector: a register form (InstrStr#rr) and a form that
/// folds a store of the extracted value (InstrStr#mr).
multiclass vextract_lowering<string InstrStr, ValueType From, ValueType To> {
  def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
            (To (!cast<Instruction>(InstrStr#rr)
                                    (From VR256:$src1),
                                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;
  def : Pat<(store (To (vextract128_extract:$ext (From VR256:$src1),
                                                 (iPTR imm))), addr:$dst),
            (!cast<Instruction>(InstrStr#mr) addr:$dst, VR256:$src1,
             (EXTRACT_get_vextract128_imm VR128:$ext))>;
}

// AVX1 patterns
let Predicates = [HasAVX, NoVLX] in {
  defm : vextract_lowering<"VEXTRACTF128", v8f32, v4f32>;
  defm : vextract_lowering<"VEXTRACTF128", v4f64, v2f64>;
}

// One instantiation per (From, To) type pair.
let Predicates = [HasAVX1Only] in {
  defm : vextract_lowering<"VEXTRACTF128", v4i64,  v2i64>;
  defm : vextract_lowering<"VEXTRACTF128", v8i32,  v4i32>;
  defm : vextract_lowering<"VEXTRACTF128", v16i16, v8i16>;
  defm : vextract_lowering<"VEXTRACTF128", v16f16, v8f16>;
  defm : vextract_lowering<"VEXTRACTF128", v32i8,  v16i8>;
}

//===----------------------------------------------------------------------===//
// VMASKMOV - Conditional SIMD Packed Loads and Stores
//

/// avx_movmask_rm - Masked load (rm / Yrm) and masked store (mr / Ymr)
/// instructions in 128-bit and 256-bit widths.
///
///   opc_rm, opc_mr  - opcode bytes of the load and store forms.
///   OpcodeStr       - mnemonic shared by all four forms.
///   IntLd, IntLd256 - load intrinsics, called as (addr, $src1).
///   IntSt, IntSt256 - store intrinsics, called as (addr, $src1, $src2).
///   schedX, schedY  - scheduling info; .RM is used for the loads and .MR for
///                     the stores, X for the 128-bit and Y for the 256-bit
///                     forms.
multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
                          Intrinsic IntLd, Intrinsic IntLd256,
                          Intrinsic IntSt, Intrinsic IntSt256,
                          X86SchedWriteMaskMove schedX,
                          X86SchedWriteMaskMove schedY> {
  def rm  : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, f128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
             VEX, VVVV, Sched<[schedX.RM]>;
  def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, f256mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
             VEX, VVVV, VEX_L, Sched<[schedY.RM]>;
  def mr  : AVX8I<opc_mr, MRMDestMem, (outs),
             (ins f128mem:$dst, VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>,
             VEX, VVVV, Sched<[schedX.MR]>;
  def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
             (ins f256mem:$dst, VR256:$src1, VR256:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
             VEX, VVVV, VEX_L, Sched<[schedY.MR]>;
}

let ExeDomain = SSEPackedSingle in
defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
                                 int_x86_avx_maskload_ps,
                                 int_x86_avx_maskload_ps_256,
                                 int_x86_avx_maskstore_ps,
                                 int_x86_avx_maskstore_ps_256,
                                 WriteFMaskMove32, WriteFMaskMove32Y>;
let ExeDomain = SSEPackedDouble in
defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
                                 int_x86_avx_maskload_pd,
                                 int_x86_avx_maskload_pd_256,
                                 int_x86_avx_maskstore_pd,
                                 int_x86_avx_maskstore_pd_256,
                                 WriteFMaskMove64, WriteFMaskMove64Y>;

//===----------------------------------------------------------------------===//
// AVX_VNNI
//===----------------------------------------------------------------------===//

/// avx_vnni_rm - Three-source VNNI instruction in 128-bit (rr / rm) and
/// 256-bit (Yrr / Yrm) widths. $src1 is tied to $dst and therefore omitted
/// from the asm string. IsCommutable is applied to the register forms only.
let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI], Constraints = "$src1 = $dst",
    explicitOpPrefix = ExplicitVEX in
multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                       bit IsCommutable> {
  let isCommutable = IsCommutable in
  def rr  : AVX8I<opc, MRMSrcReg, (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2, VR128:$src3),
             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
             [(set VR128:$dst, (v4i32 (OpNode VR128:$src1,
                                       VR128:$src2, VR128:$src3)))]>,
             VEX, VVVV, Sched<[SchedWriteVecIMul.XMM]>;

  def rm  : AVX8I<opc, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2, i128mem:$src3),
             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
             [(set VR128:$dst, (v4i32 (OpNode VR128:$src1, VR128:$src2,
                                      (loadv4i32 addr:$src3))))]>,
             VEX, VVVV, Sched<[SchedWriteVecIMul.XMM.Folded,
                               SchedWriteVecIMul.XMM.ReadAfterFold,
                               SchedWriteVecIMul.XMM.ReadAfterFold]>;

  let isCommutable = IsCommutable in
  def Yrr : AVX8I<opc, MRMSrcReg, (outs VR256:$dst),
             (ins VR256:$src1, VR256:$src2, VR256:$src3),
             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
             [(set VR256:$dst, (v8i32 (OpNode VR256:$src1,
                                       VR256:$src2, VR256:$src3)))]>,
             VEX, VVVV, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;

  def Yrm : AVX8I<opc, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, VR256:$src2, i256mem:$src3),
             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
             [(set VR256:$dst, (v8i32 (OpNode VR256:$src1, VR256:$src2,
                                      (loadv8i32 addr:$src3))))]>,
             VEX, VVVV, VEX_L, Sched<[SchedWriteVecIMul.YMM.Folded,
                                      SchedWriteVecIMul.YMM.ReadAfterFold,
                                      SchedWriteVecIMul.YMM.ReadAfterFold]>;
}

defm VPDPBUSD   : avx_vnni_rm<0x50, "vpdpbusd", X86Vpdpbusd, 0>;
defm VPDPBUSDS  : avx_vnni_rm<0x51, "vpdpbusds", X86Vpdpbusds, 0>;
defm VPDPWSSD   : avx_vnni_rm<0x52, "vpdpwssd",  X86Vpdpwssd, 1>;
defm VPDPWSSDS  : avx_vnni_rm<0x53, "vpdpwssds", X86Vpdpwssds, 1>;

// Select add(src1, X86vpmaddwd_su(src2, src3)) directly to VPDPWSSD.
let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI] in {
  def : Pat<(v8i32 (add VR256:$src1,
                        (X86vpmaddwd_su VR256:$src2, VR256:$src3))),
            (VPDPWSSDYrr VR256:$src1, VR256:$src2, VR256:$src3)>;
  def : Pat<(v8i32 (add VR256:$src1,
                        (X86vpmaddwd_su VR256:$src2, (load addr:$src3)))),
            (VPDPWSSDYrm VR256:$src1, VR256:$src2, addr:$src3)>;
  def : Pat<(v4i32 (add VR128:$src1,
                        (X86vpmaddwd_su VR128:$src2, VR128:$src3))),
            (VPDPWSSDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
  def : Pat<(v4i32 (add VR128:$src1,
                        (X86vpmaddwd_su VR128:$src2, (load addr:$src3)))),
            (VPDPWSSDrm VR128:$src1, VR128:$src2, addr:$src3)>;
}

//===----------------------------------------------------------------------===//
// VPERMIL
- Permute Single and Double Floating-Point Values 74130b57cec5SDimitry Andric// 74140b57cec5SDimitry Andric 74150b57cec5SDimitry Andricmulticlass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr, 74160b57cec5SDimitry Andric RegisterClass RC, X86MemOperand x86memop_f, 74170b57cec5SDimitry Andric X86MemOperand x86memop_i, 74180b57cec5SDimitry Andric ValueType f_vt, ValueType i_vt, 74190b57cec5SDimitry Andric X86FoldableSchedWrite sched, 74200b57cec5SDimitry Andric X86FoldableSchedWrite varsched> { 74210b57cec5SDimitry Andric let Predicates = [HasAVX, NoVLX] in { 74220b57cec5SDimitry Andric def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst), 74230b57cec5SDimitry Andric (ins RC:$src1, RC:$src2), 74240b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7425cb14a3feSDimitry Andric [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt RC:$src2))))]>, VEX, VVVV, 74260b57cec5SDimitry Andric Sched<[varsched]>; 74270b57cec5SDimitry Andric def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst), 74280b57cec5SDimitry Andric (ins RC:$src1, x86memop_i:$src2), 74290b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 74300b57cec5SDimitry Andric [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, 7431cb14a3feSDimitry Andric (i_vt (load addr:$src2)))))]>, VEX, VVVV, 74320b57cec5SDimitry Andric Sched<[varsched.Folded, sched.ReadAfterFold]>; 74330b57cec5SDimitry Andric 74340b57cec5SDimitry Andric def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst), 74350b57cec5SDimitry Andric (ins RC:$src1, u8imm:$src2), 74360b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 74378bcb0991SDimitry Andric [(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 timm:$src2))))]>, VEX, 74380b57cec5SDimitry Andric Sched<[sched]>; 74390b57cec5SDimitry Andric def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst), 74400b57cec5SDimitry Andric (ins x86memop_f:$src1, u8imm:$src2), 74410b57cec5SDimitry Andric 
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 74420b57cec5SDimitry Andric [(set RC:$dst, 74438bcb0991SDimitry Andric (f_vt (X86VPermilpi (load addr:$src1), (i8 timm:$src2))))]>, VEX, 74440b57cec5SDimitry Andric Sched<[sched.Folded]>; 74450b57cec5SDimitry Andric }// Predicates = [HasAVX, NoVLX] 74460b57cec5SDimitry Andric} 74470b57cec5SDimitry Andric 74480b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in { 74490b57cec5SDimitry Andric defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem, 74500b57cec5SDimitry Andric v4f32, v4i32, SchedWriteFShuffle.XMM, 74510b57cec5SDimitry Andric SchedWriteFVarShuffle.XMM>; 74520b57cec5SDimitry Andric defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem, 74530b57cec5SDimitry Andric v8f32, v8i32, SchedWriteFShuffle.YMM, 74540b57cec5SDimitry Andric SchedWriteFVarShuffle.YMM>, VEX_L; 74550b57cec5SDimitry Andric} 74560b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in { 74570b57cec5SDimitry Andric defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem, 74580b57cec5SDimitry Andric v2f64, v2i64, SchedWriteFShuffle.XMM, 74590b57cec5SDimitry Andric SchedWriteFVarShuffle.XMM>; 74600b57cec5SDimitry Andric defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem, 74610b57cec5SDimitry Andric v4f64, v4i64, SchedWriteFShuffle.YMM, 74620b57cec5SDimitry Andric SchedWriteFVarShuffle.YMM>, VEX_L; 74630b57cec5SDimitry Andric} 74640b57cec5SDimitry Andric 74650b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 74660b57cec5SDimitry Andric// VZERO - Zero YMM registers 74670b57cec5SDimitry Andric// Note: These instruction do not affect the YMM16-YMM31. 
//

// Both instructions share opcode 0x77 and are declared to clobber
// YMM0-YMM15; VZEROALL additionally carries VEX_L.
let SchedRW = [WriteSystem] in {
let Defs = [YMM0, YMM1, YMM2,  YMM3,  YMM4,  YMM5,  YMM6,  YMM7,
            YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
  // Zero All YMM registers
  def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
                   [(int_x86_avx_vzeroall)]>,
                 TB, VEX, VEX_L, Requires<[HasAVX]>, WIG;

  // Zero Upper bits of YMM registers
  def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
                     [(int_x86_avx_vzeroupper)]>,
                   TB, VEX, Requires<[HasAVX]>, WIG;
} // Defs
} // SchedRW

//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//

/// f16c_ph2ps - Emits the register (rr) and memory (rm) forms of vcvtph2ps
/// producing a value in RC. The rr form is selected from X86any_cvtph2ps on a
/// VR128 source; the rm form has an empty pattern list and is flagged
/// mayLoad with no side effects.
multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop,
                      X86FoldableSchedWrite sched> {
  def rr : I<0x13, MRMSrcReg,
             (outs RC:$dst), (ins VR128:$src),
             "vcvtph2ps\t{$src, $dst|$dst, $src}",
             [(set RC:$dst, (X86any_cvtph2ps VR128:$src))]>,
           T8, PD, VEX, Sched<[sched]>;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : I<0x13, MRMSrcMem,
             (outs RC:$dst), (ins x86memop:$src),
             "vcvtph2ps\t{$src, $dst|$dst, $src}",
             []>,
           T8, PD, VEX, Sched<[sched.Folded]>;
}

/// f16c_ps2ph - Emits the register (rr) and store (mr) forms of vcvtps2ph
/// reading a source in RC. The rr form is selected from X86any_cvtps2ph with
/// a target-immediate second operand; the mr form has an empty pattern list
/// and is flagged mayStore with no side effects. RR and MR are the
/// scheduling classes of the two forms.
multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
                      SchedWrite RR, SchedWrite MR> {
  def rr : Ii8<0x1D, MRMDestReg,
               (outs VR128:$dst),
               (ins RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set VR128:$dst, (X86any_cvtps2ph RC:$src1, timm:$src2))]>,
           TA, PD, VEX, Sched<[RR]>;

  let hasSideEffects = 0, mayStore = 1 in
  def mr : Ii8<0x1D, MRMDestMem,
               (outs),
               (ins x86memop:$dst, RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
           TA, PD, VEX, Sched<[MR]>;
}

let Predicates = [HasF16C, NoVLX] in {
  defm VCVTPH2PS  : f16c_ph2ps<VR128, f64mem, WriteCvtPH2PS>, SIMD_EXC;
  defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, WriteCvtPH2PSY>, VEX_L,
                    SIMD_EXC;
  defm VCVTPS2PH  : f16c_ps2ph<VR128, f64mem, WriteCvtPS2PH,
                               WriteCvtPS2PHSt>, SIMD_EXC;
  defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, WriteCvtPS2PHY,
                               WriteCvtPS2PHYSt>, VEX_L, SIMD_EXC;

  // Pattern match vcvtph2ps of a scalar i64 load.
  def : Pat<(v4f32 (X86any_cvtph2ps
                     (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTPH2PSrm addr:$src)>;
  def : Pat<(v4f32 (X86any_cvtph2ps
                     (bc_v8i16
                       (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTPH2PSrm addr:$src)>;
  def : Pat<(v8f32 (X86any_cvtph2ps (loadv8i16 addr:$src))),
            (VCVTPH2PSYrm addr:$src)>;

  // Fold a store of element 0 of the 128-bit conversion result (viewed as
  // f64 or as i64), or of the whole 256-bit-source result, into the store
  // form of the instruction.
  def : Pat<(store (f64 (extractelt
                          (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128:$src1,
                                                            timm:$src2))),
                          (iPTR 0))), addr:$dst),
            (VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>;
  def : Pat<(store (i64 (extractelt
                          (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128:$src1,
                                                            timm:$src2))),
                          (iPTR 0))), addr:$dst),
            (VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>;
  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256:$src1, timm:$src2)),
                   addr:$dst),
            (VCVTPS2PHYmr addr:$dst, VR256:$src1, timm:$src2)>;
}

//===----------------------------------------------------------------------===//
// AVX2 Instructions
//===----------------------------------------------------------------------===//

/// AVX2_blend_rmi - AVX2 blend with 8-bit immediate
/// Emits the commutable register form (rri), the load-folding form (rmi) and
/// one extra pattern that handles a load in the first source by swapping the
/// operands and rewriting the immediate through commuteXForm.
multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, X86FoldableSchedWrite sched,
                          RegisterClass RC,
                          X86MemOperand x86memop, SDNodeXForm commuteXForm> {
  let isCommutable = 1 in
  def rri : AVX2AIi8<opc, MRMSrcReg,
                     (outs RC:$dst),
                     (ins RC:$src1, RC:$src2, u8imm:$src3),
                     !strconcat(OpcodeStr,
                       "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     [(set RC:$dst,
                           (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
            Sched<[sched]>, VEX, VVVV;

  def rmi : AVX2AIi8<opc, MRMSrcMem,
                     (outs RC:$dst),
                     (ins RC:$src1, x86memop:$src2, u8imm:$src3),
                     !strconcat(OpcodeStr,
                       "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     [(set RC:$dst,
                           (OpVT (OpNode RC:$src1, (load addr:$src2),
                                         timm:$src3)))]>,
            Sched<[sched.Folded, sched.ReadAfterFold]>, VEX, VVVV;

  // Pattern to commute if load is in first source.
  def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, timm:$src3)),
            (!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
                                            (commuteXForm timm:$src3))>;
}

let Predicates = [HasAVX2] in {
defm VPBLENDD  : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v4i32,
                                SchedWriteBlend.XMM, VR128, i128mem,
                                BlendCommuteImm4>;
defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32,
                                SchedWriteBlend.YMM, VR256, i256mem,
                                BlendCommuteImm8>, VEX_L;

// 64-bit-element blends are selected onto VPBLENDD with the immediate
// rewritten by the BlendScale* transforms.
def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3),
          (VPBLENDDYrri VR256:$src1, VR256:$src2,
                        (BlendScaleImm4 timm:$src3))>;
def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3),
          (VPBLENDDYrmi VR256:$src1, addr:$src2,
                        (BlendScaleImm4 timm:$src3))>;
def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3),
          (VPBLENDDYrmi VR256:$src1, addr:$src2,
                        (BlendScaleCommuteImm4 timm:$src3))>;

def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
          (VPBLENDDrri VR128:$src1, VR128:$src2,
                       (BlendScaleImm2to4 timm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3),
          (VPBLENDDrmi VR128:$src1, addr:$src2,
                       (BlendScaleImm2to4 timm:$src3))>;
def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3),
          (VPBLENDDrmi VR128:$src1, addr:$src2,
                       (BlendScaleCommuteImm2to4 timm:$src3))>;
}

// For insertion into the zero index (low half) of a 256-bit vector, it is
// more efficient to generate a blend with immediate instead of an insert*128.
// NOTE: We're using FP instructions here, but execution domain fixing should
// take care of using integer instructions when profitable.
let Predicates = [HasAVX] in {
// Register destination: the 128-bit value is placed in an undefined 256-bit
// register and blended in with immediate 0xf.
def : Pat<(insert_subvector (v8i32 VR256:$src1), (v4i32 VR128:$src2),
                            (iPTR 0)),
          (VBLENDPSYrri VR256:$src1,
                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0xf)>;
def : Pat<(insert_subvector (v4i64 VR256:$src1), (v2i64 VR128:$src2),
                            (iPTR 0)),
          (VBLENDPSYrri VR256:$src1,
                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0xf)>;
def : Pat<(insert_subvector (v16i16 VR256:$src1), (v8i16 VR128:$src2),
                            (iPTR 0)),
          (VBLENDPSYrri VR256:$src1,
                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0xf)>;
def : Pat<(insert_subvector (v16f16 VR256:$src1), (v8f16 VR128:$src2),
                            (iPTR 0)),
          (VBLENDPSYrri VR256:$src1,
                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0xf)>;
def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2),
                            (iPTR 0)),
          (VBLENDPSYrri VR256:$src1,
                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0xf)>;

// Loaded destination: the operands are swapped so the load can be folded,
// and the immediate becomes 0xf0.
def : Pat<(insert_subvector (loadv8i32 addr:$src2), (v4i32 VR128:$src1),
                            (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm),
                        addr:$src2, 0xf0)>;
def : Pat<(insert_subvector (loadv4i64 addr:$src2), (v2i64 VR128:$src1),
                            (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm),
                        addr:$src2, 0xf0)>;
def : Pat<(insert_subvector (loadv16i16 addr:$src2), (v8i16 VR128:$src1),
                            (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm),
                        addr:$src2, 0xf0)>;
def : Pat<(insert_subvector (loadv16f16 addr:$src2), (v8f16 VR128:$src1),
                            (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm),
                        addr:$src2, 0xf0)>;
def : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1),
                            (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm),
                        addr:$src2, 0xf0)>;
}

//===----------------------------------------------------------------------===//
// VPBROADCAST - Load from memory and broadcast to all elements of the
//               destination operand
//

/// avx2_broadcast - Emits the 128-bit (rr / rm) and 256-bit (Yrr / Yrm) forms
/// of an integer broadcast. Register forms always take a VR128 source and
/// match X86VBroadcast; memory forms match bcast_frag. The multiclass also
/// adds a pattern that broadcasts from a 256-bit register by first taking its
/// sub_xmm subregister. prd is appended to HasAVX2 in the predicate list.
multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
                          X86MemOperand x86memop, PatFrag bcast_frag,
                          ValueType OpVT128, ValueType OpVT256,
                          Predicate prd> {
  let Predicates = [HasAVX2, prd] in {
    def rr : AVX28I<opc, MRMSrcReg,
                    (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                          (OpVT128 (X86VBroadcast (OpVT128 VR128:$src))))]>,
             Sched<[SchedWriteShuffle.XMM]>, VEX;

    def rm : AVX28I<opc, MRMSrcMem,
                    (outs VR128:$dst), (ins x86memop:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                          (OpVT128 (bcast_frag addr:$src)))]>,
             Sched<[SchedWriteShuffle.XMM.Folded]>, VEX;

    def Yrr : AVX28I<opc, MRMSrcReg,
                     (outs VR256:$dst), (ins VR128:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR256:$dst,
                           (OpVT256 (X86VBroadcast (OpVT128 VR128:$src))))]>,
              Sched<[WriteShuffle256]>, VEX, VEX_L;

    def Yrm : AVX28I<opc, MRMSrcMem,
                     (outs VR256:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR256:$dst,
                           (OpVT256 (bcast_frag addr:$src)))]>,
              Sched<[SchedWriteShuffle.XMM.Folded]>, VEX, VEX_L;

    // Provide aliases for broadcast from the same register class that
    // automatically does the extract.
    def : Pat<(OpVT256 (X86VBroadcast (OpVT256 VR256:$src))),
              (!cast<Instruction>(NAME#"Yrr")
                  (OpVT128 (EXTRACT_SUBREG (OpVT256 VR256:$src),sub_xmm)))>;
  }
}

defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem,
                                   X86VBroadcastld8,
                                   v16i8, v32i8, NoVLX_Or_NoBWI>;
defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem,
                                   X86VBroadcastld16,
                                   v8i16, v16i16, NoVLX_Or_NoBWI>;
defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem,
                                   X86VBroadcastld32,
                                   v4i32, v8i32, NoVLX>;
defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem,
                                   X86VBroadcastld64,
                                   v2i64, v4i64, NoVLX>;

let Predicates = [HasAVX2, NoVLX] in {
  // Provide fallback in case the load node that is used in the patterns above
  // is used by additional users, which prevents the pattern selection.
  def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
            (VBROADCASTSSrr (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;
  def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
            (VBROADCASTSSYrr (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;
  def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
            (VBROADCASTSDYrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;
}

let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  // Byte/word broadcasts from a GPR: the narrow register is inserted into an
  // undefined i32, moved to a vector register, then broadcast.
  def : Pat<(v16i8 (X86VBroadcast GR8:$src)),
            (VPBROADCASTBrr (VMOVDI2PDIrr
                             (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                                 GR8:$src, sub_8bit))))>;
  def : Pat<(v32i8 (X86VBroadcast GR8:$src)),
            (VPBROADCASTBYrr (VMOVDI2PDIrr
                              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                                  GR8:$src, sub_8bit))))>;

  def : Pat<(v8i16 (X86VBroadcast GR16:$src)),
            (VPBROADCASTWrr (VMOVDI2PDIrr
                             (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                                 GR16:$src, sub_16bit))))>;
  def : Pat<(v16i16 (X86VBroadcast GR16:$src)),
            (VPBROADCASTWYrr (VMOVDI2PDIrr
                              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                                  GR16:$src, sub_16bit))))>;

  // Half-precision broadcasts are selected onto the 16-bit integer
  // broadcast instructions.
  def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWrm addr:$src)>;
  def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWYrm addr:$src)>;

  def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128:$src))),
            (VPBROADCASTWrr VR128:$src)>;
  def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128:$src))),
            (VPBROADCASTWYrr VR128:$src)>;

  def : Pat<(v8f16 (X86VBroadcast (f16 FR16:$src))),
            (VPBROADCASTWrr (COPY_TO_REGCLASS FR16:$src, VR128))>;
  def : Pat<(v16f16 (X86VBroadcast (f16 FR16:$src))),
            (VPBROADCASTWYrr (COPY_TO_REGCLASS FR16:$src, VR128))>;
}
let Predicates = [HasAVX2, NoVLX] in {
  def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
            (VPBROADCASTDrr (VMOVDI2PDIrr GR32:$src))>;
  def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
            (VPBROADCASTDYrr (VMOVDI2PDIrr GR32:$src))>;
  def : Pat<(v2i64 (X86VBroadcast GR64:$src)),
            (VPBROADCASTQrr (VMOV64toPQIrr GR64:$src))>;
  def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
            (VPBROADCASTQYrr (VMOV64toPQIrr GR64:$src))>;
}

// AVX1 broadcast patterns
let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 (X86VBroadcastld32 addr:$src)),
          (VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4i64 (X86VBroadcastld64 addr:$src)),
          (VBROADCASTSDYrm addr:$src)>;
def : Pat<(v4i32 (X86VBroadcastld32 addr:$src)),
          (VBROADCASTSSrm addr:$src)>;
}

  // Provide
// fallback in case the load node that is used in the patterns above
  // is used by additional users, which prevents the pattern selection.
let Predicates = [HasAVX, NoVLX] in {
  // 128bit broadcasts:
  def : Pat<(v2f64 (X86VBroadcast f64:$src)),
            (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;
  def : Pat<(v2f64 (X86VBroadcastld64 addr:$src)),
            (VMOVDDUPrm addr:$src)>;

  def : Pat<(v2f64 (X86VBroadcast v2f64:$src)),
            (VMOVDDUPrr VR128:$src)>;
}

// With AVX1 only, a 256-bit broadcast is built by splatting within a 128-bit
// register and then inserting that register into both halves via
// VINSERTF128rr (index 1) on top of an INSERT_SUBREG of the low half.
let Predicates = [HasAVX1Only] in {
  def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
            (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)>;
  def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
              (v4f32 (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)), sub_xmm),
              (v4f32 (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)), 1)>;
  def : Pat<(v8f32 (X86VBroadcast v4f32:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
              (v4f32 (VPERMILPSri VR128:$src, 0)), sub_xmm),
              (v4f32 (VPERMILPSri VR128:$src, 0)), 1)>;
  def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
              (v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), sub_xmm),
              (v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), 1)>;
  def : Pat<(v4f64 (X86VBroadcast v2f64:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
              (v2f64 (VMOVDDUPrr VR128:$src)), sub_xmm),
              (v2f64 (VMOVDDUPrr VR128:$src)), 1)>;

  def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
            (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)>;
  def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
              (v4i32 (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)), sub_xmm),
              (v4i32 (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)), 1)>;
  def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
              (v4i32 (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)), sub_xmm),
              (v4i32 (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)), 1)>;

  def : Pat<(v2i64 (X86VBroadcast i64:$src)),
            (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)>;
  def : Pat<(v2i64 (X86VBroadcastld64 addr:$src)),
            (VMOVDDUPrm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// VPERM - Permute instructions
//

/// avx2_perm - Emits the 256-bit register (Yrr) and load-folding (Yrm) forms
/// of a variable permute selected from X86VPermv. Note that the template
/// argument named Sched is what appears inside the Sched<[...]> lists below.
multiclass avx2_perm<bits<8> opc, string OpcodeStr,
                     ValueType OpVT, X86FoldableSchedWrite Sched,
                     X86MemOperand memOp> {
  let Predicates = [HasAVX2, NoVLX] in {
    def Yrr : AVX28I<opc, MRMSrcReg,
                     (outs VR256:$dst),
                     (ins VR256:$src1, VR256:$src2),
                     !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set VR256:$dst,
                           (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>,
              Sched<[Sched]>, VEX, VVVV, VEX_L;

    def Yrm : AVX28I<opc, MRMSrcMem,
                     (outs VR256:$dst),
                     (ins VR256:$src1, memOp:$src2),
                     !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set VR256:$dst,
                           (OpVT (X86VPermv VR256:$src1,
                                            (load addr:$src2))))]>,
              Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VVVV, VEX_L;
  }
}

defm VPERMD : avx2_perm<0x36, "vpermd", v8i32, WriteVarShuffle256, i256mem>;
let ExeDomain = SSEPackedSingle in
defm VPERMPS : avx2_perm<0x16, "vpermps", v8f32, WriteFVarShuffle256, f256mem>;

/// avx2_perm_imm - Emits the 256-bit register (Yri) and memory-source (Ymi)
/// forms of an immediate-controlled permute selected from X86VPermi.
/// mem_frag is the load fragment used for the memory source.
multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
                         ValueType OpVT, X86FoldableSchedWrite Sched,
                         X86MemOperand memOp> {
  let Predicates = [HasAVX2, NoVLX] in {
    def Yri : AVX2AIi8<opc, MRMSrcReg,
                       (outs VR256:$dst),
                       (ins VR256:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set VR256:$dst,
                             (OpVT (X86VPermi VR256:$src1,
                                              (i8 timm:$src2))))]>,
              Sched<[Sched]>, VEX, VEX_L;

    def Ymi : AVX2AIi8<opc, MRMSrcMem,
                       (outs VR256:$dst),
                       (ins memOp:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set VR256:$dst,
                             (OpVT (X86VPermi (mem_frag addr:$src1),
                                              (i8 timm:$src2))))]>,
              Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VEX_L;
  }
}

defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64,
                            WriteShuffle256, i256mem>, REX_W;
let ExeDomain = SSEPackedDouble in
defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64,
                             WriteFShuffle256, f256mem>, REX_W;

//===----------------------------------------------------------------------===//
// VPERM2I128 - Permute Integer vector Values in 128-bit chunks
//

// Both forms carry empty pattern lists; selection patterns are supplied by
// the vperm2x128_lowering instantiations that follow.
let isCommutable = 1 in
def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR256:$src2, u8imm:$src3),
          "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
          Sched<[WriteShuffle256]>, VEX, VVVV, VEX_L;
def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
          "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
          Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX, VVVV, VEX_L;

// One instantiation per 256-bit element type. The v32i8 entry used to be
// listed twice; the verbatim repeat only produced duplicate anonymous
// patterns and has been dropped.
let Predicates = [HasAVX2] in {
  defm : vperm2x128_lowering<"VPERM2I128", v4i64,  loadv4i64>;
  defm : vperm2x128_lowering<"VPERM2I128", v8i32,  loadv8i32>;
  defm : vperm2x128_lowering<"VPERM2I128", v16i16, loadv16i16>;
  defm : vperm2x128_lowering<"VPERM2I128", v16f16, loadv16f16>;
  defm : vperm2x128_lowering<"VPERM2I128", v32i8,  loadv32i8>;
}

//===----------------------------------------------------------------------===//
// VINSERTI128 - Insert packed integer values
//

// Pattern lists are empty here, so hasSideEffects / mayLoad are stated
// explicitly.
let hasSideEffects = 0 in {
def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR128:$src2, u8imm:$src3),
          "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, Sched<[WriteShuffle256]>, VEX, VVVV, VEX_L;
let mayLoad = 1 in
def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, i128mem:$src2, u8imm:$src3),
          "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX, VVVV, VEX_L;
}

let Predicates = [HasAVX2, NoVLX] in {
  defm :
vinsert_lowering<"VINSERTI128", "VPERM2I128", v2i64, v4i64, loadv2i64, loadv4i64>; 7899349cc55cSDimitry Andric defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v4i32, v8i32, loadv4i32, loadv8i32>; 7900349cc55cSDimitry Andric defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v8i16, v16i16, loadv8i16, loadv16i16>; 7901fcaf7f86SDimitry Andric defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v8f16, v16f16, loadv8f16, loadv16f16>; 7902fcaf7f86SDimitry Andric defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v16i8, v32i8, loadv16i8, loadv32i8>; 7903349cc55cSDimitry Andric defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v16i8, v32i8, loadv16i8, loadv32i8>; 79040b57cec5SDimitry Andric} 79050b57cec5SDimitry Andric 7906647cbc5dSDimitry Andriclet Predicates = [HasAVXNECONVERT, NoVLX] in 7907647cbc5dSDimitry Andric defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v8bf16, v16bf16, loadv8bf16, loadv16bf16>; 7908647cbc5dSDimitry Andric 79090b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 79100b57cec5SDimitry Andric// VEXTRACTI128 - Extract packed integer values 79110b57cec5SDimitry Andric// 79120b57cec5SDimitry Andricdef VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst), 79130b57cec5SDimitry Andric (ins VR256:$src1, u8imm:$src2), 79140b57cec5SDimitry Andric "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 79150b57cec5SDimitry Andric Sched<[WriteShuffle256]>, VEX, VEX_L; 79160b57cec5SDimitry Andriclet hasSideEffects = 0, mayStore = 1 in 79170b57cec5SDimitry Andricdef VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs), 79180b57cec5SDimitry Andric (ins i128mem:$dst, VR256:$src1, u8imm:$src2), 79190b57cec5SDimitry Andric "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 79200b57cec5SDimitry Andric Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_L; 79210b57cec5SDimitry Andric 79220b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in { 79230b57cec5SDimitry Andric defm 
: vextract_lowering<"VEXTRACTI128", v4i64, v2i64>; 79240b57cec5SDimitry Andric defm : vextract_lowering<"VEXTRACTI128", v8i32, v4i32>; 79250b57cec5SDimitry Andric defm : vextract_lowering<"VEXTRACTI128", v16i16, v8i16>; 7926fcaf7f86SDimitry Andric defm : vextract_lowering<"VEXTRACTI128", v16f16, v8f16>; 7927fcaf7f86SDimitry Andric defm : vextract_lowering<"VEXTRACTI128", v32i8, v16i8>; 79280b57cec5SDimitry Andric defm : vextract_lowering<"VEXTRACTI128", v32i8, v16i8>; 79290b57cec5SDimitry Andric} 79300b57cec5SDimitry Andric 7931647cbc5dSDimitry Andriclet Predicates = [HasAVXNECONVERT, NoVLX] in 7932647cbc5dSDimitry Andric defm : vextract_lowering<"VEXTRACTI128", v16bf16, v8bf16>; 7933647cbc5dSDimitry Andric 79340b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 79350b57cec5SDimitry Andric// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores 79360b57cec5SDimitry Andric// 79370b57cec5SDimitry Andricmulticlass avx2_pmovmask<string OpcodeStr, 79380b57cec5SDimitry Andric Intrinsic IntLd128, Intrinsic IntLd256, 79395ffd83dbSDimitry Andric Intrinsic IntSt128, Intrinsic IntSt256, 79405ffd83dbSDimitry Andric X86SchedWriteMaskMove schedX, 79415ffd83dbSDimitry Andric X86SchedWriteMaskMove schedY> { 79420b57cec5SDimitry Andric def rm : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst), 79430b57cec5SDimitry Andric (ins VR128:$src1, i128mem:$src2), 79440b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 79450b57cec5SDimitry Andric [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>, 7946cb14a3feSDimitry Andric VEX, VVVV, Sched<[schedX.RM]>; 79470b57cec5SDimitry Andric def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst), 79480b57cec5SDimitry Andric (ins VR256:$src1, i256mem:$src2), 79490b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 79500b57cec5SDimitry Andric [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, 7951cb14a3feSDimitry 
Andric VEX, VVVV, VEX_L, Sched<[schedY.RM]>; 79520b57cec5SDimitry Andric def mr : AVX28I<0x8e, MRMDestMem, (outs), 79530b57cec5SDimitry Andric (ins i128mem:$dst, VR128:$src1, VR128:$src2), 79540b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 79550b57cec5SDimitry Andric [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>, 7956cb14a3feSDimitry Andric VEX, VVVV, Sched<[schedX.MR]>; 79570b57cec5SDimitry Andric def Ymr : AVX28I<0x8e, MRMDestMem, (outs), 79580b57cec5SDimitry Andric (ins i256mem:$dst, VR256:$src1, VR256:$src2), 79590b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 79600b57cec5SDimitry Andric [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, 7961cb14a3feSDimitry Andric VEX, VVVV, VEX_L, Sched<[schedY.MR]>; 79620b57cec5SDimitry Andric} 79630b57cec5SDimitry Andric 79640b57cec5SDimitry Andricdefm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd", 79650b57cec5SDimitry Andric int_x86_avx2_maskload_d, 79660b57cec5SDimitry Andric int_x86_avx2_maskload_d_256, 79670b57cec5SDimitry Andric int_x86_avx2_maskstore_d, 79685ffd83dbSDimitry Andric int_x86_avx2_maskstore_d_256, 79695ffd83dbSDimitry Andric WriteVecMaskMove32, WriteVecMaskMove32Y>; 79700b57cec5SDimitry Andricdefm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq", 79710b57cec5SDimitry Andric int_x86_avx2_maskload_q, 79720b57cec5SDimitry Andric int_x86_avx2_maskload_q_256, 79730b57cec5SDimitry Andric int_x86_avx2_maskstore_q, 79745ffd83dbSDimitry Andric int_x86_avx2_maskstore_q_256, 797506c3fb27SDimitry Andric WriteVecMaskMove64, WriteVecMaskMove64Y>, REX_W; 79760b57cec5SDimitry Andric 79770b57cec5SDimitry Andricmulticlass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT, 79788bcb0991SDimitry Andric ValueType MaskVT> { 79790b57cec5SDimitry Andric // masked store 79800b57cec5SDimitry Andric def: Pat<(masked_store (VT RC:$src), addr:$ptr, (MaskVT RC:$mask)), 79810b57cec5SDimitry Andric (!cast<Instruction>(InstrStr#"mr") addr:$ptr, 
RC:$mask, RC:$src)>; 79820b57cec5SDimitry Andric // masked load 79830b57cec5SDimitry Andric def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), undef)), 79840b57cec5SDimitry Andric (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>; 79850b57cec5SDimitry Andric def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), 79860b57cec5SDimitry Andric (VT immAllZerosV))), 79870b57cec5SDimitry Andric (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>; 79880b57cec5SDimitry Andric} 79890b57cec5SDimitry Andriclet Predicates = [HasAVX] in { 79908bcb0991SDimitry Andric defm : maskmov_lowering<"VMASKMOVPS", VR128, v4f32, v4i32>; 79918bcb0991SDimitry Andric defm : maskmov_lowering<"VMASKMOVPD", VR128, v2f64, v2i64>; 79928bcb0991SDimitry Andric defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8f32, v8i32>; 79938bcb0991SDimitry Andric defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4f64, v4i64>; 79940b57cec5SDimitry Andric} 79950b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in { 79960b57cec5SDimitry Andric // load/store i32/i64 not supported use ps/pd version 79978bcb0991SDimitry Andric defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8i32, v8i32>; 79988bcb0991SDimitry Andric defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4i64, v4i64>; 79998bcb0991SDimitry Andric defm : maskmov_lowering<"VMASKMOVPS", VR128, v4i32, v4i32>; 80008bcb0991SDimitry Andric defm : maskmov_lowering<"VMASKMOVPD", VR128, v2i64, v2i64>; 80010b57cec5SDimitry Andric} 80020b57cec5SDimitry Andriclet Predicates = [HasAVX2] in { 80038bcb0991SDimitry Andric defm : maskmov_lowering<"VPMASKMOVDY", VR256, v8i32, v8i32>; 80048bcb0991SDimitry Andric defm : maskmov_lowering<"VPMASKMOVQY", VR256, v4i64, v4i64>; 80058bcb0991SDimitry Andric defm : maskmov_lowering<"VPMASKMOVD", VR128, v4i32, v4i32>; 80068bcb0991SDimitry Andric defm : maskmov_lowering<"VPMASKMOVQ", VR128, v2i64, v2i64>; 80070b57cec5SDimitry Andric} 80080b57cec5SDimitry Andric 80090b57cec5SDimitry 
//===----------------------------------------------------------------------===//
// Variable Bit Shifts
//
/// avx2_var_shift - Shift matched through OpNode, with both operands typed as
/// the same vector type.
/// Emits rr/rm for the 128-bit type (vt128, VR128) and Yrr/Yrm for the 256-bit
/// type (vt256, VR256, VEX_L). The *rm forms fold a plain `load` of $src2.
multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType vt128, ValueType vt256> {
  def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst,
               (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>,
             VEX, VVVV, Sched<[SchedWriteVarVecShift.XMM]>;
  def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, i128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst,
               (vt128 (OpNode VR128:$src1,
                       (vt128 (load addr:$src2)))))]>,
             VEX, VVVV, Sched<[SchedWriteVarVecShift.XMM.Folded,
                               SchedWriteVarVecShift.XMM.ReadAfterFold]>;
  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
             (ins VR256:$src1, VR256:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst,
               (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>,
             VEX, VVVV, VEX_L, Sched<[SchedWriteVarVecShift.YMM]>;
  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, i256mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst,
               (vt256 (OpNode VR256:$src1,
                       (vt256 (load addr:$src2)))))]>,
             VEX, VVVV, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded,
                                      SchedWriteVarVecShift.YMM.ReadAfterFold]>;
}

// The D and Q forms of each shift share an opcode; the Q forms add REX_W.
// No VPSRAVQ is instantiated in this group.
let Predicates = [HasAVX2, NoVLX] in {
  defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", X86vshlv, v4i32, v8i32>;
  defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", X86vshlv, v2i64, v4i64>, REX_W;
  defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", X86vsrlv, v4i32, v8i32>;
  defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", X86vsrlv, v2i64, v4i64>, REX_W;
  defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", X86vsrav, v4i32, v8i32>;
}

//===----------------------------------------------------------------------===//
// VGATHER - GATHER Operations

// FIXME: Improve scheduling of gather instructions.
/// avx2_gather - Gather instruction pair: rm (VR128 result) and Yrm (RC256
/// result, VEX_L). Each record has two outputs, $dst and the written-back mask
/// $mask_wb, and an empty pattern list. memop128/memop256 are the memory
/// operand classes used for $src2 in the two forms.
multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256,
                       X86MemOperand memop128, X86MemOperand memop256> {
let mayLoad = 1, hasSideEffects = 0 in {
  def rm : AVX28I<opc, MRMSrcMem4VOp3, (outs VR128:$dst, VR128:$mask_wb),
            (ins VR128:$src1, memop128:$src2, VR128:$mask),
            !strconcat(OpcodeStr,
              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
            []>, VEX, Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
  def Yrm : AVX28I<opc, MRMSrcMem4VOp3, (outs RC256:$dst, RC256:$mask_wb),
            (ins RC256:$src1, memop256:$src2, RC256:$mask),
            !strconcat(OpcodeStr,
              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
            []>, VEX, VEX_L, Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
}
}

let Predicates = [HasAVX2] in {
  // Both outputs are early-clobber and tied to an input: $dst to $src1 and
  // $mask_wb to $mask.
  let mayLoad = 1, hasSideEffects = 0, Constraints
    = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
    in {
    defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq",
                                  VR256, vx128mem, vx256mem>, REX_W;
    defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq",
                                  VR256, vx128mem, vy256mem>, REX_W;
    defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd",
                                  VR256, vx128mem, vy256mem>;
    // The Q-index/D-element form keeps a VR128 result in its Y variant.
    defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd",
                                  VR128, vx64mem, vy128mem>;

    let ExeDomain = SSEPackedDouble in {
      defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd",
                                    VR256, vx128mem, vx256mem>, REX_W;
      defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd",
                                    VR256, vx128mem, vy256mem>, REX_W;
    }

    let ExeDomain = SSEPackedSingle in {
      defm VGATHERDPS : avx2_gather<0x92, "vgatherdps",
                                    VR256, vx128mem, vy256mem>;
      defm VGATHERQPS : avx2_gather<0x93, "vgatherqps",
                                    VR128, vx64mem, vy128mem>;
    }
  }
}

//===----------------------------------------------------------------------===//
// GFNI instructions
//===----------------------------------------------------------------------===//

/// GF2P8MULB_rm - rr/rm pair matching X86GF2P8mulb (opcode 0xCF, T8).
/// Is2Addr selects the two-operand asm string; the rr form is commutable.
multiclass GF2P8MULB_rm<string OpcodeStr, ValueType OpVT,
                        RegisterClass RC, PatFrag MemOpFrag,
                        X86MemOperand X86MemOp, X86FoldableSchedWrite sched,
                        bit Is2Addr = 0> {
  let ExeDomain = SSEPackedInt,
      AsmString = !if(Is2Addr,
        OpcodeStr#"\t{$src2, $dst|$dst, $src2}",
        OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}") in {
    let isCommutable = 1 in
    def rr : PDI<0xCF, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), "",
                 [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, RC:$src2)))]>,
             Sched<[sched]>, T8;

    def rm : PDI<0xCF, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, X86MemOp:$src2), "",
                 [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1,
                                 (MemOpFrag addr:$src2))))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>, T8;
  }
}

/// GF2P8AFFINE_rmi - rri/rmi pair taking two vector sources plus an 8-bit
/// immediate, matched through OpNode. Is2Addr selects the asm string that
/// omits $src1.
multiclass GF2P8AFFINE_rmi<bits<8> Op, string OpStr, ValueType OpVT,
                           SDNode OpNode, RegisterClass RC, PatFrag MemOpFrag,
                           X86MemOperand X86MemOp, X86FoldableSchedWrite sched,
                           bit Is2Addr = 0> {
  let AsmString = !if(Is2Addr,
      OpStr#"\t{$src3, $src2, $dst|$dst, $src2, $src3}",
      OpStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in {
  def rri : Ii8<Op, MRMSrcReg, (outs RC:$dst),
              (ins RC:$src1, RC:$src2, u8imm:$src3), "",
              [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))],
              SSEPackedInt>, Sched<[sched]>;
  def rmi : Ii8<Op, MRMSrcMem, (outs RC:$dst),
              (ins RC:$src1, X86MemOp:$src2, u8imm:$src3), "",
              [(set RC:$dst, (OpVT (OpNode RC:$src1,
                                    (MemOpFrag addr:$src2),
                              timm:$src3)))], SSEPackedInt>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

/// GF2P8AFFINE_common - Instantiates the legacy SSE form (NAME, tied
/// $src1 = $dst) and the VEX 128/256-bit forms (V#NAME, V#NAME#Y).
multiclass GF2P8AFFINE_common<bits<8> Op, string OpStr, SDNode OpNode> {
  // The legacy-encoded form folds its memory operand through memop, the same
  // fragment the SSE GF2P8MULB instantiation below uses; only the VEX forms
  // use plain load.
  // NOTE(review): presumably memop is the alignment-checked fragment required
  // for legacy SSE memory operands - confirm against its definition.
  let Constraints = "$src1 = $dst",
      Predicates = [HasGFNI, UseSSE2] in
  defm NAME : GF2P8AFFINE_rmi<Op, OpStr, v16i8, OpNode,
                              VR128, memop, i128mem, SchedWriteVecIMul.XMM, 1>;
  let Predicates = [HasGFNI, HasAVX, NoVLX] in {
    defm V#NAME : GF2P8AFFINE_rmi<Op, "v"#OpStr, v16i8, OpNode, VR128,
                                  load, i128mem, SchedWriteVecIMul.XMM>,
                                  VEX, VVVV, REX_W;
    defm V#NAME#Y : GF2P8AFFINE_rmi<Op, "v"#OpStr, v32i8, OpNode, VR256,
                                    load, i256mem, SchedWriteVecIMul.YMM>,
                                    VEX, VVVV, VEX_L, REX_W;
  }
}

// GF2P8MULB
let Constraints = "$src1 = $dst",
    Predicates = [HasGFNI, UseSSE2] in
defm GF2P8MULB : GF2P8MULB_rm<"gf2p8mulb", v16i8, VR128, memop,
                              i128mem, SchedWriteVecALU.XMM, 1>;
let Predicates = [HasGFNI, HasAVX, NoVLX] in {
  defm VGF2P8MULB : GF2P8MULB_rm<"vgf2p8mulb", v16i8, VR128, load,
                                 i128mem, SchedWriteVecALU.XMM>, VEX, VVVV;
  defm VGF2P8MULBY : GF2P8MULB_rm<"vgf2p8mulb", v32i8, VR256, load,
                                  i256mem, SchedWriteVecALU.YMM>, VEX, VVVV, VEX_L;
}
// GF2P8AFFINEINVQB, GF2P8AFFINEQB
let isCommutable = 0 in {
  defm GF2P8AFFINEINVQB : GF2P8AFFINE_common<0xCF, "gf2p8affineinvqb",
                                             X86GF2P8affineinvqb>, TA, PD;
  defm GF2P8AFFINEQB : GF2P8AFFINE_common<0xCE, "gf2p8affineqb",
                                          X86GF2P8affineqb>, TA, PD;
}

// AVX-IFMA
/// avx_ifma_rm - rr/rm (VR128, v2i64) and Yrr/Yrm (VR256, v4i64) records for a
/// three-source operation whose destination is tied to $src1.
let Predicates = [HasAVXIFMA, NoVLX_Or_NoIFMA], Constraints = "$src1 = $dst" in
multiclass avx_ifma_rm<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  // NOTE: The SDNode has the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
  // NOTE(review): rm/Yrm use the same sched class as rr/Yrr, whereas
  // avx_dotprod_rm below uses Sched.Folded/Sched.ReadAfterFold for its memory
  // form - confirm whether that is intentional.
  let isCommutable = 1 in {
    def rr : AVX8I<opc, MRMSrcReg, (outs VR128:$dst),
               (ins VR128:$src1, VR128:$src2, VR128:$src3),
               !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
               [(set VR128:$dst, (v2i64 (OpNode VR128:$src2,
                                  VR128:$src3, VR128:$src1)))]>,
               VEX, VVVV, Sched<[SchedWriteVecIMul.XMM]>;
  }
  def rm : AVX8I<opc, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2, i128mem:$src3),
             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
             [(set VR128:$dst, (v2i64 (OpNode VR128:$src2,
                                (loadv2i64 addr:$src3), VR128:$src1)))]>,
             VEX, VVVV, Sched<[SchedWriteVecIMul.XMM]>;
  let isCommutable = 1 in {
    def Yrr : AVX8I<opc, MRMSrcReg, (outs VR256:$dst),
                (ins VR256:$src1, VR256:$src2, VR256:$src3),
                !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                [(set VR256:$dst, (v4i64 (OpNode VR256:$src2,
                                   VR256:$src3, VR256:$src1)))]>,
                VEX, VVVV, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
  }
  def Yrm : AVX8I<opc, MRMSrcMem, (outs VR256:$dst),
              (ins VR256:$src1, VR256:$src2, i256mem:$src3),
              !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
              [(set VR256:$dst, (v4i64 (OpNode VR256:$src2,
                                 (loadv4i64 addr:$src3), VR256:$src1)))]>,
              VEX, VVVV, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
}

defm VPMADD52HUQ : avx_ifma_rm<0xb5, "vpmadd52huq", x86vpmadd52h>, REX_W, ExplicitVEXPrefix;
defm VPMADD52LUQ : avx_ifma_rm<0xb4, "vpmadd52luq", x86vpmadd52l>, REX_W, ExplicitVEXPrefix;

// AVX-VNNI-INT8
/// avx_dotprod_rm - rr/rm pair for a three-source operation whose destination
/// is tied to $src1. Unlike avx_ifma_rm, the pattern passes the operands to
/// OpNode in their natural order ($src1, $src2, $src3). IsCommutable is
/// applied to the rr form only.
let Constraints = "$src1 = $dst" in
multiclass avx_dotprod_rm<bits<8> Opc, string OpcodeStr, ValueType OpVT,
                          RegisterClass RC, PatFrag MemOpFrag,
                          X86MemOperand X86memop, SDNode OpNode,
                          X86FoldableSchedWrite Sched,
                          bit IsCommutable> {
  let isCommutable = IsCommutable in
  def rr : I<Opc, MRMSrcReg, (outs RC:$dst),
             (ins RC:$src1, RC:$src2, RC:$src3),
             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
             [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
             VEX, VVVV, Sched<[Sched]>;
  def rm : I<Opc, MRMSrcMem, (outs RC:$dst),
             (ins RC:$src1, RC:$src2, X86memop:$src3),
             !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
             [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2,
                                   (MemOpFrag addr:$src3))))]>,
             VEX, VVVV, Sched<[Sched.Folded, Sched.ReadAfterFold]>;
}

// Opcode 0x50 is the plain form and 0x51 the "s"-suffixed form of each
// mnemonic. The SS/UU variants are instantiated as commutable (1), the SU
// variants are not (0).
let Predicates = [HasAVXVNNIINT8] in {
  defm VPDPBSSD : avx_dotprod_rm<0x50,"vpdpbssd", v4i32, VR128, loadv4i32,
                                 i128mem, X86vpdpbssd, SchedWriteVecIMul.XMM,
                                 1>, T8, XD;
  defm VPDPBSSDY : avx_dotprod_rm<0x50,"vpdpbssd", v8i32, VR256, loadv8i32,
                                  i256mem, X86vpdpbssd, SchedWriteVecIMul.YMM,
                                  1>, VEX_L, T8, XD;
  defm VPDPBUUD : avx_dotprod_rm<0x50,"vpdpbuud", v4i32, VR128, loadv4i32,
                                 i128mem, X86vpdpbuud, SchedWriteVecIMul.XMM,
                                 1>, T8;
  defm VPDPBUUDY : avx_dotprod_rm<0x50,"vpdpbuud", v8i32, VR256, loadv8i32,
                                  i256mem, X86vpdpbuud, SchedWriteVecIMul.YMM,
                                  1>, VEX_L, T8;
  defm VPDPBSSDS : avx_dotprod_rm<0x51,"vpdpbssds", v4i32, VR128, loadv4i32,
                                  i128mem, X86vpdpbssds, SchedWriteVecIMul.XMM,
                                  1>, T8, XD;
  defm VPDPBSSDSY : avx_dotprod_rm<0x51,"vpdpbssds", v8i32, VR256, loadv8i32,
                                   i256mem, X86vpdpbssds, SchedWriteVecIMul.YMM,
                                   1>, VEX_L, T8, XD;
  defm VPDPBUUDS : avx_dotprod_rm<0x51,"vpdpbuuds", v4i32, VR128, loadv4i32,
                                  i128mem, X86vpdpbuuds, SchedWriteVecIMul.XMM,
                                  1>, T8;
  defm VPDPBUUDSY : avx_dotprod_rm<0x51,"vpdpbuuds", v8i32, VR256, loadv8i32,
                                   i256mem, X86vpdpbuuds, SchedWriteVecIMul.YMM,
                                   1>, VEX_L, T8;
  defm VPDPBSUD : avx_dotprod_rm<0x50,"vpdpbsud", v4i32, VR128, loadv4i32,
                                 i128mem, X86vpdpbsud, SchedWriteVecIMul.XMM,
                                 0>, T8, XS;
  defm VPDPBSUDY : avx_dotprod_rm<0x50,"vpdpbsud", v8i32, VR256, loadv8i32,
                                  i256mem, X86vpdpbsud, SchedWriteVecIMul.YMM,
                                  0>, VEX_L, T8, XS;
  defm VPDPBSUDS : avx_dotprod_rm<0x51,"vpdpbsuds", v4i32, VR128, loadv4i32,
                                  i128mem, X86vpdpbsuds, SchedWriteVecIMul.XMM,
                                  0>, T8, XS;
  defm VPDPBSUDSY : avx_dotprod_rm<0x51,"vpdpbsuds", v8i32, VR256, loadv8i32,
                                   i256mem, X86vpdpbsuds, SchedWriteVecIMul.YMM,
                                   0>, VEX_L, T8, XS;
}

// AVX-NE-CONVERT
/// AVX_NE_CONVERT_BASE - Memory-source-only rm/Yrm pair. The intrinsic for
/// each form is looked up by name as "int_x86_" # OpcodeStr # "128" / "256",
/// so OpcodeStr must match the intrinsic spelling.
multiclass AVX_NE_CONVERT_BASE<bits<8> Opcode, string OpcodeStr,
                               X86MemOperand MemOp128, X86MemOperand MemOp256> {
  def rm : I<Opcode, MRMSrcMem, (outs VR128:$dst), (ins MemOp128:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set VR128:$dst,
               (!cast<Intrinsic>("int_x86_"#OpcodeStr#"128") addr:$src))]>,
             Sched<[WriteCvtPH2PS]>, VEX;
  def Yrm : I<Opcode, MRMSrcMem, (outs VR256:$dst), (ins MemOp256:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set VR256:$dst,
                (!cast<Intrinsic>("int_x86_"#OpcodeStr#"256") addr:$src))]>,
              Sched<[WriteCvtPH2PSY]>, VEX, VEX_L;
}

/// VCVTNEPS2BF16_BASE - rr/rm take a 128-bit source, Yrr/Yrm a 256-bit source
/// (VEX_L); all four write a VR128 result. The memory forms carry an {x}/{y}
/// suffix in their asm string.
multiclass VCVTNEPS2BF16_BASE {
  def rr : I<0x72, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
             "vcvtneps2bf16\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst, (int_x86_vcvtneps2bf16128 VR128:$src))]>,
             Sched<[WriteCvtPH2PS]>;
  def rm : I<0x72, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
             "vcvtneps2bf16{x}\t{$src, $dst|$dst, $src}",
             [(set VR128:$dst, (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src)))]>,
             Sched<[WriteCvtPH2PS]>;
  def Yrr : I<0x72, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
              "vcvtneps2bf16\t{$src, $dst|$dst, $src}",
              [(set VR128:$dst, (int_x86_vcvtneps2bf16256 VR256:$src))]>,
              Sched<[WriteCvtPH2PSY]>, VEX_L;
  def Yrm : I<0x72, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
              "vcvtneps2bf16{y}\t{$src, $dst|$dst, $src}",
              [(set VR128:$dst, (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src)))]>,
              Sched<[WriteCvtPH2PSY]>, VEX_L;
}

let Predicates = [HasAVXNECONVERT] in {
  defm VBCSTNEBF162PS : AVX_NE_CONVERT_BASE<0xb1, "vbcstnebf162ps", f16mem,
                                            f16mem>, T8, XS;
  defm VBCSTNESH2PS : AVX_NE_CONVERT_BASE<0xb1, "vbcstnesh2ps", f16mem, f16mem>,
                                          T8, PD;
  defm VCVTNEEBF162PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneebf162ps", f128mem,
                                            f256mem>, T8, XS;
  defm VCVTNEEPH2PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneeph2ps", f128mem,
                                          f256mem>, T8, PD;
  defm VCVTNEOBF162PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneobf162ps", f128mem,
                                            f256mem>, T8, XD;
  defm VCVTNEOPH2PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneoph2ps", f128mem,
                                          f256mem>, T8;
  defm VCVTNEPS2BF16 : VCVTNEPS2BF16_BASE, VEX, T8, XS, ExplicitVEXPrefix;

  // Generic-node selection: the 128-bit source is matched through
  // X86cvtneps2bf16, the 256-bit source through X86vfpround.
  def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32 VR128:$src))),
            (VCVTNEPS2BF16rr VR128:$src)>;
  def : Pat<(v8bf16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16rm addr:$src)>;
  def : Pat<(v8bf16 (X86vfpround (v8f32 VR256:$src))),
            (VCVTNEPS2BF16Yrr VR256:$src)>;
  def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))),
            (VCVTNEPS2BF16Yrm addr:$src)>;
}

// AT&T-only aliases accepting the x/y suffix on the register forms.
def : InstAlias<"vcvtneps2bf16x\t{$src, $dst|$dst, $src}",
                (VCVTNEPS2BF16rr VR128:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtneps2bf16y\t{$src, $dst|$dst, $src}",
                (VCVTNEPS2BF16Yrr VR128:$dst, VR256:$src), 0, "att">;

// FIXME: Is there a better scheduler class for SHA512 than WriteVecIMul?
// SHA512 instructions.  Only register forms (MRMSrcReg) are defined here.
// Through the enclosing `let`, every record is gated on HasSHA512 and ties
// $src1 to $dst, which is why $src1 never appears in the asm strings.
let Predicates = [HasSHA512], Constraints = "$src1 = $dst" in {

// vsha512msg1: 256-bit accumulator, 128-bit second source.
def VSHA512MSG1rr
    : I<0xcc, MRMSrcReg,
        (outs VR256:$dst),
        (ins VR256:$src1, VR128:$src2),
        "vsha512msg1\t{$src2, $dst|$dst, $src2}",
        [(set VR256:$dst,
              (int_x86_vsha512msg1 VR256:$src1, VR128:$src2))]>,
      VEX_L, VEX, T8, XD, Sched<[WriteVecIMul]>;

// vsha512msg2: both sources are 256-bit.
def VSHA512MSG2rr
    : I<0xcd, MRMSrcReg,
        (outs VR256:$dst),
        (ins VR256:$src1, VR256:$src2),
        "vsha512msg2\t{$src2, $dst|$dst, $src2}",
        [(set VR256:$dst,
              (int_x86_vsha512msg2 VR256:$src1, VR256:$src2))]>,
      VEX_L, VEX, T8, XD, Sched<[WriteVecIMul]>;

// vsha512rnds2: three sources (256, 256, 128 bits); the only record in this
// group that also carries the VVVV mixin.
def VSHA512RNDS2rr
    : I<0xcb, MRMSrcReg,
        (outs VR256:$dst),
        (ins VR256:$src1, VR256:$src2, VR128:$src3),
        "vsha512rnds2\t{$src3, $src2, $dst|$dst, $src2, $src3}",
        [(set VR256:$dst,
              (int_x86_vsha512rnds2 VR256:$src1, VR256:$src2,
                                    VR128:$src3))]>,
      VEX_L, VEX, VVVV, T8, XD, Sched<[WriteVecIMul]>;

}

// FIXME: Is there a better scheduler class for SM3 than WriteVecIMul?
// SM3.  Both multiclasses are declared inside the `let`, so the records they
// produce are gated on HasSM3 and tie $src1 to $dst.
let Predicates = [HasSM3], Constraints = "$src1 = $dst" in {
  /// SM3_Base - three-source 128-bit operation at opcode 0xda.
  ///
  /// OpStr is both the mnemonic and the suffix of the intrinsic name
  /// ("int_x86_" # OpStr).  VEX and VVVV are applied here; the opcode map
  /// and prefix are supplied by the instantiating defm.
  multiclass SM3_Base<string OpStr> {
    def rr : I<0xda, MRMSrcReg,
               (outs VR128:$dst),
               (ins VR128:$src1, VR128:$src2, VR128:$src3),
               OpStr # "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
               [(set VR128:$dst,
                     (!cast<Intrinsic>("int_x86_" # OpStr)
                          VR128:$src1, VR128:$src2, VR128:$src3))]>,
             Sched<[WriteVecIMul]>, VEX, VVVV;

    // Same operation with the last source loaded from memory.
    def rm : I<0xda, MRMSrcMem,
               (outs VR128:$dst),
               (ins VR128:$src1, VR128:$src2, i128mem:$src3),
               OpStr # "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
               [(set VR128:$dst,
                     (!cast<Intrinsic>("int_x86_" # OpStr)
                          VR128:$src1, VR128:$src2,
                          (loadv4i32 addr:$src3)))]>,
             Sched<[WriteVecIMul]>, VEX, VVVV;
  }

  /// VSM3RNDS2_Base - vsm3rnds2 at opcode 0xde with a trailing 8-bit
  /// immediate ($src4, matched as timm).  Unlike SM3_Base, no encoding
  /// mixins are attached inside the multiclass; the defm provides them all.
  multiclass VSM3RNDS2_Base {
    def rr : Ii8<0xde, MRMSrcReg,
                 (outs VR128:$dst),
                 (ins VR128:$src1, VR128:$src2, VR128:$src3, i32u8imm:$src4),
                 "vsm3rnds2\t{$src4, $src3, $src2, $dst|$dst, $src2, $src3, $src4}",
                 [(set VR128:$dst,
                       (int_x86_vsm3rnds2 VR128:$src1, VR128:$src2,
                                          VR128:$src3, timm:$src4))]>,
               Sched<[WriteVecIMul]>;

    def rm : Ii8<0xde, MRMSrcMem,
                 (outs VR128:$dst),
                 (ins VR128:$src1, VR128:$src2, i128mem:$src3, i32u8imm:$src4),
                 "vsm3rnds2\t{$src4, $src3, $src2, $dst|$dst, $src2, $src3, $src4}",
                 [(set VR128:$dst,
                       (int_x86_vsm3rnds2 VR128:$src1, VR128:$src2,
                                          (loadv4i32 addr:$src3),
                                          timm:$src4))]>,
               Sched<[WriteVecIMul]>;
  }
}

defm VSM3MSG1  : SM3_Base<"vsm3msg1">, T8;
defm VSM3MSG2  : SM3_Base<"vsm3msg2">, T8, PD;
defm VSM3RNDS2 : VSM3RNDS2_Base, VEX, VVVV, TA, PD;

// FIXME: Is there a better scheduler class for SM4 than WriteVecIMul?
let Predicates = [HasSM4] in {
  /// SM4_Base - two-source operation at opcode 0xda, generic over width.
  ///
  ///   OpStr  - mnemonic, also the middle part of the intrinsic name
  ///   RC     - register class for the destination and register sources
  ///   VL     - width suffix appended to the intrinsic name
  ///   LD     - load fragment wrapped around the memory source
  ///   MemOp  - memory operand kind for the rm form
  /// No operand is tied: the asm string prints $src1, $src2 and $dst.
  multiclass SM4_Base<string OpStr, RegisterClass RC, string VL,
                      PatFrag LD, X86MemOperand MemOp> {
    def rr : I<0xda, MRMSrcReg,
               (outs RC:$dst),
               (ins RC:$src1, RC:$src2),
               OpStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set RC:$dst,
                     (!cast<Intrinsic>("int_x86_" # OpStr # VL)
                          RC:$src1, RC:$src2))]>,
             Sched<[WriteVecIMul]>;

    def rm : I<0xda, MRMSrcMem,
               (outs RC:$dst),
               (ins RC:$src1, MemOp:$src2),
               OpStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set RC:$dst,
                     (!cast<Intrinsic>("int_x86_" # OpStr # VL)
                          RC:$src1, (LD addr:$src2)))]>,
             Sched<[WriteVecIMul]>;
  }
}

// 128-bit and 256-bit (Y, with VEX_L) instantiations of each SM4 mnemonic.
defm VSM4KEY4   : SM4_Base<"vsm4key4", VR128, "128", loadv4i32, i128mem>,
                  T8, XS, VEX, VVVV;
defm VSM4KEY4Y  : SM4_Base<"vsm4key4", VR256, "256", loadv8i32, i256mem>,
                  T8, XS, VEX_L, VEX, VVVV;
defm VSM4RNDS4  : SM4_Base<"vsm4rnds4", VR128, "128", loadv4i32, i128mem>,
                  T8, XD, VEX, VVVV;
defm VSM4RNDS4Y : SM4_Base<"vsm4rnds4", VR256, "256", loadv8i32, i256mem>,
                  T8, XD, VEX_L, VEX, VVVV;

/// avx_vnni_int16 - 16-bit dot-product accumulate, 128- and 256-bit forms.
///
/// $src1 is tied to $dst and the records are gated on HasAVXVNNIINT16 (both
/// from the `let` that wraps this multiclass).  IsCommutable is forwarded to
/// isCommutable on the register forms (rr, Yrr) only; the memory forms do
/// not set it.  Intrinsics are resolved by name as
/// "int_x86_avx2_" # OpcodeStr # "_128" / "_256".
let Predicates = [HasAVXVNNIINT16], Constraints = "$src1 = $dst" in
multiclass avx_vnni_int16<bits<8> opc, string OpcodeStr, bit IsCommutable> {
  let isCommutable = IsCommutable in
  def rr : I<opc, MRMSrcReg,
             (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2, VR128:$src3),
             OpcodeStr # "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
             [(set VR128:$dst,
                   (v4i32 (!cast<Intrinsic>("int_x86_avx2_" # OpcodeStr # "_128")
                               VR128:$src1, VR128:$src2, VR128:$src3)))]>,
           VEX, VVVV, Sched<[SchedWriteVecIMul.XMM]>;

  def rm : I<opc, MRMSrcMem,
             (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2, i128mem:$src3),
             OpcodeStr # "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
             [(set VR128:$dst,
                   (v4i32 (!cast<Intrinsic>("int_x86_avx2_" # OpcodeStr # "_128")
                               VR128:$src1, VR128:$src2,
                               (loadv4i32 addr:$src3))))]>,
           VEX, VVVV, Sched<[SchedWriteVecIMul.XMM]>;

  let isCommutable = IsCommutable in
  def Yrr : I<opc, MRMSrcReg,
              (outs VR256:$dst),
              (ins VR256:$src1, VR256:$src2, VR256:$src3),
              OpcodeStr # "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
              [(set VR256:$dst,
                    (v8i32 (!cast<Intrinsic>("int_x86_avx2_" # OpcodeStr # "_256")
                                VR256:$src1, VR256:$src2, VR256:$src3)))]>,
            VEX, VVVV, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;

  def Yrm : I<opc, MRMSrcMem,
              (outs VR256:$dst),
              (ins VR256:$src1, VR256:$src2, i256mem:$src3),
              OpcodeStr # "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
              [(set VR256:$dst,
                    (v8i32 (!cast<Intrinsic>("int_x86_avx2_" # OpcodeStr # "_256")
                                VR256:$src1, VR256:$src2,
                                (loadv8i32 addr:$src3))))]>,
            VEX, VVVV, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
}

// Opcode 0xd2 is the plain form and 0xd3 the "s"-suffixed form of each
// mnemonic.  Only the vpdpwuud* pair is instantiated as commutable.
defm VPDPWSUD  : avx_vnni_int16<0xd2, "vpdpwsud", 0>, T8, XS;
defm VPDPWSUDS : avx_vnni_int16<0xd3, "vpdpwsuds", 0>, T8, XS;
defm VPDPWUSD  : avx_vnni_int16<0xd2, "vpdpwusd", 0>, T8, PD;
defm VPDPWUSDS : avx_vnni_int16<0xd3, "vpdpwusds", 0>, T8, PD;
defm VPDPWUUD  : avx_vnni_int16<0xd2, "vpdpwuud", 1>, T8;
defm VPDPWUUDS : avx_vnni_int16<0xd3, "vpdpwuuds", 1>, T8;