//===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 SSE instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//

/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, X86MemOperand x86memop,
                           Domain d, X86FoldableSchedWrite sched,
                           bit Is2Addr = 1> {
let isCodeGenOnly = 1 in {
  let isCommutable = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], d>,
       Sched<[sched]>;
  }
  def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
     !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
     [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], d>,
     Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr,
                               SDPatternOperator OpNode, RegisterClass RC,
                               ValueType VT, string asm, Operand memopr,
                               ComplexPattern mem_cpat, Domain d,
                               X86FoldableSchedWrite sched, bit Is2Addr = 1> {
let hasSideEffects = 0 in {
  def rr_Int : SI_Int<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
      !if(Is2Addr,
          !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
      [(set RC:$dst, (VT (OpNode RC:$src1, RC:$src2)))], d>,
      Sched<[sched]>;
  let mayLoad = 1 in
  def rm_Int : SI_Int<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
      !if(Is2Addr,
          !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
      [(set RC:$dst, (VT (OpNode RC:$src1, mem_cpat:$src2)))], d>,
      Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

/// sse12_fp_packed - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, ValueType vt,
                           X86MemOperand x86memop, PatFrag mem_frag,
                           Domain d, X86FoldableSchedWrite sched,
                           bit Is2Addr = 1> {
  let isCommutable = 1 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>,
       Sched<[sched]>;
  let mayLoad = 1 in
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
       d>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}

/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
                                      string OpcodeStr, X86MemOperand x86memop,
                                      X86FoldableSchedWrite sched,
                                      list<dag> pat_rr, list<dag> pat_rm,
                                      bit Is2Addr = 1> {
  let isCommutable = 1, hasSideEffects = 0 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rr, d>,
       Sched<[sched]>;
  let hasSideEffects = 0, mayLoad = 1 in
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rm, d>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}


// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
  def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
                   [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoAVX512]>;
  def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
                   [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2, NoAVX512]>;
}

//===----------------------------------------------------------------------===//
// AVX & SSE - Zero/One Vectors
//===----------------------------------------------------------------------===//

// Alias instruction that maps zero vector to pxor / xorp* for sse.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
               [(set VR128:$dst, (v4f32 immAllZerosV))]>;
}

let Predicates = [NoAVX512] in
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;


// The same as done above but for AVX. The 256-bit AVX1 ISA doesn't support PI,
// and doesn't need it because on sandy bridge the register is set to zero
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                 [(set VR256:$dst, (v8i32 immAllZerosV))]>;
}

// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
  def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                       [(set VR128:$dst, (v4i32 immAllOnesV))]>;
  let Predicates = [HasAVX1Only, OptForMinSize] in {
  def AVX1_SETALLONES: I<0, Pseudo, (outs VR256:$dst), (ins), "",
                         [(set VR256:$dst, (v8i32 immAllOnesV))]>;
  }
  let Predicates = [HasAVX2] in
  def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move FP Scalar Instructions
//
// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
// register copies because it's a partial register update; Register-to-register
// movss/movsd is not modeled as an INSERT_SUBREG because INSERT_SUBREG requires
// that the insert be implementable in terms of a copy, and just mentioned, we
// don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//

multiclass sse12_move_rr<SDNode OpNode, ValueType vt,
                         X86MemOperand x86memop, string base_opc,
                         string asm_opr, Domain d, string Name> {
  let isCommutable = 1 in
  def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
              (ins VR128:$src1, VR128:$src2),
              !strconcat(base_opc, asm_opr),
              [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], d>,
              Sched<[SchedWriteFShuffle.XMM]>;

  // For the disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                  (ins VR128:$src1, VR128:$src2),
                  !strconcat(base_opc, asm_opr), []>,
                  Sched<[SchedWriteFShuffle.XMM]>, FoldGenData<Name#rr>;
}

multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
                      X86MemOperand x86memop, string OpcodeStr,
                      Domain d, string Name, Predicate pred> {
  // AVX
  let Predicates = [UseAVX, OptForSize] in
  defm V#NAME : sse12_move_rr<OpNode, vt, x86memop, OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d,
                              "V"#Name>,
                              VEX_4V, VEX_LIG, VEX_WIG;

  def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(store RC:$src, addr:$dst)], d>,
                     VEX, VEX_LIG, Sched<[WriteFStore]>, VEX_WIG;
  // SSE1 & 2
  let Constraints = "$src1 = $dst" in {
    let Predicates = [pred, NoSSE41_Or_OptForSize] in
    defm NAME : sse12_move_rr<OpNode, vt, x86memop, OpcodeStr,
                              "\t{$src2, $dst|$dst, $src2}", d, Name>;
  }

  def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(store RC:$src, addr:$dst)], d>,
                   Sched<[WriteFStore]>;

  def : InstAlias<"v"#OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>("V"#NAME#"rr_REV")
                   VR128:$dst, VR128:$src1, VR128:$src2), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src2, $dst|$dst, $src2}",
                  (!cast<Instruction>(NAME#"rr_REV")
                   VR128:$dst, VR128:$src2), 0>;
}

// Loading from memory automatically zeroing upper bits.
multiclass sse12_move_rm<RegisterClass RC, ValueType vt, X86MemOperand x86memop,
                         PatFrag mem_pat, PatFrag vzloadfrag, string OpcodeStr,
                         Domain d> {
  def V#NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
                     VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
  def NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
                   Sched<[WriteFLoad]>;

  // _alt version uses FR32/FR64 register class.
  let isCodeGenOnly = 1 in {
    def V#NAME#rm_alt : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                           !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                           [(set RC:$dst, (mem_pat addr:$src))], d>,
                           VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
    def NAME#rm_alt : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                         [(set RC:$dst, (mem_pat addr:$src))], d>,
                         Sched<[WriteFLoad]>;
  }
}

defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
                        SSEPackedSingle, "MOVSS", UseSSE1>, XS;
defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
                        SSEPackedDouble, "MOVSD", UseSSE2>, XD;

let canFoldAsLoad = 1, isReMaterializable = 1 in {
  defm MOVSS : sse12_move_rm<FR32, v4f32, f32mem, loadf32, X86vzload32, "movss",
                             SSEPackedSingle>, XS;
  defm MOVSD : sse12_move_rm<FR64, v2f64, f64mem, loadf64, X86vzload64, "movsd",
                             SSEPackedDouble>, XD;
}

// Patterns
let Predicates = [UseAVX] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
}

let Predicates = [UseAVX, OptForSize] in {
  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
  // MOVSS to the lower bits.
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (VMOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (VMOVSSrr (v4i32 (V_SET0)), VR128:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSrr (v4f32 (V_SET0)),
             (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSrr (v4i32 (V_SET0)),
             (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)))), sub_xmm)>;
}

let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVSS to the lower bits.
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
          (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
          (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
}

let Predicates = [UseSSE2] in
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
          (MOVSDrm addr:$src)>;

let Predicates = [UseSSE1] in
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
          (MOVSSrm addr:$src)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
//===----------------------------------------------------------------------===//

multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
                            X86MemOperand x86memop, PatFrag ld_frag,
                            string asm, Domain d,
                            X86SchedWriteMoveLS sched> {
let hasSideEffects = 0, isMoveReg = 1 in
  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>,
           Sched<[sched.RR]>;
let canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set RC:$dst, (ld_frag addr:$src))], d>,
           Sched<[sched.RM]>;
}

let Predicates = [HasAVX, NoVLX] in {
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
                                SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                                PS, VEX, VEX_WIG;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
                                SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                                PD, VEX, VEX_WIG;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
                                SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                                PS, VEX, VEX_WIG;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
                                SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                                PD, VEX, VEX_WIG;

defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, "movaps",
                                 SSEPackedSingle, SchedWriteFMoveLS.YMM>,
                                 PS, VEX, VEX_L, VEX_WIG;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd",
                                 SSEPackedDouble, SchedWriteFMoveLS.YMM>,
                                 PD, VEX, VEX_L, VEX_WIG;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups",
                                 SSEPackedSingle, SchedWriteFMoveLS.YMM>,
                                 PS, VEX, VEX_L, VEX_WIG;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd",
                                 SSEPackedDouble, SchedWriteFMoveLS.YMM>,
                                 PD, VEX, VEX_L, VEX_WIG;
}

let Predicates = [UseSSE1] in {
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
                               SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                               PS;
defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
                               SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                               PS;
}
let Predicates = [UseSSE2] in {
defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
                               SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                               PD;
defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
                               SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                               PD;
}

let Predicates = [HasAVX, NoVLX] in {
let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                     "movaps\t{$src, $dst|$dst, $src}",
                     [(alignedstore (v4f32 VR128:$src), addr:$dst)]>,
                     VEX, VEX_WIG;
def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                     "movapd\t{$src, $dst|$dst, $src}",
                     [(alignedstore (v2f64 VR128:$src), addr:$dst)]>,
                     VEX, VEX_WIG;
def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                     "movups\t{$src, $dst|$dst, $src}",
                     [(store (v4f32 VR128:$src), addr:$dst)]>,
                     VEX, VEX_WIG;
def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                     "movupd\t{$src, $dst|$dst, $src}",
                     [(store (v2f64 VR128:$src), addr:$dst)]>,
                     VEX, VEX_WIG;
} // SchedRW

let SchedRW = [SchedWriteFMoveLS.YMM.MR] in {
def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                      "movaps\t{$src, $dst|$dst, $src}",
                      [(alignedstore (v8f32 VR256:$src), addr:$dst)]>,
                      VEX, VEX_L, VEX_WIG;
def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                      "movapd\t{$src, $dst|$dst, $src}",
                      [(alignedstore (v4f64 VR256:$src), addr:$dst)]>,
                      VEX, VEX_L, VEX_WIG;
def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                      "movups\t{$src, $dst|$dst, $src}",
                      [(store (v8f32 VR256:$src), addr:$dst)]>,
                      VEX, VEX_L, VEX_WIG;
def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                      "movupd\t{$src, $dst|$dst, $src}",
                      [(store (v4f64 VR256:$src), addr:$dst)]>,
                      VEX, VEX_L, VEX_WIG;
} // SchedRW
} // Predicate

// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    isMoveReg = 1 in {
let SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
  def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movaps\t{$src, $dst|$dst, $src}", []>,
                           VEX, VEX_WIG, FoldGenData<"VMOVAPSrr">;
  def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movapd\t{$src, $dst|$dst, $src}", []>,
                           VEX, VEX_WIG, FoldGenData<"VMOVAPDrr">;
  def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movups\t{$src, $dst|$dst, $src}", []>,
                           VEX, VEX_WIG, FoldGenData<"VMOVUPSrr">;
  def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movupd\t{$src, $dst|$dst, $src}", []>,
                           VEX, VEX_WIG, FoldGenData<"VMOVUPDrr">;
} // SchedRW

let SchedRW = [SchedWriteFMoveLS.YMM.RR] in {
  def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movaps\t{$src, $dst|$dst, $src}", []>,
                            VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVAPSYrr">;
  def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movapd\t{$src, $dst|$dst, $src}", []>,
                            VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVAPDYrr">;
  def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movups\t{$src, $dst|$dst, $src}", []>,
                            VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPSYrr">;
  def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movupd\t{$src, $dst|$dst, $src}", []>,
                            VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPDYrr">;
} // SchedRW
} // Predicate

// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
                (VMOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}",
                (VMOVAPDrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}",
                (VMOVUPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}",
                (VMOVUPDrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
                (VMOVAPSYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}",
                (VMOVAPDYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}",
                (VMOVUPSYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}",
                (VMOVUPDYrr_REV VR256:$dst, VR256:$src), 0>;

let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)]>;
def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)]>;
} // SchedRW

// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    isMoveReg = 1, SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
  def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movaps\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVAPSrr">;
  def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movapd\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVAPDrr">;
  def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movups\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVUPSrr">;
  def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movupd\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVUPDrr">;
}

// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"movaps.s\t{$src, $dst|$dst, $src}",
                (MOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movapd.s\t{$src, $dst|$dst, $src}",
                (MOVAPDrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movups.s\t{$src, $dst|$dst, $src}",
                (MOVUPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movupd.s\t{$src, $dst|$dst, $src}",
                (MOVUPDrr_REV VR128:$dst, VR128:$src), 0>;

let Predicates = [HasAVX, NoVLX] in {
  // 256-bit load/store need to use floating point load/store in case we don't
  // have AVX2. Execution domain fixing will convert to integer if AVX2 is
  // available and changing the domain is beneficial.
  // Select the FP-domain YMM moves for all 256-bit integer vector types.
  def : Pat<(alignedloadv4i64 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(alignedloadv8i32 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(alignedloadv16i16 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(alignedloadv32i8 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(loadv4i64 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(loadv8i32 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(loadv16i16 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(loadv32i8 addr:$src),
            (VMOVUPSYrm addr:$src)>;

  def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore (v16i16 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore (v32i8 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v4i64 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v8i32 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
}

// Use movaps / movups for SSE integer load / store (one byte shorter).
// The instructions selected below are then converted to MOVDQA/MOVDQU
// during the SSE domain pass.
let Predicates = [UseSSE1] in {
  def : Pat<(alignedloadv2i64 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(alignedloadv4i32 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(alignedloadv8i16 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(alignedloadv16i8 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src),
            (MOVUPSrm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),
            (MOVUPSrm addr:$src)>;
  def : Pat<(loadv8i16 addr:$src),
            (MOVUPSrm addr:$src)>;
  def : Pat<(loadv16i8 addr:$src),
            (MOVUPSrm addr:$src)>;

  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//

// Shared skeleton for MOVLPS/MOVLPD (and, via the 0x16 opcode, MOVHPS/MOVHPD)
// memory-source forms; "s"/"d" is appended to base_opc for the two domains.
multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode pdnode,
                                      string base_opc, string asm_opr> {
  // No pattern as they need be special cased between high and low.
  // Single-precision form: no ISel pattern (selected via Pat<> elsewhere).
  let hasSideEffects = 0, mayLoad = 1 in
  def PSrm : PI<opc, MRMSrcMem,
                (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                !strconcat(base_opc, "s", asm_opr),
                [], SSEPackedSingle>, PS,
                Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;

  // Double-precision form: merges a 64-bit load (as a v2f64 scalar) with src1
  // through pdnode.
  def PDrm : PI<opc, MRMSrcMem,
                (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                !strconcat(base_opc, "d", asm_opr),
                [(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
                                          (scalar_to_vector (loadf64 addr:$src2)))))],
                SSEPackedDouble>, PD,
                Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

// Instantiates both the VEX (3-operand) and legacy (2-address, tied src1/dst)
// variants of the hi/lo packed moves.
multiclass sse12_mov_hilo_packed<bits<8>opc, SDPatternOperator pdnode,
                                 string base_opc> {
  let Predicates = [UseAVX] in
  defm V#NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc,
                                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
                                           VEX_4V, VEX_WIG;

  let Constraints = "$src1 = $dst" in
  defm NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc,
                                         "\t{$src2, $dst|$dst, $src2}">;
}

defm MOVL : sse12_mov_hilo_packed<0x12, X86Movsd, "movlp">;

// MOVLPS/MOVLPD store forms: write the low 64 bits of the XMM register.
let SchedRW = [WriteFStore] in {
let Predicates = [UseAVX] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlps\t{$src, $dst|$dst, $src}",
                     []>,
                     VEX, VEX_WIG;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (extractelt (v2f64 VR128:$src),
                                   (iPTR 0))), addr:$dst)]>,
                     VEX, VEX_WIG;
}// UseAVX
let mayStore = 1, hasSideEffects = 0 in
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlps\t{$src, $dst|$dst, $src}",
                   []>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (extractelt (v2f64 VR128:$src),
                                 (iPTR 0))), addr:$dst)]>;
} // SchedRW

let Predicates = [UseSSE1] in {
  // This pattern helps select MOVLPS on SSE1 only targets. With SSE2 we'll
  // end up with a movsd or blend instead of shufp.
  // No need for aligned load, we're only loading 64-bits.
  // i8 -28 == 0xE4 shuffle immediate — TODO(review) confirm against X86Shufp
  // immediate semantics.
  def : Pat<(X86Shufp (v4f32 (nonvolatile_load addr:$src2)), VR128:$src1,
                      (i8 -28)),
            (MOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Shufp (v4f32 (X86vzload64 addr:$src2)), VR128:$src1, (i8 -28)),
            (MOVLPSrm VR128:$src1, addr:$src2)>;

  // Zero-extending 64-bit load: load low qword into a zeroed register.
  def : Pat<(v4f32 (X86vzload64 addr:$src)),
            (MOVLPSrm (v4f32 (V_SET0)), addr:$src)>;
  def : Pat<(X86vextractstore64 (v4f32 VR128:$src), addr:$dst),
            (MOVLPSmr addr:$dst, VR128:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Hi packed FP Instructions
//===----------------------------------------------------------------------===//

defm MOVH : sse12_mov_hilo_packed<0x16, X86Unpckl, "movhp">;

// MOVHPS/MOVHPD store forms: write the high 64 bits of the XMM register.
let SchedRW = [WriteFStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
let Predicates = [UseAVX] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movhps\t{$src, $dst|$dst, $src}",
                     []>, VEX, VEX_WIG;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movhpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (extractelt
                                   (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                   (iPTR 0))), addr:$dst)]>, VEX, VEX_WIG;
} // UseAVX
let mayStore = 1, hasSideEffects = 0 in
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhps\t{$src, $dst|$dst, $src}",
                   []>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (extractelt
                                 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                 (iPTR 0))), addr:$dst)]>;
} // SchedRW

let Predicates = [UseAVX] in {
  // Also handle an i64 load because that may get selected as a faster way to
  // load the data.
  // MOVHPD patterns (AVX): fold a 64-bit load merged into the high half.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                   (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
            (VMOVHPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
            (VMOVHPDrm VR128:$src1, addr:$src2)>;

  // Storing element 1 (brought low via VPERMILPD imm 1) is just MOVHPD.
  def : Pat<(store (f64 (extractelt
                         (v2f64 (X86VPermilpi VR128:$src, (i8 1))),
                         (iPTR 0))), addr:$dst),
            (VMOVHPDmr addr:$dst, VR128:$src)>;

  // MOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;
}

let Predicates = [UseSSE1] in {
  // This pattern helps select MOVHPS on SSE1 only targets. With SSE2 we'll
  // end up with a movsd or blend instead of shufp.
  // No need for aligned load, we're only loading 64-bits.
  def : Pat<(X86Movlhps VR128:$src1, (v4f32 (nonvolatile_load addr:$src2))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128:$src1, (v4f32 (X86vzload64 addr:$src2))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;

  def : Pat<(X86vextractstore64 (v4f32 (X86Movhlps VR128:$src, VR128:$src)),
                                addr:$dst),
            (MOVHPSmr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  // MOVHPD patterns

  // Also handle an i64 load because that may get selected as a faster way to
  // load the data.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                   (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
            (MOVHPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
            (MOVHPDrm VR128:$src1, addr:$src2)>;

  def : Pat<(store (f64 (extractelt
                         (v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))),
                         (iPTR 0))), addr:$dst),
            (MOVHPDmr addr:$dst, VR128:$src)>;

  // MOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
}

let Predicates = [UseSSE2, NoSSE41_Or_OptForSize] in {
  // Use MOVLPD to load into the low bits from a full vector unless we can use
  // BLENDPD.
  def : Pat<(X86Movsd VR128:$src1, (v2f64 (nonvolatile_load addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

let Predicates = [UseAVX] in {
  def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst,
                          (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
                        VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG;
  let isCommutable = 1 in
  def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst,
                          (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
                        VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG,
                        NotMemoryFoldable;
}
let Constraints = "$src1 = $dst" in {
  def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "movlhps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
                      Sched<[SchedWriteFShuffle.XMM]>;
  let isCommutable = 1 in
  def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "movhlps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
                      Sched<[SchedWriteFShuffle.XMM]>, NotMemoryFoldable;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Conversion Instructions
//===----------------------------------------------------------------------===//

/// sse12_cvt_s - scalar conversions between FP and integer register classes;
/// rr converts from a register, rm folds a load via ld_frag.
multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                       SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
                       string asm, string mem, X86FoldableSchedWrite sched,
                       SchedRead Int2Fpu = ReadDefault> {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (OpNode SrcRC:$src))]>,
              Sched<[sched, Int2Fpu]>;
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
              mem#"\t{$src, $dst|$dst, $src}",
              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
              Sched<[sched.Folded]>;
}

/// sse12_cvt_p - packed int-to-FP conversions (sint_to_fp over a whole
/// vector register or a loaded vector).
multiclass sse12_cvt_p<bits<8> opc, RegisterClass RC, X86MemOperand x86memop,
                       ValueType DstTy, ValueType SrcTy, PatFrag ld_frag,
                       string asm, Domain d, X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in {
  def rr : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), asm,
             [(set RC:$dst, (DstTy (sint_to_fp (SrcTy RC:$src))))], d>,
             Sched<[sched]>;
  let mayLoad = 1 in
  def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), asm,
             [(set RC:$dst, (DstTy (sint_to_fp
                                    (SrcTy (ld_frag addr:$src)))))], d>,
             Sched<[sched.Folded]>;
}
}

/// sse12_vcvt_avx - AVX 3-operand scalar conversions; patternless (selected
/// through the Pat<> rules below), $src1 supplies the upper destination bits.
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          X86MemOperand x86memop, string asm, string mem,
                          X86FoldableSchedWrite sched> {
let hasSideEffects = 0, Predicates = [UseAVX] in {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  let mayLoad = 1 in
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins DstRC:$src1, x86memop:$src),
              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
} // hasSideEffects = 0
}

// AVX truncating FP-to-int scalar conversions (non-intrinsic, FR32/FR64
// sources); isCodeGenOnly since the assembler uses the intrinsic forms.
let isCodeGenOnly = 1, Predicates = [UseAVX] in {
defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
                              "cvttss2si", "cvttss2si",
                              WriteCvtSS2I>,
                              XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
                                "cvttss2si", "cvttss2si",
                                WriteCvtSS2I>,
                                XS, VEX, VEX_W, VEX_LIG;
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
                              "cvttsd2si", "cvttsd2si",
                              WriteCvtSD2I>,
                              XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
                                "cvttsd2si", "cvttsd2si",
                                WriteCvtSD2I>,
                                XD, VEX, VEX_W, VEX_LIG;
}

// The assembler can recognize rr 64-bit instructions by seeing a rxx
// register, but the same isn't true when only using memory operands,
// provide other assembly "l" and "q" forms to address this explicitly
// where appropriate to do so.
// AVX int-to-FP scalar conversions (non-intrinsic forms).
let isCodeGenOnly = 1 in {
defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss", "l",
                                WriteCvtI2SS>, XS, VEX_4V, VEX_LIG;
defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss", "q",
                                  WriteCvtI2SS>, XS, VEX_4V, VEX_W, VEX_LIG;
defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd", "l",
                                WriteCvtI2SD>, XD, VEX_4V, VEX_LIG;
defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q",
                                  WriteCvtI2SD>, XD, VEX_4V, VEX_W, VEX_LIG;
} // isCodeGenOnly = 1

// Select the AVX conversions above for sint_to_fp; the tied first operand is
// IMPLICIT_DEF since only the low element of the destination is defined.
let Predicates = [UseAVX] in {
  def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f32 (sint_to_fp GR32:$src)),
            (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f32 (sint_to_fp GR64:$src)),
            (VCVTSI642SSrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
  def : Pat<(f64 (sint_to_fp GR32:$src)),
            (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f64 (sint_to_fp GR64:$src)),
            (VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}

// Legacy SSE scalar conversions (non-intrinsic forms).
let isCodeGenOnly = 1 in {
defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
                             "cvttss2si", "cvttss2si",
                             WriteCvtSS2I>, XS;
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
                               "cvttss2si", "cvttss2si",
                               WriteCvtSS2I>, XS, REX_W;
defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
                             "cvttsd2si", "cvttsd2si",
                             WriteCvtSD2I>, XD;
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
                               "cvttsd2si", "cvttsd2si",
                               WriteCvtSD2I>, XD, REX_W;
defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
                            "cvtsi2ss", "cvtsi2ss{l}",
                            WriteCvtI2SS, ReadInt2Fpu>, XS;
defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
                              "cvtsi2ss", "cvtsi2ss{q}",
                              WriteCvtI2SS, ReadInt2Fpu>, XS, REX_W;
defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
                            "cvtsi2sd", "cvtsi2sd{l}",
                            WriteCvtI2SD, ReadInt2Fpu>, XD;
defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
                              "cvtsi2sd", "cvtsi2sd{q}",
                              WriteCvtI2SD, ReadInt2Fpu>, XD, REX_W;
} // isCodeGenOnly = 1

// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).

/// sse12_cvt_sint - intrinsic-style conversions taking a full XMM (vector
/// typed) source; rm_Int folds a load through the mem_cpat complex pattern.
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          ValueType DstVT, ValueType SrcVT, SDNode OpNode,
                          Operand memop, ComplexPattern mem_cpat, string asm,
                          X86FoldableSchedWrite sched> {
  def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
                  !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                  [(set DstRC:$dst, (DstVT (OpNode (SrcVT SrcRC:$src))))]>,
                  Sched<[sched]>;
  def rm_Int : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
                  !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                  [(set DstRC:$dst, (DstVT (OpNode (SrcVT mem_cpat:$src))))]>,
                  Sched<[sched.Folded]>;
}

/// sse12_cvt_sint_3addr - patternless intrinsic conversions with a tied/VEX
/// first source; Is2Addr selects the 2-operand vs 3-operand asm string.
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
                                RegisterClass DstRC, X86MemOperand x86memop,
                                string asm, string mem, X86FoldableSchedWrite sched,
                                bit Is2Addr = 1> {
let hasSideEffects = 0 in {
  def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
                  !if(Is2Addr,
                      !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                      !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                  []>, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  let mayLoad = 1 in
  def rm_Int : SI<opc, MRMSrcMem, (outs DstRC:$dst),
                  (ins DstRC:$src1, x86memop:$src2),
                  !if(Is2Addr,
                      asm#"{"#mem#"}\t{$src2, $dst|$dst, $src2}",
                      asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

let Predicates = [UseAVX] in {
defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64,
                                X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
                                WriteCvtSD2I>, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64,
                                  X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
                                  WriteCvtSD2I>, XD, VEX, VEX_W, VEX_LIG;
}
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si,
                               sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si,
                                 sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD, REX_W;


// Intrinsic-style int-to-FP conversions: AVX 3-operand (Is2Addr = 0) and
// legacy 2-address variants.
let Predicates = [UseAVX] in {
defm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
          i32mem, "cvtsi2ss", "l", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG;
defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
          i64mem, "cvtsi2ss", "q", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, VEX_W;
defm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
          i32mem, "cvtsi2sd", "l", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG;
defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
          i64mem, "cvtsi2sd", "q", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG, VEX_W;
}
let Constraints = "$src1 = $dst" in {
  defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                        i32mem, "cvtsi2ss", "l", WriteCvtI2SS>, XS;
  defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                        i64mem, "cvtsi2ss", "q", WriteCvtI2SS>, XS, REX_W;
  defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                        i32mem, "cvtsi2sd", "l", WriteCvtI2SD>, XD;
  defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                        i64mem, "cvtsi2sd", "q", WriteCvtI2SD>, XD, REX_W;
}

// AT&T-syntax aliases with explicit {l}/{q} size suffixes for the
// register-source intrinsic forms.
def : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI2SSrr_Int VR128:$dst, VR128:$src1, GR32:$src2), 0, "att">;
def : InstAlias<"vcvtsi2ss{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI642SSrr_Int VR128:$dst, VR128:$src1, GR64:$src2), 0, "att">;
def : InstAlias<"vcvtsi2sd{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI2SDrr_Int VR128:$dst, VR128:$src1, GR32:$src2), 0, "att">;
def : InstAlias<"vcvtsi2sd{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI642SDrr_Int VR128:$dst, VR128:$src1, GR64:$src2), 0, "att">;

def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SSrm_Int VR128:$dst, VR128:$src1, i32mem:$src), 0, "att">; 1016*0b57cec5SDimitry Andricdef : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", 1017*0b57cec5SDimitry Andric (VCVTSI2SDrm_Int VR128:$dst, VR128:$src1, i32mem:$src), 0, "att">; 1018*0b57cec5SDimitry Andric 1019*0b57cec5SDimitry Andricdef : InstAlias<"cvtsi2ss{l}\t{$src, $dst|$dst, $src}", 1020*0b57cec5SDimitry Andric (CVTSI2SSrr_Int VR128:$dst, GR32:$src), 0, "att">; 1021*0b57cec5SDimitry Andricdef : InstAlias<"cvtsi2ss{q}\t{$src, $dst|$dst, $src}", 1022*0b57cec5SDimitry Andric (CVTSI642SSrr_Int VR128:$dst, GR64:$src), 0, "att">; 1023*0b57cec5SDimitry Andricdef : InstAlias<"cvtsi2sd{l}\t{$src, $dst|$dst, $src}", 1024*0b57cec5SDimitry Andric (CVTSI2SDrr_Int VR128:$dst, GR32:$src), 0, "att">; 1025*0b57cec5SDimitry Andricdef : InstAlias<"cvtsi2sd{q}\t{$src, $dst|$dst, $src}", 1026*0b57cec5SDimitry Andric (CVTSI642SDrr_Int VR128:$dst, GR64:$src), 0, "att">; 1027*0b57cec5SDimitry Andric 1028*0b57cec5SDimitry Andricdef : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}", 1029*0b57cec5SDimitry Andric (CVTSI2SSrm_Int VR128:$dst, i32mem:$src), 0, "att">; 1030*0b57cec5SDimitry Andricdef : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}", 1031*0b57cec5SDimitry Andric (CVTSI2SDrm_Int VR128:$dst, i32mem:$src), 0, "att">; 1032*0b57cec5SDimitry Andric 1033*0b57cec5SDimitry Andric/// SSE 1 Only 1034*0b57cec5SDimitry Andric 1035*0b57cec5SDimitry Andric// Aliases for intrinsics 1036*0b57cec5SDimitry Andriclet Predicates = [UseAVX] in { 1037*0b57cec5SDimitry Andricdefm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int, 1038*0b57cec5SDimitry Andric ssmem, sse_load_f32, "cvttss2si", 1039*0b57cec5SDimitry Andric WriteCvtSS2I>, XS, VEX, VEX_LIG; 1040*0b57cec5SDimitry Andricdefm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32, 1041*0b57cec5SDimitry Andric X86cvtts2Int, ssmem, sse_load_f32, 1042*0b57cec5SDimitry Andric "cvttss2si", WriteCvtSS2I>, 1043*0b57cec5SDimitry Andric XS, 
VEX, VEX_LIG, VEX_W; 1044*0b57cec5SDimitry Andricdefm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int, 1045*0b57cec5SDimitry Andric sdmem, sse_load_f64, "cvttsd2si", 1046*0b57cec5SDimitry Andric WriteCvtSS2I>, XD, VEX, VEX_LIG; 1047*0b57cec5SDimitry Andricdefm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64, 1048*0b57cec5SDimitry Andric X86cvtts2Int, sdmem, sse_load_f64, 1049*0b57cec5SDimitry Andric "cvttsd2si", WriteCvtSS2I>, 1050*0b57cec5SDimitry Andric XD, VEX, VEX_LIG, VEX_W; 1051*0b57cec5SDimitry Andric} 1052*0b57cec5SDimitry Andricdefm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int, 1053*0b57cec5SDimitry Andric ssmem, sse_load_f32, "cvttss2si", 1054*0b57cec5SDimitry Andric WriteCvtSS2I>, XS; 1055*0b57cec5SDimitry Andricdefm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32, 1056*0b57cec5SDimitry Andric X86cvtts2Int, ssmem, sse_load_f32, 1057*0b57cec5SDimitry Andric "cvttss2si", WriteCvtSS2I>, XS, REX_W; 1058*0b57cec5SDimitry Andricdefm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int, 1059*0b57cec5SDimitry Andric sdmem, sse_load_f64, "cvttsd2si", 1060*0b57cec5SDimitry Andric WriteCvtSD2I>, XD; 1061*0b57cec5SDimitry Andricdefm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64, 1062*0b57cec5SDimitry Andric X86cvtts2Int, sdmem, sse_load_f64, 1063*0b57cec5SDimitry Andric "cvttsd2si", WriteCvtSD2I>, XD, REX_W; 1064*0b57cec5SDimitry Andric 1065*0b57cec5SDimitry Andricdef : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}", 1066*0b57cec5SDimitry Andric (VCVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; 1067*0b57cec5SDimitry Andricdef : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}", 1068*0b57cec5SDimitry Andric (VCVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">; 1069*0b57cec5SDimitry Andricdef : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}", 1070*0b57cec5SDimitry Andric (VCVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; 1071*0b57cec5SDimitry Andricdef 
: InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}", 1072*0b57cec5SDimitry Andric (VCVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">; 1073*0b57cec5SDimitry Andricdef : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}", 1074*0b57cec5SDimitry Andric (VCVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; 1075*0b57cec5SDimitry Andricdef : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}", 1076*0b57cec5SDimitry Andric (VCVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">; 1077*0b57cec5SDimitry Andricdef : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}", 1078*0b57cec5SDimitry Andric (VCVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; 1079*0b57cec5SDimitry Andricdef : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}", 1080*0b57cec5SDimitry Andric (VCVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">; 1081*0b57cec5SDimitry Andric 1082*0b57cec5SDimitry Andricdef : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}", 1083*0b57cec5SDimitry Andric (CVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; 1084*0b57cec5SDimitry Andricdef : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}", 1085*0b57cec5SDimitry Andric (CVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">; 1086*0b57cec5SDimitry Andricdef : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}", 1087*0b57cec5SDimitry Andric (CVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; 1088*0b57cec5SDimitry Andricdef : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}", 1089*0b57cec5SDimitry Andric (CVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">; 1090*0b57cec5SDimitry Andricdef : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}", 1091*0b57cec5SDimitry Andric (CVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; 1092*0b57cec5SDimitry Andricdef : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}", 1093*0b57cec5SDimitry Andric (CVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">; 1094*0b57cec5SDimitry Andricdef : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", 1095*0b57cec5SDimitry Andric (CVTTSD2SI64rr_Int GR64:$dst, 
VR128:$src), 0, "att">; 1096*0b57cec5SDimitry Andricdef : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", 1097*0b57cec5SDimitry Andric (CVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">; 1098*0b57cec5SDimitry Andric 1099*0b57cec5SDimitry Andriclet Predicates = [UseAVX] in { 1100*0b57cec5SDimitry Andricdefm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si, 1101*0b57cec5SDimitry Andric ssmem, sse_load_f32, "cvtss2si", 1102*0b57cec5SDimitry Andric WriteCvtSS2I>, XS, VEX, VEX_LIG; 1103*0b57cec5SDimitry Andricdefm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si, 1104*0b57cec5SDimitry Andric ssmem, sse_load_f32, "cvtss2si", 1105*0b57cec5SDimitry Andric WriteCvtSS2I>, XS, VEX, VEX_W, VEX_LIG; 1106*0b57cec5SDimitry Andric} 1107*0b57cec5SDimitry Andricdefm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si, 1108*0b57cec5SDimitry Andric ssmem, sse_load_f32, "cvtss2si", 1109*0b57cec5SDimitry Andric WriteCvtSS2I>, XS; 1110*0b57cec5SDimitry Andricdefm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si, 1111*0b57cec5SDimitry Andric ssmem, sse_load_f32, "cvtss2si", 1112*0b57cec5SDimitry Andric WriteCvtSS2I>, XS, REX_W; 1113*0b57cec5SDimitry Andric 1114*0b57cec5SDimitry Andricdefm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load, 1115*0b57cec5SDimitry Andric "vcvtdq2ps\t{$src, $dst|$dst, $src}", 1116*0b57cec5SDimitry Andric SSEPackedSingle, WriteCvtI2PS>, 1117*0b57cec5SDimitry Andric PS, VEX, Requires<[HasAVX, NoVLX]>, VEX_WIG; 1118*0b57cec5SDimitry Andricdefm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, load, 1119*0b57cec5SDimitry Andric "vcvtdq2ps\t{$src, $dst|$dst, $src}", 1120*0b57cec5SDimitry Andric SSEPackedSingle, WriteCvtI2PSY>, 1121*0b57cec5SDimitry Andric PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, VEX_WIG; 1122*0b57cec5SDimitry Andric 1123*0b57cec5SDimitry Andricdefm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memop, 1124*0b57cec5SDimitry 
Andric "cvtdq2ps\t{$src, $dst|$dst, $src}", 1125*0b57cec5SDimitry Andric SSEPackedSingle, WriteCvtI2PS>, 1126*0b57cec5SDimitry Andric PS, Requires<[UseSSE2]>; 1127*0b57cec5SDimitry Andric 1128*0b57cec5SDimitry Andric// AVX aliases 1129*0b57cec5SDimitry Andricdef : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}", 1130*0b57cec5SDimitry Andric (VCVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; 1131*0b57cec5SDimitry Andricdef : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}", 1132*0b57cec5SDimitry Andric (VCVTSS2SIrm_Int GR32:$dst, ssmem:$src), 0, "att">; 1133*0b57cec5SDimitry Andricdef : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}", 1134*0b57cec5SDimitry Andric (VCVTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; 1135*0b57cec5SDimitry Andricdef : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}", 1136*0b57cec5SDimitry Andric (VCVTSD2SIrm_Int GR32:$dst, sdmem:$src), 0, "att">; 1137*0b57cec5SDimitry Andricdef : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}", 1138*0b57cec5SDimitry Andric (VCVTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; 1139*0b57cec5SDimitry Andricdef : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}", 1140*0b57cec5SDimitry Andric (VCVTSS2SI64rm_Int GR64:$dst, ssmem:$src), 0, "att">; 1141*0b57cec5SDimitry Andricdef : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}", 1142*0b57cec5SDimitry Andric (VCVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; 1143*0b57cec5SDimitry Andricdef : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}", 1144*0b57cec5SDimitry Andric (VCVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0, "att">; 1145*0b57cec5SDimitry Andric 1146*0b57cec5SDimitry Andric// SSE aliases 1147*0b57cec5SDimitry Andricdef : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}", 1148*0b57cec5SDimitry Andric (CVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; 1149*0b57cec5SDimitry Andricdef : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}", 1150*0b57cec5SDimitry Andric (CVTSS2SIrm_Int GR32:$dst, ssmem:$src), 0, "att">; 1151*0b57cec5SDimitry Andricdef 
: InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}", 1152*0b57cec5SDimitry Andric (CVTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; 1153*0b57cec5SDimitry Andricdef : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}", 1154*0b57cec5SDimitry Andric (CVTSD2SIrm_Int GR32:$dst, sdmem:$src), 0, "att">; 1155*0b57cec5SDimitry Andricdef : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}", 1156*0b57cec5SDimitry Andric (CVTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; 1157*0b57cec5SDimitry Andricdef : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}", 1158*0b57cec5SDimitry Andric (CVTSS2SI64rm_Int GR64:$dst, ssmem:$src), 0, "att">; 1159*0b57cec5SDimitry Andricdef : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}", 1160*0b57cec5SDimitry Andric (CVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">; 1161*0b57cec5SDimitry Andricdef : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}", 1162*0b57cec5SDimitry Andric (CVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0, "att">; 1163*0b57cec5SDimitry Andric 1164*0b57cec5SDimitry Andric/// SSE 2 Only 1165*0b57cec5SDimitry Andric 1166*0b57cec5SDimitry Andric// Convert scalar double to scalar single 1167*0b57cec5SDimitry Andriclet isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX] in { 1168*0b57cec5SDimitry Andricdef VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), 1169*0b57cec5SDimitry Andric (ins FR32:$src1, FR64:$src2), 1170*0b57cec5SDimitry Andric "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 1171*0b57cec5SDimitry Andric VEX_4V, VEX_LIG, VEX_WIG, 1172*0b57cec5SDimitry Andric Sched<[WriteCvtSD2SS]>; 1173*0b57cec5SDimitry Andriclet mayLoad = 1 in 1174*0b57cec5SDimitry Andricdef VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), 1175*0b57cec5SDimitry Andric (ins FR32:$src1, f64mem:$src2), 1176*0b57cec5SDimitry Andric "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 1177*0b57cec5SDimitry Andric XD, VEX_4V, VEX_LIG, VEX_WIG, 1178*0b57cec5SDimitry Andric Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>; 
1179*0b57cec5SDimitry Andric} 1180*0b57cec5SDimitry Andric 1181*0b57cec5SDimitry Andricdef : Pat<(f32 (fpround FR64:$src)), 1182*0b57cec5SDimitry Andric (VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>, 1183*0b57cec5SDimitry Andric Requires<[UseAVX]>; 1184*0b57cec5SDimitry Andric 1185*0b57cec5SDimitry Andriclet isCodeGenOnly = 1 in { 1186*0b57cec5SDimitry Andricdef CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), 1187*0b57cec5SDimitry Andric "cvtsd2ss\t{$src, $dst|$dst, $src}", 1188*0b57cec5SDimitry Andric [(set FR32:$dst, (fpround FR64:$src))]>, 1189*0b57cec5SDimitry Andric Sched<[WriteCvtSD2SS]>; 1190*0b57cec5SDimitry Andricdef CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), 1191*0b57cec5SDimitry Andric "cvtsd2ss\t{$src, $dst|$dst, $src}", 1192*0b57cec5SDimitry Andric [(set FR32:$dst, (fpround (loadf64 addr:$src)))]>, 1193*0b57cec5SDimitry Andric XD, Requires<[UseSSE2, OptForSize]>, 1194*0b57cec5SDimitry Andric Sched<[WriteCvtSD2SS.Folded]>; 1195*0b57cec5SDimitry Andric} 1196*0b57cec5SDimitry Andric 1197*0b57cec5SDimitry Andricdef VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg, 1198*0b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 1199*0b57cec5SDimitry Andric "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", 1200*0b57cec5SDimitry Andric [(set VR128:$dst, 1201*0b57cec5SDimitry Andric (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>, 1202*0b57cec5SDimitry Andric XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>, 1203*0b57cec5SDimitry Andric Sched<[WriteCvtSD2SS]>; 1204*0b57cec5SDimitry Andricdef VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem, 1205*0b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), 1206*0b57cec5SDimitry Andric "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", 1207*0b57cec5SDimitry Andric [(set VR128:$dst, 1208*0b57cec5SDimitry Andric (v4f32 (X86frounds VR128:$src1, sse_load_f64:$src2)))]>, 1209*0b57cec5SDimitry Andric XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>, 
1210*0b57cec5SDimitry Andric Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>; 1211*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { 1212*0b57cec5SDimitry Andricdef CVTSD2SSrr_Int: I<0x5A, MRMSrcReg, 1213*0b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 1214*0b57cec5SDimitry Andric "cvtsd2ss\t{$src2, $dst|$dst, $src2}", 1215*0b57cec5SDimitry Andric [(set VR128:$dst, 1216*0b57cec5SDimitry Andric (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>, 1217*0b57cec5SDimitry Andric XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>; 1218*0b57cec5SDimitry Andricdef CVTSD2SSrm_Int: I<0x5A, MRMSrcMem, 1219*0b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), 1220*0b57cec5SDimitry Andric "cvtsd2ss\t{$src2, $dst|$dst, $src2}", 1221*0b57cec5SDimitry Andric [(set VR128:$dst, 1222*0b57cec5SDimitry Andric (v4f32 (X86frounds VR128:$src1,sse_load_f64:$src2)))]>, 1223*0b57cec5SDimitry Andric XD, Requires<[UseSSE2]>, 1224*0b57cec5SDimitry Andric Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>; 1225*0b57cec5SDimitry Andric} 1226*0b57cec5SDimitry Andric 1227*0b57cec5SDimitry Andric// Convert scalar single to scalar double 1228*0b57cec5SDimitry Andric// SSE2 instructions with XS prefix 1229*0b57cec5SDimitry Andriclet isCodeGenOnly = 1, hasSideEffects = 0 in { 1230*0b57cec5SDimitry Andricdef VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), 1231*0b57cec5SDimitry Andric (ins FR64:$src1, FR32:$src2), 1232*0b57cec5SDimitry Andric "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 1233*0b57cec5SDimitry Andric XS, VEX_4V, VEX_LIG, VEX_WIG, 1234*0b57cec5SDimitry Andric Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>; 1235*0b57cec5SDimitry Andriclet mayLoad = 1 in 1236*0b57cec5SDimitry Andricdef VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), 1237*0b57cec5SDimitry Andric (ins FR64:$src1, f32mem:$src2), 1238*0b57cec5SDimitry Andric "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 
1239*0b57cec5SDimitry Andric XS, VEX_4V, VEX_LIG, VEX_WIG, 1240*0b57cec5SDimitry Andric Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>, 1241*0b57cec5SDimitry Andric Requires<[UseAVX, OptForSize]>; 1242*0b57cec5SDimitry Andric} // isCodeGenOnly = 1, hasSideEffects = 0 1243*0b57cec5SDimitry Andric 1244*0b57cec5SDimitry Andricdef : Pat<(f64 (fpextend FR32:$src)), 1245*0b57cec5SDimitry Andric (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>, Requires<[UseAVX]>; 1246*0b57cec5SDimitry Andricdef : Pat<(fpextend (loadf32 addr:$src)), 1247*0b57cec5SDimitry Andric (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX, OptForSize]>; 1248*0b57cec5SDimitry Andric 1249*0b57cec5SDimitry Andriclet isCodeGenOnly = 1 in { 1250*0b57cec5SDimitry Andricdef CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), 1251*0b57cec5SDimitry Andric "cvtss2sd\t{$src, $dst|$dst, $src}", 1252*0b57cec5SDimitry Andric [(set FR64:$dst, (fpextend FR32:$src))]>, 1253*0b57cec5SDimitry Andric XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>; 1254*0b57cec5SDimitry Andricdef CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), 1255*0b57cec5SDimitry Andric "cvtss2sd\t{$src, $dst|$dst, $src}", 1256*0b57cec5SDimitry Andric [(set FR64:$dst, (fpextend (loadf32 addr:$src)))]>, 1257*0b57cec5SDimitry Andric XS, Requires<[UseSSE2, OptForSize]>, 1258*0b57cec5SDimitry Andric Sched<[WriteCvtSS2SD.Folded]>; 1259*0b57cec5SDimitry Andric} // isCodeGenOnly = 1 1260*0b57cec5SDimitry Andric 1261*0b57cec5SDimitry Andriclet hasSideEffects = 0 in { 1262*0b57cec5SDimitry Andricdef VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg, 1263*0b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 1264*0b57cec5SDimitry Andric "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 1265*0b57cec5SDimitry Andric []>, XS, VEX_4V, VEX_LIG, VEX_WIG, 1266*0b57cec5SDimitry Andric Requires<[HasAVX]>, Sched<[WriteCvtSS2SD]>; 1267*0b57cec5SDimitry Andriclet mayLoad = 1 in 1268*0b57cec5SDimitry 
Andricdef VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem, 1269*0b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), 1270*0b57cec5SDimitry Andric "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 1271*0b57cec5SDimitry Andric []>, XS, VEX_4V, VEX_LIG, VEX_WIG, Requires<[HasAVX]>, 1272*0b57cec5SDimitry Andric Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>; 1273*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix 1274*0b57cec5SDimitry Andricdef CVTSS2SDrr_Int: I<0x5A, MRMSrcReg, 1275*0b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 1276*0b57cec5SDimitry Andric "cvtss2sd\t{$src2, $dst|$dst, $src2}", 1277*0b57cec5SDimitry Andric []>, XS, Requires<[UseSSE2]>, 1278*0b57cec5SDimitry Andric Sched<[WriteCvtSS2SD]>; 1279*0b57cec5SDimitry Andriclet mayLoad = 1 in 1280*0b57cec5SDimitry Andricdef CVTSS2SDrm_Int: I<0x5A, MRMSrcMem, 1281*0b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), 1282*0b57cec5SDimitry Andric "cvtss2sd\t{$src2, $dst|$dst, $src2}", 1283*0b57cec5SDimitry Andric []>, XS, Requires<[UseSSE2]>, 1284*0b57cec5SDimitry Andric Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>; 1285*0b57cec5SDimitry Andric} 1286*0b57cec5SDimitry Andric} // hasSideEffects = 0 1287*0b57cec5SDimitry Andric 1288*0b57cec5SDimitry Andric// Patterns used for matching (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss and 1289*0b57cec5SDimitry Andric// (v)cvtss2sd intrinsic sequences from clang which produce unnecessary 1290*0b57cec5SDimitry Andric// vmovs{s,d} instructions 1291*0b57cec5SDimitry Andriclet Predicates = [UseAVX] in { 1292*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 1293*0b57cec5SDimitry Andric (v4f32 VR128:$dst), 1294*0b57cec5SDimitry Andric (v4f32 (scalar_to_vector 1295*0b57cec5SDimitry Andric (f32 (fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))), 1296*0b57cec5SDimitry Andric (VCVTSD2SSrr_Int VR128:$dst, VR128:$src)>; 1297*0b57cec5SDimitry Andric 
1298*0b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 1299*0b57cec5SDimitry Andric (v2f64 VR128:$dst), 1300*0b57cec5SDimitry Andric (v2f64 (scalar_to_vector 1301*0b57cec5SDimitry Andric (f64 (fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))), 1302*0b57cec5SDimitry Andric (VCVTSS2SDrr_Int VR128:$dst, VR128:$src)>; 1303*0b57cec5SDimitry Andric 1304*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 1305*0b57cec5SDimitry Andric (v4f32 VR128:$dst), 1306*0b57cec5SDimitry Andric (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))), 1307*0b57cec5SDimitry Andric (VCVTSI642SSrr_Int VR128:$dst, GR64:$src)>; 1308*0b57cec5SDimitry Andric 1309*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 1310*0b57cec5SDimitry Andric (v4f32 VR128:$dst), 1311*0b57cec5SDimitry Andric (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))), 1312*0b57cec5SDimitry Andric (VCVTSI642SSrm_Int VR128:$dst, addr:$src)>; 1313*0b57cec5SDimitry Andric 1314*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 1315*0b57cec5SDimitry Andric (v4f32 VR128:$dst), 1316*0b57cec5SDimitry Andric (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))), 1317*0b57cec5SDimitry Andric (VCVTSI2SSrr_Int VR128:$dst, GR32:$src)>; 1318*0b57cec5SDimitry Andric 1319*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 1320*0b57cec5SDimitry Andric (v4f32 VR128:$dst), 1321*0b57cec5SDimitry Andric (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))), 1322*0b57cec5SDimitry Andric (VCVTSI2SSrm_Int VR128:$dst, addr:$src)>; 1323*0b57cec5SDimitry Andric 1324*0b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 1325*0b57cec5SDimitry Andric (v2f64 VR128:$dst), 1326*0b57cec5SDimitry Andric (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))), 1327*0b57cec5SDimitry Andric (VCVTSI642SDrr_Int VR128:$dst, GR64:$src)>; 1328*0b57cec5SDimitry Andric 1329*0b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 1330*0b57cec5SDimitry Andric (v2f64 VR128:$dst), 1331*0b57cec5SDimitry Andric (v2f64 (scalar_to_vector (f64 
(sint_to_fp (loadi64 addr:$src))))))), 1332*0b57cec5SDimitry Andric (VCVTSI642SDrm_Int VR128:$dst, addr:$src)>; 1333*0b57cec5SDimitry Andric 1334*0b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 1335*0b57cec5SDimitry Andric (v2f64 VR128:$dst), 1336*0b57cec5SDimitry Andric (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))), 1337*0b57cec5SDimitry Andric (VCVTSI2SDrr_Int VR128:$dst, GR32:$src)>; 1338*0b57cec5SDimitry Andric 1339*0b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 1340*0b57cec5SDimitry Andric (v2f64 VR128:$dst), 1341*0b57cec5SDimitry Andric (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))), 1342*0b57cec5SDimitry Andric (VCVTSI2SDrm_Int VR128:$dst, addr:$src)>; 1343*0b57cec5SDimitry Andric} // Predicates = [UseAVX] 1344*0b57cec5SDimitry Andric 1345*0b57cec5SDimitry Andriclet Predicates = [UseSSE2] in { 1346*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 1347*0b57cec5SDimitry Andric (v4f32 VR128:$dst), 1348*0b57cec5SDimitry Andric (v4f32 (scalar_to_vector 1349*0b57cec5SDimitry Andric (f32 (fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))), 1350*0b57cec5SDimitry Andric (CVTSD2SSrr_Int VR128:$dst, VR128:$src)>; 1351*0b57cec5SDimitry Andric 1352*0b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 1353*0b57cec5SDimitry Andric (v2f64 VR128:$dst), 1354*0b57cec5SDimitry Andric (v2f64 (scalar_to_vector 1355*0b57cec5SDimitry Andric (f64 (fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))), 1356*0b57cec5SDimitry Andric (CVTSS2SDrr_Int VR128:$dst, VR128:$src)>; 1357*0b57cec5SDimitry Andric 1358*0b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 1359*0b57cec5SDimitry Andric (v2f64 VR128:$dst), 1360*0b57cec5SDimitry Andric (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))), 1361*0b57cec5SDimitry Andric (CVTSI642SDrr_Int VR128:$dst, GR64:$src)>; 1362*0b57cec5SDimitry Andric 1363*0b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 1364*0b57cec5SDimitry Andric (v2f64 VR128:$dst), 1365*0b57cec5SDimitry Andric (v2f64 (scalar_to_vector 
(f64 (sint_to_fp (loadi64 addr:$src))))))), 1366*0b57cec5SDimitry Andric (CVTSI642SDrm_Int VR128:$dst, addr:$src)>; 1367*0b57cec5SDimitry Andric 1368*0b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 1369*0b57cec5SDimitry Andric (v2f64 VR128:$dst), 1370*0b57cec5SDimitry Andric (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))), 1371*0b57cec5SDimitry Andric (CVTSI2SDrr_Int VR128:$dst, GR32:$src)>; 1372*0b57cec5SDimitry Andric 1373*0b57cec5SDimitry Andricdef : Pat<(v2f64 (X86Movsd 1374*0b57cec5SDimitry Andric (v2f64 VR128:$dst), 1375*0b57cec5SDimitry Andric (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))), 1376*0b57cec5SDimitry Andric (CVTSI2SDrm_Int VR128:$dst, addr:$src)>; 1377*0b57cec5SDimitry Andric} // Predicates = [UseSSE2] 1378*0b57cec5SDimitry Andric 1379*0b57cec5SDimitry Andriclet Predicates = [UseSSE1] in { 1380*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 1381*0b57cec5SDimitry Andric (v4f32 VR128:$dst), 1382*0b57cec5SDimitry Andric (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))), 1383*0b57cec5SDimitry Andric (CVTSI642SSrr_Int VR128:$dst, GR64:$src)>; 1384*0b57cec5SDimitry Andric 1385*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 1386*0b57cec5SDimitry Andric (v4f32 VR128:$dst), 1387*0b57cec5SDimitry Andric (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))), 1388*0b57cec5SDimitry Andric (CVTSI642SSrm_Int VR128:$dst, addr:$src)>; 1389*0b57cec5SDimitry Andric 1390*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 1391*0b57cec5SDimitry Andric (v4f32 VR128:$dst), 1392*0b57cec5SDimitry Andric (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))), 1393*0b57cec5SDimitry Andric (CVTSI2SSrr_Int VR128:$dst, GR32:$src)>; 1394*0b57cec5SDimitry Andric 1395*0b57cec5SDimitry Andricdef : Pat<(v4f32 (X86Movss 1396*0b57cec5SDimitry Andric (v4f32 VR128:$dst), 1397*0b57cec5SDimitry Andric (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))), 1398*0b57cec5SDimitry Andric (CVTSI2SSrm_Int VR128:$dst, 
addr:$src)>; 1399*0b57cec5SDimitry Andric} // Predicates = [UseSSE1] 1400*0b57cec5SDimitry Andric 1401*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 1402*0b57cec5SDimitry Andric// Convert packed single/double fp to doubleword 1403*0b57cec5SDimitry Andricdef VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1404*0b57cec5SDimitry Andric "cvtps2dq\t{$src, $dst|$dst, $src}", 1405*0b57cec5SDimitry Andric [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>, 1406*0b57cec5SDimitry Andric VEX, Sched<[WriteCvtPS2I]>, VEX_WIG; 1407*0b57cec5SDimitry Andricdef VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1408*0b57cec5SDimitry Andric "cvtps2dq\t{$src, $dst|$dst, $src}", 1409*0b57cec5SDimitry Andric [(set VR128:$dst, 1410*0b57cec5SDimitry Andric (v4i32 (X86cvtp2Int (loadv4f32 addr:$src))))]>, 1411*0b57cec5SDimitry Andric VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG; 1412*0b57cec5SDimitry Andricdef VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 1413*0b57cec5SDimitry Andric "cvtps2dq\t{$src, $dst|$dst, $src}", 1414*0b57cec5SDimitry Andric [(set VR256:$dst, 1415*0b57cec5SDimitry Andric (v8i32 (X86cvtp2Int (v8f32 VR256:$src))))]>, 1416*0b57cec5SDimitry Andric VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG; 1417*0b57cec5SDimitry Andricdef VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), 1418*0b57cec5SDimitry Andric "cvtps2dq\t{$src, $dst|$dst, $src}", 1419*0b57cec5SDimitry Andric [(set VR256:$dst, 1420*0b57cec5SDimitry Andric (v8i32 (X86cvtp2Int (loadv8f32 addr:$src))))]>, 1421*0b57cec5SDimitry Andric VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG; 1422*0b57cec5SDimitry Andric} 1423*0b57cec5SDimitry Andricdef CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1424*0b57cec5SDimitry Andric "cvtps2dq\t{$src, $dst|$dst, $src}", 1425*0b57cec5SDimitry Andric [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>, 1426*0b57cec5SDimitry Andric 
Sched<[WriteCvtPS2I]>; 1427*0b57cec5SDimitry Andricdef CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1428*0b57cec5SDimitry Andric "cvtps2dq\t{$src, $dst|$dst, $src}", 1429*0b57cec5SDimitry Andric [(set VR128:$dst, 1430*0b57cec5SDimitry Andric (v4i32 (X86cvtp2Int (memopv4f32 addr:$src))))]>, 1431*0b57cec5SDimitry Andric Sched<[WriteCvtPS2ILd]>; 1432*0b57cec5SDimitry Andric 1433*0b57cec5SDimitry Andric 1434*0b57cec5SDimitry Andric// Convert Packed Double FP to Packed DW Integers 1435*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 1436*0b57cec5SDimitry Andric// The assembler can recognize rr 256-bit instructions by seeing a ymm 1437*0b57cec5SDimitry Andric// register, but the same isn't true when using memory operands instead. 1438*0b57cec5SDimitry Andric// Provide other assembly rr and rm forms to address this explicitly. 1439*0b57cec5SDimitry Andricdef VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1440*0b57cec5SDimitry Andric "vcvtpd2dq\t{$src, $dst|$dst, $src}", 1441*0b57cec5SDimitry Andric [(set VR128:$dst, 1442*0b57cec5SDimitry Andric (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>, 1443*0b57cec5SDimitry Andric VEX, Sched<[WriteCvtPD2I]>, VEX_WIG; 1444*0b57cec5SDimitry Andric 1445*0b57cec5SDimitry Andric// XMM only 1446*0b57cec5SDimitry Andricdef VCVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1447*0b57cec5SDimitry Andric "vcvtpd2dq{x}\t{$src, $dst|$dst, $src}", 1448*0b57cec5SDimitry Andric [(set VR128:$dst, 1449*0b57cec5SDimitry Andric (v4i32 (X86cvtp2Int (loadv2f64 addr:$src))))]>, VEX, 1450*0b57cec5SDimitry Andric Sched<[WriteCvtPD2ILd]>, VEX_WIG; 1451*0b57cec5SDimitry Andric 1452*0b57cec5SDimitry Andric// YMM only 1453*0b57cec5SDimitry Andricdef VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), 1454*0b57cec5SDimitry Andric "vcvtpd2dq\t{$src, $dst|$dst, $src}", 1455*0b57cec5SDimitry Andric [(set VR128:$dst, 1456*0b57cec5SDimitry Andric (v4i32 
(X86cvtp2Int (v4f64 VR256:$src))))]>, 1457*0b57cec5SDimitry Andric VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG; 1458*0b57cec5SDimitry Andricdef VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), 1459*0b57cec5SDimitry Andric "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", 1460*0b57cec5SDimitry Andric [(set VR128:$dst, 1461*0b57cec5SDimitry Andric (v4i32 (X86cvtp2Int (loadv4f64 addr:$src))))]>, 1462*0b57cec5SDimitry Andric VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG; 1463*0b57cec5SDimitry Andric} 1464*0b57cec5SDimitry Andric 1465*0b57cec5SDimitry Andricdef : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", 1466*0b57cec5SDimitry Andric (VCVTPD2DQrr VR128:$dst, VR128:$src), 0, "att">; 1467*0b57cec5SDimitry Andricdef : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}", 1468*0b57cec5SDimitry Andric (VCVTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">; 1469*0b57cec5SDimitry Andric 1470*0b57cec5SDimitry Andricdef CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1471*0b57cec5SDimitry Andric "cvtpd2dq\t{$src, $dst|$dst, $src}", 1472*0b57cec5SDimitry Andric [(set VR128:$dst, 1473*0b57cec5SDimitry Andric (v4i32 (X86cvtp2Int (memopv2f64 addr:$src))))]>, 1474*0b57cec5SDimitry Andric Sched<[WriteCvtPD2ILd]>; 1475*0b57cec5SDimitry Andricdef CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1476*0b57cec5SDimitry Andric "cvtpd2dq\t{$src, $dst|$dst, $src}", 1477*0b57cec5SDimitry Andric [(set VR128:$dst, 1478*0b57cec5SDimitry Andric (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>, 1479*0b57cec5SDimitry Andric Sched<[WriteCvtPD2I]>; 1480*0b57cec5SDimitry Andric 1481*0b57cec5SDimitry Andric// Convert with truncation packed single/double fp to doubleword 1482*0b57cec5SDimitry Andric// SSE2 packed instructions with XS prefix 1483*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 1484*0b57cec5SDimitry Andricdef VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1485*0b57cec5SDimitry Andric 
"cvttps2dq\t{$src, $dst|$dst, $src}", 1486*0b57cec5SDimitry Andric [(set VR128:$dst, 1487*0b57cec5SDimitry Andric (v4i32 (X86cvttp2si (v4f32 VR128:$src))))]>, 1488*0b57cec5SDimitry Andric VEX, Sched<[WriteCvtPS2I]>, VEX_WIG; 1489*0b57cec5SDimitry Andricdef VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1490*0b57cec5SDimitry Andric "cvttps2dq\t{$src, $dst|$dst, $src}", 1491*0b57cec5SDimitry Andric [(set VR128:$dst, 1492*0b57cec5SDimitry Andric (v4i32 (X86cvttp2si (loadv4f32 addr:$src))))]>, 1493*0b57cec5SDimitry Andric VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG; 1494*0b57cec5SDimitry Andricdef VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 1495*0b57cec5SDimitry Andric "cvttps2dq\t{$src, $dst|$dst, $src}", 1496*0b57cec5SDimitry Andric [(set VR256:$dst, 1497*0b57cec5SDimitry Andric (v8i32 (X86cvttp2si (v8f32 VR256:$src))))]>, 1498*0b57cec5SDimitry Andric VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG; 1499*0b57cec5SDimitry Andricdef VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), 1500*0b57cec5SDimitry Andric "cvttps2dq\t{$src, $dst|$dst, $src}", 1501*0b57cec5SDimitry Andric [(set VR256:$dst, 1502*0b57cec5SDimitry Andric (v8i32 (X86cvttp2si (loadv8f32 addr:$src))))]>, 1503*0b57cec5SDimitry Andric VEX, VEX_L, 1504*0b57cec5SDimitry Andric Sched<[WriteCvtPS2IYLd]>, VEX_WIG; 1505*0b57cec5SDimitry Andric} 1506*0b57cec5SDimitry Andric 1507*0b57cec5SDimitry Andricdef CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1508*0b57cec5SDimitry Andric "cvttps2dq\t{$src, $dst|$dst, $src}", 1509*0b57cec5SDimitry Andric [(set VR128:$dst, 1510*0b57cec5SDimitry Andric (v4i32 (X86cvttp2si (v4f32 VR128:$src))))]>, 1511*0b57cec5SDimitry Andric Sched<[WriteCvtPS2I]>; 1512*0b57cec5SDimitry Andricdef CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1513*0b57cec5SDimitry Andric "cvttps2dq\t{$src, $dst|$dst, $src}", 1514*0b57cec5SDimitry Andric [(set VR128:$dst, 
1515*0b57cec5SDimitry Andric (v4i32 (X86cvttp2si (memopv4f32 addr:$src))))]>, 1516*0b57cec5SDimitry Andric Sched<[WriteCvtPS2ILd]>; 1517*0b57cec5SDimitry Andric 1518*0b57cec5SDimitry Andric// The assembler can recognize rr 256-bit instructions by seeing a ymm 1519*0b57cec5SDimitry Andric// register, but the same isn't true when using memory operands instead. 1520*0b57cec5SDimitry Andric// Provide other assembly rr and rm forms to address this explicitly. 1521*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 1522*0b57cec5SDimitry Andric// XMM only 1523*0b57cec5SDimitry Andricdef VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1524*0b57cec5SDimitry Andric "cvttpd2dq\t{$src, $dst|$dst, $src}", 1525*0b57cec5SDimitry Andric [(set VR128:$dst, 1526*0b57cec5SDimitry Andric (v4i32 (X86cvttp2si (v2f64 VR128:$src))))]>, 1527*0b57cec5SDimitry Andric VEX, Sched<[WriteCvtPD2I]>, VEX_WIG; 1528*0b57cec5SDimitry Andricdef VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1529*0b57cec5SDimitry Andric "cvttpd2dq{x}\t{$src, $dst|$dst, $src}", 1530*0b57cec5SDimitry Andric [(set VR128:$dst, 1531*0b57cec5SDimitry Andric (v4i32 (X86cvttp2si (loadv2f64 addr:$src))))]>, 1532*0b57cec5SDimitry Andric VEX, Sched<[WriteCvtPD2ILd]>, VEX_WIG; 1533*0b57cec5SDimitry Andric 1534*0b57cec5SDimitry Andric// YMM only 1535*0b57cec5SDimitry Andricdef VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), 1536*0b57cec5SDimitry Andric "cvttpd2dq\t{$src, $dst|$dst, $src}", 1537*0b57cec5SDimitry Andric [(set VR128:$dst, 1538*0b57cec5SDimitry Andric (v4i32 (X86cvttp2si (v4f64 VR256:$src))))]>, 1539*0b57cec5SDimitry Andric VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG; 1540*0b57cec5SDimitry Andricdef VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), 1541*0b57cec5SDimitry Andric "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", 1542*0b57cec5SDimitry Andric [(set VR128:$dst, 1543*0b57cec5SDimitry Andric (v4i32 
(X86cvttp2si (loadv4f64 addr:$src))))]>, 1544*0b57cec5SDimitry Andric VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG; 1545*0b57cec5SDimitry Andric} // Predicates = [HasAVX, NoVLX] 1546*0b57cec5SDimitry Andric 1547*0b57cec5SDimitry Andricdef : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}", 1548*0b57cec5SDimitry Andric (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0, "att">; 1549*0b57cec5SDimitry Andricdef : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}", 1550*0b57cec5SDimitry Andric (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">; 1551*0b57cec5SDimitry Andric 1552*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 1553*0b57cec5SDimitry Andric def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))), 1554*0b57cec5SDimitry Andric (VCVTTPD2DQYrr VR256:$src)>; 1555*0b57cec5SDimitry Andric def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))), 1556*0b57cec5SDimitry Andric (VCVTTPD2DQYrm addr:$src)>; 1557*0b57cec5SDimitry Andric} 1558*0b57cec5SDimitry Andric 1559*0b57cec5SDimitry Andricdef CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1560*0b57cec5SDimitry Andric "cvttpd2dq\t{$src, $dst|$dst, $src}", 1561*0b57cec5SDimitry Andric [(set VR128:$dst, 1562*0b57cec5SDimitry Andric (v4i32 (X86cvttp2si (v2f64 VR128:$src))))]>, 1563*0b57cec5SDimitry Andric Sched<[WriteCvtPD2I]>; 1564*0b57cec5SDimitry Andricdef CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), 1565*0b57cec5SDimitry Andric "cvttpd2dq\t{$src, $dst|$dst, $src}", 1566*0b57cec5SDimitry Andric [(set VR128:$dst, 1567*0b57cec5SDimitry Andric (v4i32 (X86cvttp2si (memopv2f64 addr:$src))))]>, 1568*0b57cec5SDimitry Andric Sched<[WriteCvtPD2ILd]>; 1569*0b57cec5SDimitry Andric 1570*0b57cec5SDimitry Andric// Convert packed single to packed double 1571*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 1572*0b57cec5SDimitry Andric // SSE2 instructions without OpSize prefix 1573*0b57cec5SDimitry Andricdef VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins 
VR128:$src), 1574*0b57cec5SDimitry Andric "vcvtps2pd\t{$src, $dst|$dst, $src}", 1575*0b57cec5SDimitry Andric [(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>, 1576*0b57cec5SDimitry Andric PS, VEX, Sched<[WriteCvtPS2PD]>, VEX_WIG; 1577*0b57cec5SDimitry Andricdef VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), 1578*0b57cec5SDimitry Andric "vcvtps2pd\t{$src, $dst|$dst, $src}", 1579*0b57cec5SDimitry Andric [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>, 1580*0b57cec5SDimitry Andric PS, VEX, Sched<[WriteCvtPS2PD.Folded]>, VEX_WIG; 1581*0b57cec5SDimitry Andricdef VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), 1582*0b57cec5SDimitry Andric "vcvtps2pd\t{$src, $dst|$dst, $src}", 1583*0b57cec5SDimitry Andric [(set VR256:$dst, (v4f64 (fpextend (v4f32 VR128:$src))))]>, 1584*0b57cec5SDimitry Andric PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, VEX_WIG; 1585*0b57cec5SDimitry Andricdef VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), 1586*0b57cec5SDimitry Andric "vcvtps2pd\t{$src, $dst|$dst, $src}", 1587*0b57cec5SDimitry Andric [(set VR256:$dst, (v4f64 (extloadv4f32 addr:$src)))]>, 1588*0b57cec5SDimitry Andric PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, VEX_WIG; 1589*0b57cec5SDimitry Andric} 1590*0b57cec5SDimitry Andric 1591*0b57cec5SDimitry Andriclet Predicates = [UseSSE2] in { 1592*0b57cec5SDimitry Andricdef CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1593*0b57cec5SDimitry Andric "cvtps2pd\t{$src, $dst|$dst, $src}", 1594*0b57cec5SDimitry Andric [(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>, 1595*0b57cec5SDimitry Andric PS, Sched<[WriteCvtPS2PD]>; 1596*0b57cec5SDimitry Andricdef CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), 1597*0b57cec5SDimitry Andric "cvtps2pd\t{$src, $dst|$dst, $src}", 1598*0b57cec5SDimitry Andric [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>, 1599*0b57cec5SDimitry Andric PS, 
Sched<[WriteCvtPS2PD.Folded]>; 1600*0b57cec5SDimitry Andric} 1601*0b57cec5SDimitry Andric 1602*0b57cec5SDimitry Andric// Convert Packed DW Integers to Packed Double FP 1603*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 1604*0b57cec5SDimitry Andriclet hasSideEffects = 0, mayLoad = 1 in 1605*0b57cec5SDimitry Andricdef VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 1606*0b57cec5SDimitry Andric "vcvtdq2pd\t{$src, $dst|$dst, $src}", 1607*0b57cec5SDimitry Andric [(set VR128:$dst, 1608*0b57cec5SDimitry Andric (v2f64 (X86VSintToFP 1609*0b57cec5SDimitry Andric (bc_v4i32 1610*0b57cec5SDimitry Andric (v2i64 (scalar_to_vector 1611*0b57cec5SDimitry Andric (loadi64 addr:$src)))))))]>, 1612*0b57cec5SDimitry Andric VEX, Sched<[WriteCvtI2PDLd]>, VEX_WIG; 1613*0b57cec5SDimitry Andricdef VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1614*0b57cec5SDimitry Andric "vcvtdq2pd\t{$src, $dst|$dst, $src}", 1615*0b57cec5SDimitry Andric [(set VR128:$dst, 1616*0b57cec5SDimitry Andric (v2f64 (X86VSintToFP (v4i32 VR128:$src))))]>, 1617*0b57cec5SDimitry Andric VEX, Sched<[WriteCvtI2PD]>, VEX_WIG; 1618*0b57cec5SDimitry Andricdef VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), 1619*0b57cec5SDimitry Andric "vcvtdq2pd\t{$src, $dst|$dst, $src}", 1620*0b57cec5SDimitry Andric [(set VR256:$dst, 1621*0b57cec5SDimitry Andric (v4f64 (sint_to_fp (loadv4i32 addr:$src))))]>, 1622*0b57cec5SDimitry Andric VEX, VEX_L, Sched<[WriteCvtI2PDYLd]>, 1623*0b57cec5SDimitry Andric VEX_WIG; 1624*0b57cec5SDimitry Andricdef VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), 1625*0b57cec5SDimitry Andric "vcvtdq2pd\t{$src, $dst|$dst, $src}", 1626*0b57cec5SDimitry Andric [(set VR256:$dst, 1627*0b57cec5SDimitry Andric (v4f64 (sint_to_fp (v4i32 VR128:$src))))]>, 1628*0b57cec5SDimitry Andric VEX, VEX_L, Sched<[WriteCvtI2PDY]>, VEX_WIG; 1629*0b57cec5SDimitry Andric} 1630*0b57cec5SDimitry Andric 
// SSE2: convert two packed 32-bit integers (low qword of the memory/register
// operand) to two packed doubles.
let hasSideEffects = 0, mayLoad = 1 in
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                      "cvtdq2pd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v2f64 (X86VSintToFP
                                (bc_v4i32
                                 (v2i64 (scalar_to_vector
                                         (loadi64 addr:$src)))))))]>,
                      Sched<[WriteCvtI2PDLd]>;
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvtdq2pd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v2f64 (X86VSintToFP (v4i32 VR128:$src))))]>,
                      Sched<[WriteCvtI2PD]>;

// AVX register conversion intrinsics
let Predicates = [HasAVX, NoVLX] in {
  // Fold a zero-extending 64-bit vector load into the AVX form.
  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTDQ2PDrm addr:$src)>;
} // Predicates = [HasAVX, NoVLX]

// SSE2 register conversion intrinsics
let Predicates = [UseSSE2] in {
  // Same fold for the legacy-SSE form.
  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (CVTDQ2PDrm addr:$src)>;
} // Predicates = [UseSSE2]

// Convert packed double to packed single
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
let Predicates = [HasAVX, NoVLX] in {
// XMM only
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtpd2ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>,
                       VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG;
// The {x} suffix disambiguates the 128-bit memory form for the assembler.
def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (X86vfpround (loadv2f64 addr:$src)))]>,
                       VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG;

// YMM source, XMM destination (4 x f64 -> 4 x f32).
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                        "cvtpd2ps\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (X86vfpround VR256:$src))]>,
                        VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                        "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (X86vfpround (loadv4f64 addr:$src)))]>,
                        VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG;
} // Predicates = [HasAVX, NoVLX]

// AT&T-syntax aliases so explicitly-suffixed mnemonics match the rr forms.
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
                (VCVTPD2PSrr VR128:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
                (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0, "att">;

// Legacy-SSE packed double -> packed single conversion.
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>,
                     Sched<[WriteCvtPD2PS]>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (X86vfpround (memopv2f64 addr:$src)))]>,
                     Sched<[WriteCvtPD2PS.Folded]>;

let Predicates = [HasAVX, NoVLX] in {
  // Select the YMM conversion for generic 4 x f64 -> 4 x f32 rounding.
  def : Pat<(v4f32 (fpround (v4f64 VR256:$src))),
            (VCVTPD2PSYrr VR256:$src)>;
  def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
            (VCVTPD2PSYrm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
//===----------------------------------------------------------------------===//

// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
                            SDNode OpNode, ValueType VT,
                            PatFrag ld_frag, string asm,
                            X86FoldableSchedWrite sched> {
  // Register-register form; commutable so the folder may swap operands.
  let isCommutable = 1 in
  def rr : SIi8<0xC2, MRMSrcReg,
                (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
                [(set RC:$dst, (OpNode (VT RC:$src1),
                                       RC:$src2, imm:$cc))]>,
           Sched<[sched]>;
  // Register-memory form; second source comes from memory via ld_frag.
  def rm : SIi8<0xC2, MRMSrcMem,
                (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
                [(set RC:$dst, (OpNode (VT RC:$src1),
                                       (ld_frag addr:$src2), imm:$cc))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

let isCodeGenOnly = 1 in {
  let ExeDomain = SSEPackedSingle in
  defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmps, f32, loadf32,
                 "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                 SchedWriteFCmpSizes.PS.Scl>, XS, VEX_4V, VEX_LIG, VEX_WIG;
  let ExeDomain = SSEPackedDouble in
  defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmps, f64, loadf64,
                 "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                 SchedWriteFCmpSizes.PD.Scl>,
                 XD, VEX_4V, VEX_LIG, VEX_WIG;

  // Legacy-SSE forms are two-address: $src1 is tied to $dst.
  let Constraints = "$src1 = $dst" in {
    let ExeDomain = SSEPackedSingle in
    defm CMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmps, f32, loadf32,
                   "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                   SchedWriteFCmpSizes.PS.Scl>, XS;
    let ExeDomain = SSEPackedDouble in
    defm CMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmps, f64, loadf64,
                   "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                   SchedWriteFCmpSizes.PD.Scl>, XD;
  }
}

// Intrinsic (whole-XMM) variants of the scalar compares: operate on VR128 so
// the upper elements of $src1 pass through unmodified.
multiclass sse12_cmp_scalar_int<Operand memop,
                                Intrinsic Int, string asm,
                                X86FoldableSchedWrite sched,
                                ComplexPattern mem_cpat> {
  def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src, u8imm:$cc), asm,
                    [(set VR128:$dst, (Int VR128:$src1,
                                           VR128:$src, imm:$cc))]>,
               Sched<[sched]>;
  // Memory form matches through mem_cpat (e.g. sse_load_f32/f64).
  let mayLoad = 1 in
  def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
                    (ins VR128:$src1, memop:$src, u8imm:$cc), asm,
                    [(set VR128:$dst, (Int VR128:$src1,
                                           mem_cpat:$src, imm:$cc))]>,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Aliases to match intrinsics which expect XMM operand(s).
1765*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in 1766*0b57cec5SDimitry Andricdefm VCMPSS : sse12_cmp_scalar_int<ssmem, int_x86_sse_cmp_ss, 1767*0b57cec5SDimitry Andric "cmpss\t{$cc, $src, $src1, $dst|$dst, $src1, $src, $cc}", 1768*0b57cec5SDimitry Andric SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, 1769*0b57cec5SDimitry Andric XS, VEX_4V, VEX_LIG, VEX_WIG; 1770*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in 1771*0b57cec5SDimitry Andricdefm VCMPSD : sse12_cmp_scalar_int<sdmem, int_x86_sse2_cmp_sd, 1772*0b57cec5SDimitry Andric "cmpsd\t{$cc, $src, $src1, $dst|$dst, $src1, $src, $cc}", 1773*0b57cec5SDimitry Andric SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, 1774*0b57cec5SDimitry Andric XD, VEX_4V, VEX_LIG, VEX_WIG; 1775*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { 1776*0b57cec5SDimitry Andric let ExeDomain = SSEPackedSingle in 1777*0b57cec5SDimitry Andric defm CMPSS : sse12_cmp_scalar_int<ssmem, int_x86_sse_cmp_ss, 1778*0b57cec5SDimitry Andric "cmpss\t{$cc, $src, $dst|$dst, $src, $cc}", 1779*0b57cec5SDimitry Andric SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS; 1780*0b57cec5SDimitry Andric let ExeDomain = SSEPackedDouble in 1781*0b57cec5SDimitry Andric defm CMPSD : sse12_cmp_scalar_int<sdmem, int_x86_sse2_cmp_sd, 1782*0b57cec5SDimitry Andric "cmpsd\t{$cc, $src, $dst|$dst, $src, $cc}", 1783*0b57cec5SDimitry Andric SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, XD; 1784*0b57cec5SDimitry Andric} 1785*0b57cec5SDimitry Andric 1786*0b57cec5SDimitry Andric 1787*0b57cec5SDimitry Andric// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS 1788*0b57cec5SDimitry Andricmulticlass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode, 1789*0b57cec5SDimitry Andric ValueType vt, X86MemOperand x86memop, 1790*0b57cec5SDimitry Andric PatFrag ld_frag, string OpcodeStr, 1791*0b57cec5SDimitry Andric X86FoldableSchedWrite sched> { 1792*0b57cec5SDimitry Andriclet hasSideEffects = 0 in { 1793*0b57cec5SDimitry Andric def rr: SI<opc, 
MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), 1794*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 1795*0b57cec5SDimitry Andric [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>, 1796*0b57cec5SDimitry Andric Sched<[sched]>; 1797*0b57cec5SDimitry Andriclet mayLoad = 1 in 1798*0b57cec5SDimitry Andric def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), 1799*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 1800*0b57cec5SDimitry Andric [(set EFLAGS, (OpNode (vt RC:$src1), 1801*0b57cec5SDimitry Andric (ld_frag addr:$src2)))]>, 1802*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 1803*0b57cec5SDimitry Andric} 1804*0b57cec5SDimitry Andric} 1805*0b57cec5SDimitry Andric 1806*0b57cec5SDimitry Andric// sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp 1807*0b57cec5SDimitry Andricmulticlass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode, 1808*0b57cec5SDimitry Andric ValueType vt, Operand memop, 1809*0b57cec5SDimitry Andric ComplexPattern mem_cpat, string OpcodeStr, 1810*0b57cec5SDimitry Andric X86FoldableSchedWrite sched> { 1811*0b57cec5SDimitry Andric def rr_Int: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), 1812*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 1813*0b57cec5SDimitry Andric [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>, 1814*0b57cec5SDimitry Andric Sched<[sched]>; 1815*0b57cec5SDimitry Andriclet mayLoad = 1 in 1816*0b57cec5SDimitry Andric def rm_Int: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2), 1817*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 1818*0b57cec5SDimitry Andric [(set EFLAGS, (OpNode (vt RC:$src1), 1819*0b57cec5SDimitry Andric mem_cpat:$src2))]>, 1820*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 1821*0b57cec5SDimitry Andric} 1822*0b57cec5SDimitry Andric 1823*0b57cec5SDimitry Andriclet Defs = [EFLAGS] in { 1824*0b57cec5SDimitry 
Andric defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, 1825*0b57cec5SDimitry Andric "ucomiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG; 1826*0b57cec5SDimitry Andric defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, 1827*0b57cec5SDimitry Andric "ucomisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG; 1828*0b57cec5SDimitry Andric let Pattern = []<dag> in { 1829*0b57cec5SDimitry Andric defm VCOMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32, 1830*0b57cec5SDimitry Andric "comiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG; 1831*0b57cec5SDimitry Andric defm VCOMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64, 1832*0b57cec5SDimitry Andric "comisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG; 1833*0b57cec5SDimitry Andric } 1834*0b57cec5SDimitry Andric 1835*0b57cec5SDimitry Andric let isCodeGenOnly = 1 in { 1836*0b57cec5SDimitry Andric defm VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, 1837*0b57cec5SDimitry Andric sse_load_f32, "ucomiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG; 1838*0b57cec5SDimitry Andric defm VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, 1839*0b57cec5SDimitry Andric sse_load_f64, "ucomisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG; 1840*0b57cec5SDimitry Andric 1841*0b57cec5SDimitry Andric defm VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, 1842*0b57cec5SDimitry Andric sse_load_f32, "comiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG; 1843*0b57cec5SDimitry Andric defm VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, 1844*0b57cec5SDimitry Andric sse_load_f64, "comisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG; 1845*0b57cec5SDimitry Andric } 1846*0b57cec5SDimitry Andric defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, 1847*0b57cec5SDimitry Andric "ucomiss", WriteFCom>, PS; 1848*0b57cec5SDimitry Andric defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, 1849*0b57cec5SDimitry Andric "ucomisd", WriteFCom>, 
PD; 1850*0b57cec5SDimitry Andric 1851*0b57cec5SDimitry Andric let Pattern = []<dag> in { 1852*0b57cec5SDimitry Andric defm COMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32, 1853*0b57cec5SDimitry Andric "comiss", WriteFCom>, PS; 1854*0b57cec5SDimitry Andric defm COMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64, 1855*0b57cec5SDimitry Andric "comisd", WriteFCom>, PD; 1856*0b57cec5SDimitry Andric } 1857*0b57cec5SDimitry Andric 1858*0b57cec5SDimitry Andric let isCodeGenOnly = 1 in { 1859*0b57cec5SDimitry Andric defm UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, 1860*0b57cec5SDimitry Andric sse_load_f32, "ucomiss", WriteFCom>, PS; 1861*0b57cec5SDimitry Andric defm UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, 1862*0b57cec5SDimitry Andric sse_load_f64, "ucomisd", WriteFCom>, PD; 1863*0b57cec5SDimitry Andric 1864*0b57cec5SDimitry Andric defm COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, 1865*0b57cec5SDimitry Andric sse_load_f32, "comiss", WriteFCom>, PS; 1866*0b57cec5SDimitry Andric defm COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, 1867*0b57cec5SDimitry Andric sse_load_f64, "comisd", WriteFCom>, PD; 1868*0b57cec5SDimitry Andric } 1869*0b57cec5SDimitry Andric} // Defs = [EFLAGS] 1870*0b57cec5SDimitry Andric 1871*0b57cec5SDimitry Andric// sse12_cmp_packed - sse 1 & 2 compare packed instructions 1872*0b57cec5SDimitry Andricmulticlass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, 1873*0b57cec5SDimitry Andric ValueType VT, string asm, 1874*0b57cec5SDimitry Andric X86FoldableSchedWrite sched, 1875*0b57cec5SDimitry Andric Domain d, PatFrag ld_frag> { 1876*0b57cec5SDimitry Andric let isCommutable = 1 in 1877*0b57cec5SDimitry Andric def rri : PIi8<0xC2, MRMSrcReg, 1878*0b57cec5SDimitry Andric (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm, 1879*0b57cec5SDimitry Andric [(set RC:$dst, (VT (X86cmpp RC:$src1, RC:$src2, imm:$cc)))], d>, 1880*0b57cec5SDimitry Andric 
Sched<[sched]>; 1881*0b57cec5SDimitry Andric def rmi : PIi8<0xC2, MRMSrcMem, 1882*0b57cec5SDimitry Andric (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm, 1883*0b57cec5SDimitry Andric [(set RC:$dst, 1884*0b57cec5SDimitry Andric (VT (X86cmpp RC:$src1, (ld_frag addr:$src2), imm:$cc)))], d>, 1885*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 1886*0b57cec5SDimitry Andric} 1887*0b57cec5SDimitry Andric 1888*0b57cec5SDimitry Andricdefm VCMPPS : sse12_cmp_packed<VR128, f128mem, v4f32, 1889*0b57cec5SDimitry Andric "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 1890*0b57cec5SDimitry Andric SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG; 1891*0b57cec5SDimitry Andricdefm VCMPPD : sse12_cmp_packed<VR128, f128mem, v2f64, 1892*0b57cec5SDimitry Andric "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 1893*0b57cec5SDimitry Andric SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG; 1894*0b57cec5SDimitry Andricdefm VCMPPSY : sse12_cmp_packed<VR256, f256mem, v8f32, 1895*0b57cec5SDimitry Andric "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 1896*0b57cec5SDimitry Andric SchedWriteFCmpSizes.PS.YMM, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L, VEX_WIG; 1897*0b57cec5SDimitry Andricdefm VCMPPDY : sse12_cmp_packed<VR256, f256mem, v4f64, 1898*0b57cec5SDimitry Andric "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 1899*0b57cec5SDimitry Andric SchedWriteFCmpSizes.PD.YMM, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L, VEX_WIG; 1900*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { 1901*0b57cec5SDimitry Andric defm CMPPS : sse12_cmp_packed<VR128, f128mem, v4f32, 1902*0b57cec5SDimitry Andric "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}", 1903*0b57cec5SDimitry Andric SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, memopv4f32>, PS; 1904*0b57cec5SDimitry Andric defm CMPPD : sse12_cmp_packed<VR128, f128mem, v2f64, 1905*0b57cec5SDimitry Andric 
"cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}", 1906*0b57cec5SDimitry Andric SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD; 1907*0b57cec5SDimitry Andric} 1908*0b57cec5SDimitry Andric 1909*0b57cec5SDimitry Andricdef CommutableCMPCC : PatLeaf<(imm), [{ 1910*0b57cec5SDimitry Andric uint64_t Imm = N->getZExtValue() & 0x7; 1911*0b57cec5SDimitry Andric return (Imm == 0x00 || Imm == 0x03 || Imm == 0x04 || Imm == 0x07); 1912*0b57cec5SDimitry Andric}]>; 1913*0b57cec5SDimitry Andric 1914*0b57cec5SDimitry Andric// Patterns to select compares with loads in first operand. 1915*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in { 1916*0b57cec5SDimitry Andric def : Pat<(v4f64 (X86cmpp (loadv4f64 addr:$src2), VR256:$src1, 1917*0b57cec5SDimitry Andric CommutableCMPCC:$cc)), 1918*0b57cec5SDimitry Andric (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>; 1919*0b57cec5SDimitry Andric 1920*0b57cec5SDimitry Andric def : Pat<(v8f32 (X86cmpp (loadv8f32 addr:$src2), VR256:$src1, 1921*0b57cec5SDimitry Andric CommutableCMPCC:$cc)), 1922*0b57cec5SDimitry Andric (VCMPPSYrmi VR256:$src1, addr:$src2, imm:$cc)>; 1923*0b57cec5SDimitry Andric 1924*0b57cec5SDimitry Andric def : Pat<(v2f64 (X86cmpp (loadv2f64 addr:$src2), VR128:$src1, 1925*0b57cec5SDimitry Andric CommutableCMPCC:$cc)), 1926*0b57cec5SDimitry Andric (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; 1927*0b57cec5SDimitry Andric 1928*0b57cec5SDimitry Andric def : Pat<(v4f32 (X86cmpp (loadv4f32 addr:$src2), VR128:$src1, 1929*0b57cec5SDimitry Andric CommutableCMPCC:$cc)), 1930*0b57cec5SDimitry Andric (VCMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>; 1931*0b57cec5SDimitry Andric 1932*0b57cec5SDimitry Andric def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1, 1933*0b57cec5SDimitry Andric CommutableCMPCC:$cc)), 1934*0b57cec5SDimitry Andric (VCMPSDrm FR64:$src1, addr:$src2, imm:$cc)>; 1935*0b57cec5SDimitry Andric 1936*0b57cec5SDimitry Andric def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1, 1937*0b57cec5SDimitry Andric 
CommutableCMPCC:$cc)), 1938*0b57cec5SDimitry Andric (VCMPSSrm FR32:$src1, addr:$src2, imm:$cc)>; 1939*0b57cec5SDimitry Andric} 1940*0b57cec5SDimitry Andric 1941*0b57cec5SDimitry Andriclet Predicates = [UseSSE2] in { 1942*0b57cec5SDimitry Andric def : Pat<(v2f64 (X86cmpp (memopv2f64 addr:$src2), VR128:$src1, 1943*0b57cec5SDimitry Andric CommutableCMPCC:$cc)), 1944*0b57cec5SDimitry Andric (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; 1945*0b57cec5SDimitry Andric 1946*0b57cec5SDimitry Andric def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1, 1947*0b57cec5SDimitry Andric CommutableCMPCC:$cc)), 1948*0b57cec5SDimitry Andric (CMPSDrm FR64:$src1, addr:$src2, imm:$cc)>; 1949*0b57cec5SDimitry Andric} 1950*0b57cec5SDimitry Andric 1951*0b57cec5SDimitry Andriclet Predicates = [UseSSE1] in { 1952*0b57cec5SDimitry Andric def : Pat<(v4f32 (X86cmpp (memopv4f32 addr:$src2), VR128:$src1, 1953*0b57cec5SDimitry Andric CommutableCMPCC:$cc)), 1954*0b57cec5SDimitry Andric (CMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>; 1955*0b57cec5SDimitry Andric 1956*0b57cec5SDimitry Andric def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1, 1957*0b57cec5SDimitry Andric CommutableCMPCC:$cc)), 1958*0b57cec5SDimitry Andric (CMPSSrm FR32:$src1, addr:$src2, imm:$cc)>; 1959*0b57cec5SDimitry Andric} 1960*0b57cec5SDimitry Andric 1961*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 1962*0b57cec5SDimitry Andric// SSE 1 & 2 - Shuffle Instructions 1963*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 1964*0b57cec5SDimitry Andric 1965*0b57cec5SDimitry Andric/// sse12_shuffle - sse 1 & 2 fp shuffle instructions 1966*0b57cec5SDimitry Andricmulticlass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop, 1967*0b57cec5SDimitry Andric ValueType vt, string asm, PatFrag mem_frag, 1968*0b57cec5SDimitry Andric X86FoldableSchedWrite sched, Domain d, 1969*0b57cec5SDimitry Andric bit IsCommutable = 0> { 
1970*0b57cec5SDimitry Andric def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst), 1971*0b57cec5SDimitry Andric (ins RC:$src1, x86memop:$src2, u8imm:$src3), asm, 1972*0b57cec5SDimitry Andric [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2), 1973*0b57cec5SDimitry Andric (i8 imm:$src3))))], d>, 1974*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 1975*0b57cec5SDimitry Andric let isCommutable = IsCommutable in 1976*0b57cec5SDimitry Andric def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), 1977*0b57cec5SDimitry Andric (ins RC:$src1, RC:$src2, u8imm:$src3), asm, 1978*0b57cec5SDimitry Andric [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, 1979*0b57cec5SDimitry Andric (i8 imm:$src3))))], d>, 1980*0b57cec5SDimitry Andric Sched<[sched]>; 1981*0b57cec5SDimitry Andric} 1982*0b57cec5SDimitry Andric 1983*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 1984*0b57cec5SDimitry Andric defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32, 1985*0b57cec5SDimitry Andric "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 1986*0b57cec5SDimitry Andric loadv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, 1987*0b57cec5SDimitry Andric PS, VEX_4V, VEX_WIG; 1988*0b57cec5SDimitry Andric defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32, 1989*0b57cec5SDimitry Andric "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 1990*0b57cec5SDimitry Andric loadv8f32, SchedWriteFShuffle.YMM, SSEPackedSingle>, 1991*0b57cec5SDimitry Andric PS, VEX_4V, VEX_L, VEX_WIG; 1992*0b57cec5SDimitry Andric defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64, 1993*0b57cec5SDimitry Andric "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 1994*0b57cec5SDimitry Andric loadv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>, 1995*0b57cec5SDimitry Andric PD, VEX_4V, VEX_WIG; 1996*0b57cec5SDimitry Andric defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64, 1997*0b57cec5SDimitry Andric "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 
1998*0b57cec5SDimitry Andric loadv4f64, SchedWriteFShuffle.YMM, SSEPackedDouble>, 1999*0b57cec5SDimitry Andric PD, VEX_4V, VEX_L, VEX_WIG; 2000*0b57cec5SDimitry Andric} 2001*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { 2002*0b57cec5SDimitry Andric defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32, 2003*0b57cec5SDimitry Andric "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", 2004*0b57cec5SDimitry Andric memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, PS; 2005*0b57cec5SDimitry Andric defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64, 2006*0b57cec5SDimitry Andric "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", 2007*0b57cec5SDimitry Andric memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD; 2008*0b57cec5SDimitry Andric} 2009*0b57cec5SDimitry Andric 2010*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 2011*0b57cec5SDimitry Andric// SSE 1 & 2 - Unpack FP Instructions 2012*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 2013*0b57cec5SDimitry Andric 2014*0b57cec5SDimitry Andric/// sse12_unpack_interleave - sse 1 & 2 fp unpack and interleave 2015*0b57cec5SDimitry Andricmulticlass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt, 2016*0b57cec5SDimitry Andric PatFrag mem_frag, RegisterClass RC, 2017*0b57cec5SDimitry Andric X86MemOperand x86memop, string asm, 2018*0b57cec5SDimitry Andric X86FoldableSchedWrite sched, Domain d, 2019*0b57cec5SDimitry Andric bit IsCommutable = 0> { 2020*0b57cec5SDimitry Andric let isCommutable = IsCommutable in 2021*0b57cec5SDimitry Andric def rr : PI<opc, MRMSrcReg, 2022*0b57cec5SDimitry Andric (outs RC:$dst), (ins RC:$src1, RC:$src2), 2023*0b57cec5SDimitry Andric asm, [(set RC:$dst, 2024*0b57cec5SDimitry Andric (vt (OpNode RC:$src1, RC:$src2)))], d>, 2025*0b57cec5SDimitry Andric Sched<[sched]>; 2026*0b57cec5SDimitry Andric def rm : PI<opc, MRMSrcMem, 2027*0b57cec5SDimitry Andric (outs 
RC:$dst), (ins RC:$src1, x86memop:$src2), 2028*0b57cec5SDimitry Andric asm, [(set RC:$dst, 2029*0b57cec5SDimitry Andric (vt (OpNode RC:$src1, 2030*0b57cec5SDimitry Andric (mem_frag addr:$src2))))], d>, 2031*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 2032*0b57cec5SDimitry Andric} 2033*0b57cec5SDimitry Andric 2034*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 2035*0b57cec5SDimitry Andricdefm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, load, 2036*0b57cec5SDimitry Andric VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2037*0b57cec5SDimitry Andric SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG; 2038*0b57cec5SDimitry Andricdefm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, load, 2039*0b57cec5SDimitry Andric VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2040*0b57cec5SDimitry Andric SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD, VEX_4V, VEX_WIG; 2041*0b57cec5SDimitry Andricdefm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, load, 2042*0b57cec5SDimitry Andric VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2043*0b57cec5SDimitry Andric SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG; 2044*0b57cec5SDimitry Andricdefm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, load, 2045*0b57cec5SDimitry Andric VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2046*0b57cec5SDimitry Andric SchedWriteFShuffle.XMM, SSEPackedDouble>, PD, VEX_4V, VEX_WIG; 2047*0b57cec5SDimitry Andric 2048*0b57cec5SDimitry Andricdefm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, load, 2049*0b57cec5SDimitry Andric VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2050*0b57cec5SDimitry Andric SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG; 2051*0b57cec5SDimitry Andricdefm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, load, 
2052*0b57cec5SDimitry Andric VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2053*0b57cec5SDimitry Andric SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG; 2054*0b57cec5SDimitry Andricdefm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, load, 2055*0b57cec5SDimitry Andric VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2056*0b57cec5SDimitry Andric SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG; 2057*0b57cec5SDimitry Andricdefm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, load, 2058*0b57cec5SDimitry Andric VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2059*0b57cec5SDimitry Andric SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG; 2060*0b57cec5SDimitry Andric}// Predicates = [HasAVX, NoVLX] 2061*0b57cec5SDimitry Andric 2062*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { 2063*0b57cec5SDimitry Andric defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memop, 2064*0b57cec5SDimitry Andric VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}", 2065*0b57cec5SDimitry Andric SchedWriteFShuffle.XMM, SSEPackedSingle>, PS; 2066*0b57cec5SDimitry Andric defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memop, 2067*0b57cec5SDimitry Andric VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}", 2068*0b57cec5SDimitry Andric SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD; 2069*0b57cec5SDimitry Andric defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memop, 2070*0b57cec5SDimitry Andric VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}", 2071*0b57cec5SDimitry Andric SchedWriteFShuffle.XMM, SSEPackedSingle>, PS; 2072*0b57cec5SDimitry Andric defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memop, 2073*0b57cec5SDimitry Andric VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}", 2074*0b57cec5SDimitry Andric SchedWriteFShuffle.XMM, SSEPackedDouble>, PD; 
2075*0b57cec5SDimitry Andric} // Constraints = "$src1 = $dst" 2076*0b57cec5SDimitry Andric 2077*0b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in { 2078*0b57cec5SDimitry Andric def : Pat<(v8i32 (X86Unpckl VR256:$src1, (loadv8i32 addr:$src2))), 2079*0b57cec5SDimitry Andric (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; 2080*0b57cec5SDimitry Andric def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)), 2081*0b57cec5SDimitry Andric (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; 2082*0b57cec5SDimitry Andric def : Pat<(v8i32 (X86Unpckh VR256:$src1, (loadv8i32 addr:$src2))), 2083*0b57cec5SDimitry Andric (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; 2084*0b57cec5SDimitry Andric def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)), 2085*0b57cec5SDimitry Andric (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; 2086*0b57cec5SDimitry Andric 2087*0b57cec5SDimitry Andric def : Pat<(v4i64 (X86Unpckl VR256:$src1, (loadv4i64 addr:$src2))), 2088*0b57cec5SDimitry Andric (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; 2089*0b57cec5SDimitry Andric def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)), 2090*0b57cec5SDimitry Andric (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; 2091*0b57cec5SDimitry Andric def : Pat<(v4i64 (X86Unpckh VR256:$src1, (loadv4i64 addr:$src2))), 2092*0b57cec5SDimitry Andric (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; 2093*0b57cec5SDimitry Andric def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)), 2094*0b57cec5SDimitry Andric (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; 2095*0b57cec5SDimitry Andric} 2096*0b57cec5SDimitry Andric 2097*0b57cec5SDimitry Andriclet Predicates = [UseSSE2] in { 2098*0b57cec5SDimitry Andric // Use MOVHPD if the load isn't aligned enough for UNPCKLPD. 
2099*0b57cec5SDimitry Andric def : Pat<(v2f64 (X86Unpckl VR128:$src1, 2100*0b57cec5SDimitry Andric (v2f64 (nonvolatile_load addr:$src2)))), 2101*0b57cec5SDimitry Andric (MOVHPDrm VR128:$src1, addr:$src2)>; 2102*0b57cec5SDimitry Andric} 2103*0b57cec5SDimitry Andric 2104*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 2105*0b57cec5SDimitry Andric// SSE 1 & 2 - Extract Floating-Point Sign mask 2106*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 2107*0b57cec5SDimitry Andric 2108*0b57cec5SDimitry Andric/// sse12_extr_sign_mask - sse 1 & 2 unpack and interleave 2109*0b57cec5SDimitry Andricmulticlass sse12_extr_sign_mask<RegisterClass RC, ValueType vt, 2110*0b57cec5SDimitry Andric string asm, Domain d> { 2111*0b57cec5SDimitry Andric def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src), 2112*0b57cec5SDimitry Andric !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 2113*0b57cec5SDimitry Andric [(set GR32orGR64:$dst, (X86movmsk (vt RC:$src)))], d>, 2114*0b57cec5SDimitry Andric Sched<[WriteFMOVMSK]>; 2115*0b57cec5SDimitry Andric} 2116*0b57cec5SDimitry Andric 2117*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in { 2118*0b57cec5SDimitry Andric defm VMOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps", 2119*0b57cec5SDimitry Andric SSEPackedSingle>, PS, VEX, VEX_WIG; 2120*0b57cec5SDimitry Andric defm VMOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd", 2121*0b57cec5SDimitry Andric SSEPackedDouble>, PD, VEX, VEX_WIG; 2122*0b57cec5SDimitry Andric defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, v8f32, "movmskps", 2123*0b57cec5SDimitry Andric SSEPackedSingle>, PS, VEX, VEX_L, VEX_WIG; 2124*0b57cec5SDimitry Andric defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, v4f64, "movmskpd", 2125*0b57cec5SDimitry Andric SSEPackedDouble>, PD, VEX, VEX_L, VEX_WIG; 2126*0b57cec5SDimitry Andric 2127*0b57cec5SDimitry Andric // Also support integer VTs to avoid a int->fp 
bitcast in the DAG. 2128*0b57cec5SDimitry Andric def : Pat<(X86movmsk (v4i32 VR128:$src)), 2129*0b57cec5SDimitry Andric (VMOVMSKPSrr VR128:$src)>; 2130*0b57cec5SDimitry Andric def : Pat<(X86movmsk (v2i64 VR128:$src)), 2131*0b57cec5SDimitry Andric (VMOVMSKPDrr VR128:$src)>; 2132*0b57cec5SDimitry Andric def : Pat<(X86movmsk (v8i32 VR256:$src)), 2133*0b57cec5SDimitry Andric (VMOVMSKPSYrr VR256:$src)>; 2134*0b57cec5SDimitry Andric def : Pat<(X86movmsk (v4i64 VR256:$src)), 2135*0b57cec5SDimitry Andric (VMOVMSKPDYrr VR256:$src)>; 2136*0b57cec5SDimitry Andric} 2137*0b57cec5SDimitry Andric 2138*0b57cec5SDimitry Andricdefm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps", 2139*0b57cec5SDimitry Andric SSEPackedSingle>, PS; 2140*0b57cec5SDimitry Andricdefm MOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd", 2141*0b57cec5SDimitry Andric SSEPackedDouble>, PD; 2142*0b57cec5SDimitry Andric 2143*0b57cec5SDimitry Andriclet Predicates = [UseSSE2] in { 2144*0b57cec5SDimitry Andric // Also support integer VTs to avoid a int->fp bitcast in the DAG. 2145*0b57cec5SDimitry Andric def : Pat<(X86movmsk (v4i32 VR128:$src)), 2146*0b57cec5SDimitry Andric (MOVMSKPSrr VR128:$src)>; 2147*0b57cec5SDimitry Andric def : Pat<(X86movmsk (v2i64 VR128:$src)), 2148*0b57cec5SDimitry Andric (MOVMSKPDrr VR128:$src)>; 2149*0b57cec5SDimitry Andric} 2150*0b57cec5SDimitry Andric 2151*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 2152*0b57cec5SDimitry Andric// SSE2 - Packed Integer Logical Instructions 2153*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 2154*0b57cec5SDimitry Andric 2155*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in { // SSE integer instructions 2156*0b57cec5SDimitry Andric 2157*0b57cec5SDimitry Andric/// PDI_binop_rm - Simple SSE2 binary operator. 
2158*0b57cec5SDimitry Andricmulticlass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 2159*0b57cec5SDimitry Andric ValueType OpVT, RegisterClass RC, PatFrag memop_frag, 2160*0b57cec5SDimitry Andric X86MemOperand x86memop, X86FoldableSchedWrite sched, 2161*0b57cec5SDimitry Andric bit IsCommutable, bit Is2Addr> { 2162*0b57cec5SDimitry Andric let isCommutable = IsCommutable in 2163*0b57cec5SDimitry Andric def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), 2164*0b57cec5SDimitry Andric (ins RC:$src1, RC:$src2), 2165*0b57cec5SDimitry Andric !if(Is2Addr, 2166*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 2167*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 2168*0b57cec5SDimitry Andric [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, 2169*0b57cec5SDimitry Andric Sched<[sched]>; 2170*0b57cec5SDimitry Andric def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), 2171*0b57cec5SDimitry Andric (ins RC:$src1, x86memop:$src2), 2172*0b57cec5SDimitry Andric !if(Is2Addr, 2173*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 2174*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 2175*0b57cec5SDimitry Andric [(set RC:$dst, (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, 2176*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 2177*0b57cec5SDimitry Andric} 2178*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt 2179*0b57cec5SDimitry Andric 2180*0b57cec5SDimitry Andricmulticlass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode, 2181*0b57cec5SDimitry Andric ValueType OpVT128, ValueType OpVT256, 2182*0b57cec5SDimitry Andric X86SchedWriteWidths sched, bit IsCommutable, 2183*0b57cec5SDimitry Andric Predicate prd> { 2184*0b57cec5SDimitry Andriclet Predicates = [HasAVX, prd] in 2185*0b57cec5SDimitry Andric defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128, 2186*0b57cec5SDimitry Andric 
VR128, load, i128mem, sched.XMM, 2187*0b57cec5SDimitry Andric IsCommutable, 0>, VEX_4V, VEX_WIG; 2188*0b57cec5SDimitry Andric 2189*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in 2190*0b57cec5SDimitry Andric defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128, 2191*0b57cec5SDimitry Andric memop, i128mem, sched.XMM, IsCommutable, 1>; 2192*0b57cec5SDimitry Andric 2193*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, prd] in 2194*0b57cec5SDimitry Andric defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, 2195*0b57cec5SDimitry Andric OpVT256, VR256, load, i256mem, sched.YMM, 2196*0b57cec5SDimitry Andric IsCommutable, 0>, VEX_4V, VEX_L, VEX_WIG; 2197*0b57cec5SDimitry Andric} 2198*0b57cec5SDimitry Andric 2199*0b57cec5SDimitry Andric// These are ordered here for pattern ordering requirements with the fp versions 2200*0b57cec5SDimitry Andric 2201*0b57cec5SDimitry Andricdefm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64, 2202*0b57cec5SDimitry Andric SchedWriteVecLogic, 1, NoVLX>; 2203*0b57cec5SDimitry Andricdefm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64, 2204*0b57cec5SDimitry Andric SchedWriteVecLogic, 1, NoVLX>; 2205*0b57cec5SDimitry Andricdefm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64, 2206*0b57cec5SDimitry Andric SchedWriteVecLogic, 1, NoVLX>; 2207*0b57cec5SDimitry Andricdefm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64, 2208*0b57cec5SDimitry Andric SchedWriteVecLogic, 0, NoVLX>; 2209*0b57cec5SDimitry Andric 2210*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 2211*0b57cec5SDimitry Andric// SSE 1 & 2 - Logical Instructions 2212*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 2213*0b57cec5SDimitry Andric 2214*0b57cec5SDimitry Andric/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops 2215*0b57cec5SDimitry Andric/// 2216*0b57cec5SDimitry Andric/// There are no patterns 
here because isel prefers integer versions for SSE2 2217*0b57cec5SDimitry Andric/// and later. There are SSE1 v4f32 patterns later. 2218*0b57cec5SDimitry Andricmulticlass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, 2219*0b57cec5SDimitry Andric SDNode OpNode, X86SchedWriteWidths sched> { 2220*0b57cec5SDimitry Andric let Predicates = [HasAVX, NoVLX] in { 2221*0b57cec5SDimitry Andric defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle, 2222*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "ps"), f256mem, sched.YMM, 2223*0b57cec5SDimitry Andric [], [], 0>, PS, VEX_4V, VEX_L, VEX_WIG; 2224*0b57cec5SDimitry Andric 2225*0b57cec5SDimitry Andric defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble, 2226*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "pd"), f256mem, sched.YMM, 2227*0b57cec5SDimitry Andric [], [], 0>, PD, VEX_4V, VEX_L, VEX_WIG; 2228*0b57cec5SDimitry Andric 2229*0b57cec5SDimitry Andric defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, 2230*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "ps"), f128mem, sched.XMM, 2231*0b57cec5SDimitry Andric [], [], 0>, PS, VEX_4V, VEX_WIG; 2232*0b57cec5SDimitry Andric 2233*0b57cec5SDimitry Andric defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, 2234*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "pd"), f128mem, sched.XMM, 2235*0b57cec5SDimitry Andric [], [], 0>, PD, VEX_4V, VEX_WIG; 2236*0b57cec5SDimitry Andric } 2237*0b57cec5SDimitry Andric 2238*0b57cec5SDimitry Andric let Constraints = "$src1 = $dst" in { 2239*0b57cec5SDimitry Andric defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, 2240*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "ps"), f128mem, sched.XMM, 2241*0b57cec5SDimitry Andric [], []>, PS; 2242*0b57cec5SDimitry Andric 2243*0b57cec5SDimitry Andric defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, 2244*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "pd"), f128mem, sched.XMM, 
2245*0b57cec5SDimitry Andric [], []>, PD; 2246*0b57cec5SDimitry Andric } 2247*0b57cec5SDimitry Andric} 2248*0b57cec5SDimitry Andric 2249*0b57cec5SDimitry Andricdefm AND : sse12_fp_packed_logical<0x54, "and", and, SchedWriteFLogic>; 2250*0b57cec5SDimitry Andricdefm OR : sse12_fp_packed_logical<0x56, "or", or, SchedWriteFLogic>; 2251*0b57cec5SDimitry Andricdefm XOR : sse12_fp_packed_logical<0x57, "xor", xor, SchedWriteFLogic>; 2252*0b57cec5SDimitry Andriclet isCommutable = 0 in 2253*0b57cec5SDimitry Andric defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp, SchedWriteFLogic>; 2254*0b57cec5SDimitry Andric 2255*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in { 2256*0b57cec5SDimitry Andric def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)), 2257*0b57cec5SDimitry Andric (VPANDYrr VR256:$src1, VR256:$src2)>; 2258*0b57cec5SDimitry Andric def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)), 2259*0b57cec5SDimitry Andric (VPANDYrr VR256:$src1, VR256:$src2)>; 2260*0b57cec5SDimitry Andric def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)), 2261*0b57cec5SDimitry Andric (VPANDYrr VR256:$src1, VR256:$src2)>; 2262*0b57cec5SDimitry Andric 2263*0b57cec5SDimitry Andric def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)), 2264*0b57cec5SDimitry Andric (VPORYrr VR256:$src1, VR256:$src2)>; 2265*0b57cec5SDimitry Andric def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)), 2266*0b57cec5SDimitry Andric (VPORYrr VR256:$src1, VR256:$src2)>; 2267*0b57cec5SDimitry Andric def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)), 2268*0b57cec5SDimitry Andric (VPORYrr VR256:$src1, VR256:$src2)>; 2269*0b57cec5SDimitry Andric 2270*0b57cec5SDimitry Andric def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)), 2271*0b57cec5SDimitry Andric (VPXORYrr VR256:$src1, VR256:$src2)>; 2272*0b57cec5SDimitry Andric def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)), 2273*0b57cec5SDimitry Andric (VPXORYrr VR256:$src1, VR256:$src2)>; 2274*0b57cec5SDimitry Andric def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)), 
2275*0b57cec5SDimitry Andric (VPXORYrr VR256:$src1, VR256:$src2)>; 2276*0b57cec5SDimitry Andric 2277*0b57cec5SDimitry Andric def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)), 2278*0b57cec5SDimitry Andric (VPANDNYrr VR256:$src1, VR256:$src2)>; 2279*0b57cec5SDimitry Andric def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)), 2280*0b57cec5SDimitry Andric (VPANDNYrr VR256:$src1, VR256:$src2)>; 2281*0b57cec5SDimitry Andric def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)), 2282*0b57cec5SDimitry Andric (VPANDNYrr VR256:$src1, VR256:$src2)>; 2283*0b57cec5SDimitry Andric 2284*0b57cec5SDimitry Andric def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)), 2285*0b57cec5SDimitry Andric (VPANDYrm VR256:$src1, addr:$src2)>; 2286*0b57cec5SDimitry Andric def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)), 2287*0b57cec5SDimitry Andric (VPANDYrm VR256:$src1, addr:$src2)>; 2288*0b57cec5SDimitry Andric def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)), 2289*0b57cec5SDimitry Andric (VPANDYrm VR256:$src1, addr:$src2)>; 2290*0b57cec5SDimitry Andric 2291*0b57cec5SDimitry Andric def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)), 2292*0b57cec5SDimitry Andric (VPORYrm VR256:$src1, addr:$src2)>; 2293*0b57cec5SDimitry Andric def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)), 2294*0b57cec5SDimitry Andric (VPORYrm VR256:$src1, addr:$src2)>; 2295*0b57cec5SDimitry Andric def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)), 2296*0b57cec5SDimitry Andric (VPORYrm VR256:$src1, addr:$src2)>; 2297*0b57cec5SDimitry Andric 2298*0b57cec5SDimitry Andric def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)), 2299*0b57cec5SDimitry Andric (VPXORYrm VR256:$src1, addr:$src2)>; 2300*0b57cec5SDimitry Andric def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)), 2301*0b57cec5SDimitry Andric (VPXORYrm VR256:$src1, addr:$src2)>; 2302*0b57cec5SDimitry Andric def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)), 2303*0b57cec5SDimitry Andric (VPXORYrm VR256:$src1, addr:$src2)>; 2304*0b57cec5SDimitry Andric 
2305*0b57cec5SDimitry Andric def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)), 2306*0b57cec5SDimitry Andric (VPANDNYrm VR256:$src1, addr:$src2)>; 2307*0b57cec5SDimitry Andric def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)), 2308*0b57cec5SDimitry Andric (VPANDNYrm VR256:$src1, addr:$src2)>; 2309*0b57cec5SDimitry Andric def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)), 2310*0b57cec5SDimitry Andric (VPANDNYrm VR256:$src1, addr:$src2)>; 2311*0b57cec5SDimitry Andric} 2312*0b57cec5SDimitry Andric 2313*0b57cec5SDimitry Andric// If only AVX1 is supported, we need to handle integer operations with 2314*0b57cec5SDimitry Andric// floating point instructions since the integer versions aren't available. 2315*0b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in { 2316*0b57cec5SDimitry Andric def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)), 2317*0b57cec5SDimitry Andric (VANDPSYrr VR256:$src1, VR256:$src2)>; 2318*0b57cec5SDimitry Andric def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)), 2319*0b57cec5SDimitry Andric (VANDPSYrr VR256:$src1, VR256:$src2)>; 2320*0b57cec5SDimitry Andric def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)), 2321*0b57cec5SDimitry Andric (VANDPSYrr VR256:$src1, VR256:$src2)>; 2322*0b57cec5SDimitry Andric def : Pat<(v4i64 (and VR256:$src1, VR256:$src2)), 2323*0b57cec5SDimitry Andric (VANDPSYrr VR256:$src1, VR256:$src2)>; 2324*0b57cec5SDimitry Andric 2325*0b57cec5SDimitry Andric def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)), 2326*0b57cec5SDimitry Andric (VORPSYrr VR256:$src1, VR256:$src2)>; 2327*0b57cec5SDimitry Andric def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)), 2328*0b57cec5SDimitry Andric (VORPSYrr VR256:$src1, VR256:$src2)>; 2329*0b57cec5SDimitry Andric def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)), 2330*0b57cec5SDimitry Andric (VORPSYrr VR256:$src1, VR256:$src2)>; 2331*0b57cec5SDimitry Andric def : Pat<(v4i64 (or VR256:$src1, VR256:$src2)), 2332*0b57cec5SDimitry Andric (VORPSYrr VR256:$src1, VR256:$src2)>; 
2333*0b57cec5SDimitry Andric 2334*0b57cec5SDimitry Andric def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)), 2335*0b57cec5SDimitry Andric (VXORPSYrr VR256:$src1, VR256:$src2)>; 2336*0b57cec5SDimitry Andric def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)), 2337*0b57cec5SDimitry Andric (VXORPSYrr VR256:$src1, VR256:$src2)>; 2338*0b57cec5SDimitry Andric def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)), 2339*0b57cec5SDimitry Andric (VXORPSYrr VR256:$src1, VR256:$src2)>; 2340*0b57cec5SDimitry Andric def : Pat<(v4i64 (xor VR256:$src1, VR256:$src2)), 2341*0b57cec5SDimitry Andric (VXORPSYrr VR256:$src1, VR256:$src2)>; 2342*0b57cec5SDimitry Andric 2343*0b57cec5SDimitry Andric def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)), 2344*0b57cec5SDimitry Andric (VANDNPSYrr VR256:$src1, VR256:$src2)>; 2345*0b57cec5SDimitry Andric def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)), 2346*0b57cec5SDimitry Andric (VANDNPSYrr VR256:$src1, VR256:$src2)>; 2347*0b57cec5SDimitry Andric def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)), 2348*0b57cec5SDimitry Andric (VANDNPSYrr VR256:$src1, VR256:$src2)>; 2349*0b57cec5SDimitry Andric def : Pat<(v4i64 (X86andnp VR256:$src1, VR256:$src2)), 2350*0b57cec5SDimitry Andric (VANDNPSYrr VR256:$src1, VR256:$src2)>; 2351*0b57cec5SDimitry Andric 2352*0b57cec5SDimitry Andric def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)), 2353*0b57cec5SDimitry Andric (VANDPSYrm VR256:$src1, addr:$src2)>; 2354*0b57cec5SDimitry Andric def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)), 2355*0b57cec5SDimitry Andric (VANDPSYrm VR256:$src1, addr:$src2)>; 2356*0b57cec5SDimitry Andric def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)), 2357*0b57cec5SDimitry Andric (VANDPSYrm VR256:$src1, addr:$src2)>; 2358*0b57cec5SDimitry Andric def : Pat<(and VR256:$src1, (loadv4i64 addr:$src2)), 2359*0b57cec5SDimitry Andric (VANDPSYrm VR256:$src1, addr:$src2)>; 2360*0b57cec5SDimitry Andric 2361*0b57cec5SDimitry Andric def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)), 
2362*0b57cec5SDimitry Andric (VORPSYrm VR256:$src1, addr:$src2)>; 2363*0b57cec5SDimitry Andric def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)), 2364*0b57cec5SDimitry Andric (VORPSYrm VR256:$src1, addr:$src2)>; 2365*0b57cec5SDimitry Andric def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)), 2366*0b57cec5SDimitry Andric (VORPSYrm VR256:$src1, addr:$src2)>; 2367*0b57cec5SDimitry Andric def : Pat<(or VR256:$src1, (loadv4i64 addr:$src2)), 2368*0b57cec5SDimitry Andric (VORPSYrm VR256:$src1, addr:$src2)>; 2369*0b57cec5SDimitry Andric 2370*0b57cec5SDimitry Andric def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)), 2371*0b57cec5SDimitry Andric (VXORPSYrm VR256:$src1, addr:$src2)>; 2372*0b57cec5SDimitry Andric def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)), 2373*0b57cec5SDimitry Andric (VXORPSYrm VR256:$src1, addr:$src2)>; 2374*0b57cec5SDimitry Andric def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)), 2375*0b57cec5SDimitry Andric (VXORPSYrm VR256:$src1, addr:$src2)>; 2376*0b57cec5SDimitry Andric def : Pat<(xor VR256:$src1, (loadv4i64 addr:$src2)), 2377*0b57cec5SDimitry Andric (VXORPSYrm VR256:$src1, addr:$src2)>; 2378*0b57cec5SDimitry Andric 2379*0b57cec5SDimitry Andric def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)), 2380*0b57cec5SDimitry Andric (VANDNPSYrm VR256:$src1, addr:$src2)>; 2381*0b57cec5SDimitry Andric def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)), 2382*0b57cec5SDimitry Andric (VANDNPSYrm VR256:$src1, addr:$src2)>; 2383*0b57cec5SDimitry Andric def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)), 2384*0b57cec5SDimitry Andric (VANDNPSYrm VR256:$src1, addr:$src2)>; 2385*0b57cec5SDimitry Andric def : Pat<(X86andnp VR256:$src1, (loadv4i64 addr:$src2)), 2386*0b57cec5SDimitry Andric (VANDNPSYrm VR256:$src1, addr:$src2)>; 2387*0b57cec5SDimitry Andric} 2388*0b57cec5SDimitry Andric 2389*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 2390*0b57cec5SDimitry Andric def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)), 
2391*0b57cec5SDimitry Andric (VPANDrr VR128:$src1, VR128:$src2)>; 2392*0b57cec5SDimitry Andric def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)), 2393*0b57cec5SDimitry Andric (VPANDrr VR128:$src1, VR128:$src2)>; 2394*0b57cec5SDimitry Andric def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)), 2395*0b57cec5SDimitry Andric (VPANDrr VR128:$src1, VR128:$src2)>; 2396*0b57cec5SDimitry Andric 2397*0b57cec5SDimitry Andric def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)), 2398*0b57cec5SDimitry Andric (VPORrr VR128:$src1, VR128:$src2)>; 2399*0b57cec5SDimitry Andric def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)), 2400*0b57cec5SDimitry Andric (VPORrr VR128:$src1, VR128:$src2)>; 2401*0b57cec5SDimitry Andric def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)), 2402*0b57cec5SDimitry Andric (VPORrr VR128:$src1, VR128:$src2)>; 2403*0b57cec5SDimitry Andric 2404*0b57cec5SDimitry Andric def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)), 2405*0b57cec5SDimitry Andric (VPXORrr VR128:$src1, VR128:$src2)>; 2406*0b57cec5SDimitry Andric def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)), 2407*0b57cec5SDimitry Andric (VPXORrr VR128:$src1, VR128:$src2)>; 2408*0b57cec5SDimitry Andric def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)), 2409*0b57cec5SDimitry Andric (VPXORrr VR128:$src1, VR128:$src2)>; 2410*0b57cec5SDimitry Andric 2411*0b57cec5SDimitry Andric def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)), 2412*0b57cec5SDimitry Andric (VPANDNrr VR128:$src1, VR128:$src2)>; 2413*0b57cec5SDimitry Andric def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)), 2414*0b57cec5SDimitry Andric (VPANDNrr VR128:$src1, VR128:$src2)>; 2415*0b57cec5SDimitry Andric def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)), 2416*0b57cec5SDimitry Andric (VPANDNrr VR128:$src1, VR128:$src2)>; 2417*0b57cec5SDimitry Andric 2418*0b57cec5SDimitry Andric def : Pat<(and VR128:$src1, (loadv16i8 addr:$src2)), 2419*0b57cec5SDimitry Andric (VPANDrm VR128:$src1, addr:$src2)>; 2420*0b57cec5SDimitry Andric def : Pat<(and VR128:$src1, 
                              (loadv8i16 addr:$src2)),
            (VPANDrm VR128:$src1, addr:$src2)>;
  def : Pat<(and VR128:$src1, (loadv4i32 addr:$src2)),
            (VPANDrm VR128:$src1, addr:$src2)>;

  def : Pat<(or VR128:$src1, (loadv16i8 addr:$src2)),
            (VPORrm VR128:$src1, addr:$src2)>;
  def : Pat<(or VR128:$src1, (loadv8i16 addr:$src2)),
            (VPORrm VR128:$src1, addr:$src2)>;
  def : Pat<(or VR128:$src1, (loadv4i32 addr:$src2)),
            (VPORrm VR128:$src1, addr:$src2)>;

  def : Pat<(xor VR128:$src1, (loadv16i8 addr:$src2)),
            (VPXORrm VR128:$src1, addr:$src2)>;
  def : Pat<(xor VR128:$src1, (loadv8i16 addr:$src2)),
            (VPXORrm VR128:$src1, addr:$src2)>;
  def : Pat<(xor VR128:$src1, (loadv4i32 addr:$src2)),
            (VPXORrm VR128:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128:$src1, (loadv16i8 addr:$src2)),
            (VPANDNrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128:$src1, (loadv8i16 addr:$src2)),
            (VPANDNrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128:$src1, (loadv4i32 addr:$src2)),
            (VPANDNrm VR128:$src1, addr:$src2)>;
}

// The same bitwise-logic selections for the non-VEX (SSE2) encodings.
// Register-register forms map v16i8/v8i16/v4i32 and/or/xor/X86andnp onto
// PAND/POR/PXOR/PANDN; the second group folds a memop load of the same
// three element types into the corresponding rm variant.
let Predicates = [UseSSE2] in {
  def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
            (PANDrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
            (PANDrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
            (PANDrr VR128:$src1, VR128:$src2)>;

  def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
            (PORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
            (PORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
            (PORrr VR128:$src1, VR128:$src2)>;

  def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
            (PXORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
            (PXORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
            (PXORrr VR128:$src1, VR128:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
            (PANDNrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
            (PANDNrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
            (PANDNrr VR128:$src1, VR128:$src2)>;

  // Load-folding variants (memopv* requires the alignment guarantees of the
  // non-VEX encodings, vs. loadv* in the AVX patterns above).
  def : Pat<(and VR128:$src1, (memopv16i8 addr:$src2)),
            (PANDrm VR128:$src1, addr:$src2)>;
  def : Pat<(and VR128:$src1, (memopv8i16 addr:$src2)),
            (PANDrm VR128:$src1, addr:$src2)>;
  def : Pat<(and VR128:$src1, (memopv4i32 addr:$src2)),
            (PANDrm VR128:$src1, addr:$src2)>;

  def : Pat<(or VR128:$src1, (memopv16i8 addr:$src2)),
            (PORrm VR128:$src1, addr:$src2)>;
  def : Pat<(or VR128:$src1, (memopv8i16 addr:$src2)),
            (PORrm VR128:$src1, addr:$src2)>;
  def : Pat<(or VR128:$src1, (memopv4i32 addr:$src2)),
            (PORrm VR128:$src1, addr:$src2)>;

  def : Pat<(xor VR128:$src1, (memopv16i8 addr:$src2)),
            (PXORrm VR128:$src1, addr:$src2)>;
  def : Pat<(xor VR128:$src1, (memopv8i16 addr:$src2)),
            (PXORrm VR128:$src1, addr:$src2)>;
  def : Pat<(xor VR128:$src1, (memopv4i32 addr:$src2)),
            (PXORrm VR128:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128:$src1, (memopv16i8 addr:$src2)),
            (PANDNrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128:$src1, (memopv8i16 addr:$src2)),
            (PANDNrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128:$src1, (memopv4i32 addr:$src2)),
            (PANDNrm VR128:$src1, addr:$src2)>;
}

// Patterns for packed operations when we don't have integer type available.
// Map the FP-logic nodes on v4f32 onto the SSE1 ANDPS/ORPS/XORPS/ANDNPS
// forms, register and folded-load variants.  (No predicate guard here; this
// is the fallback used when the integer-domain patterns above don't apply.)
def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
          (ANDPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)),
          (ORPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)),
          (XORPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)),
          (ANDNPSrr VR128:$src1, VR128:$src2)>;

def : Pat<(X86fand VR128:$src1, (memopv4f32 addr:$src2)),
          (ANDPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86for VR128:$src1, (memopv4f32 addr:$src2)),
          (ORPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86fxor VR128:$src1, (memopv4f32 addr:$src2)),
          (XORPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)),
          (ANDNPSrm VR128:$src1, addr:$src2)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Arithmetic Instructions
//===----------------------------------------------------------------------===//

/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and
/// vector forms.
///
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a scalar)
/// and leaves the top elements unmodified (therefore these cannot be commuted).
///
/// These three forms can each be reg+reg or reg+mem.
///
/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those
/// classes below

// Packed binop: instantiates the VEX-encoded 128/256-bit PS/PD forms (under
// HasAVX+NoVLX) and, below, the legacy two-address SSE forms.
multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86SchedWriteSizes sched> {
  let Predicates = [HasAVX, NoVLX] in {
  // VEX 128-bit single/double precision forms.
  defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
                               VR128, v4f32, f128mem, loadv4f32,
                               SSEPackedSingle, sched.PS.XMM, 0>, PS, VEX_4V, VEX_WIG;
  defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
                               VR128, v2f64, f128mem, loadv2f64,
                               SSEPackedDouble, sched.PD.XMM, 0>, PD, VEX_4V, VEX_WIG;

  // VEX 256-bit (YMM) forms.
  defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
                        OpNode, VR256, v8f32, f256mem, loadv8f32,
                        SSEPackedSingle, sched.PS.YMM, 0>, PS, VEX_4V, VEX_L, VEX_WIG;
  defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
                        OpNode, VR256, v4f64, f256mem, loadv4f64,
                        SSEPackedDouble, sched.PD.YMM, 0>, PD, VEX_4V, VEX_L, VEX_WIG;
  }

  // Legacy SSE encodings are destructive: the first source is tied to $dst.
  let Constraints = "$src1 = $dst" in {
2560*0b57cec5SDimitry Andric defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128, 2561*0b57cec5SDimitry Andric v4f32, f128mem, memopv4f32, SSEPackedSingle, 2562*0b57cec5SDimitry Andric sched.PS.XMM>, PS; 2563*0b57cec5SDimitry Andric defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128, 2564*0b57cec5SDimitry Andric v2f64, f128mem, memopv2f64, SSEPackedDouble, 2565*0b57cec5SDimitry Andric sched.PD.XMM>, PD; 2566*0b57cec5SDimitry Andric } 2567*0b57cec5SDimitry Andric} 2568*0b57cec5SDimitry Andric 2569*0b57cec5SDimitry Andricmulticlass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, 2570*0b57cec5SDimitry Andric X86SchedWriteSizes sched> { 2571*0b57cec5SDimitry Andric defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), 2572*0b57cec5SDimitry Andric OpNode, FR32, f32mem, SSEPackedSingle, sched.PS.Scl, 0>, 2573*0b57cec5SDimitry Andric XS, VEX_4V, VEX_LIG, VEX_WIG; 2574*0b57cec5SDimitry Andric defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), 2575*0b57cec5SDimitry Andric OpNode, FR64, f64mem, SSEPackedDouble, sched.PD.Scl, 0>, 2576*0b57cec5SDimitry Andric XD, VEX_4V, VEX_LIG, VEX_WIG; 2577*0b57cec5SDimitry Andric 2578*0b57cec5SDimitry Andric let Constraints = "$src1 = $dst" in { 2579*0b57cec5SDimitry Andric defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), 2580*0b57cec5SDimitry Andric OpNode, FR32, f32mem, SSEPackedSingle, 2581*0b57cec5SDimitry Andric sched.PS.Scl>, XS; 2582*0b57cec5SDimitry Andric defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), 2583*0b57cec5SDimitry Andric OpNode, FR64, f64mem, SSEPackedDouble, 2584*0b57cec5SDimitry Andric sched.PD.Scl>, XD; 2585*0b57cec5SDimitry Andric } 2586*0b57cec5SDimitry Andric} 2587*0b57cec5SDimitry Andric 2588*0b57cec5SDimitry Andricmulticlass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, 2589*0b57cec5SDimitry Andric SDPatternOperator OpNode, 2590*0b57cec5SDimitry Andric X86SchedWriteSizes sched> { 
  defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
                   !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
                   SSEPackedSingle, sched.PS.Scl, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG;
  defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
                   !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
                   SSEPackedDouble, sched.PD.Scl, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG;

  // Legacy tied-operand encodings of the intrinsic forms.
  let Constraints = "$src1 = $dst" in {
    defm SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
                   !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
                   SSEPackedSingle, sched.PS.Scl>, XS;
    defm SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
                   !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
                   SSEPackedDouble, sched.PD.Scl>, XD;
  }
}

// Binary Arithmetic instructions
// Each defm stamps out packed, scalar, and intrinsic-scalar variants.  The
// intrinsic forms for add/mul/sub/div use null_frag: the instructions exist
// for encoding but are selected via the scalar_math_patterns below instead.
defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SchedWriteFAddSizes>,
           basic_sse12_fp_binop_s<0x58, "add", fadd, SchedWriteFAddSizes>,
           basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAddSizes>;
defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SchedWriteFMulSizes>,
           basic_sse12_fp_binop_s<0x59, "mul", fmul, SchedWriteFMulSizes>,
           basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMulSizes>;
// sub/div are not commutative; min/max are kept non-commutable here because
// the X86fmin/X86fmax forms are selected with operand order significant (the
// commutable variants are the MINC/MAXC defs below).
let isCommutable = 0 in {
  defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SchedWriteFAddSizes>,
             basic_sse12_fp_binop_s<0x5C, "sub", fsub, SchedWriteFAddSizes>,
             basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAddSizes>;
  defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SchedWriteFDivSizes>,
             basic_sse12_fp_binop_s<0x5E, "div", fdiv, SchedWriteFDivSizes>,
             basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDivSizes>;
  defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SchedWriteFCmpSizes>;
  defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SchedWriteFCmpSizes>;
}

// "Commutable" min/max selected from X86fminc/X86fmaxc; same opcodes as
// MIN/MAX above, so they are codegen-only to avoid duplicate assembler defs.
let isCodeGenOnly = 1 in {
  defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>;
  defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SchedWriteFCmpSizes>;
}

// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// Previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {c[0], a[1], a[2], a[3]};
//   }
//
// Previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0

// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.

// Matches (Move dst, scalar_to_vector(Op(extract(dst, 0), src))) — i.e. a
// scalar op on element 0 followed by a movss/movsd-style blend back into
// dst — and selects the corresponding *_Int instruction, for both a register
// RHS and a folded scalar load.
multiclass scalar_math_patterns<SDNode Op, string OpcPrefix, SDNode Move,
                                ValueType VT, ValueType EltTy,
                                RegisterClass RC, PatFrag ld_frag,
                                Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    // extracted scalar math op with insert via movss/movsd
    def : Pat<(VT (Move (VT VR128:$dst),
                        (VT (scalar_to_vector
                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                                 RC:$src))))),
              (!cast<Instruction>(OpcPrefix#rr_Int) VT:$dst,
               (VT (COPY_TO_REGCLASS RC:$src, VR128)))>;
    def : Pat<(VT (Move (VT VR128:$dst),
                        (VT (scalar_to_vector
                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                                 (ld_frag addr:$src)))))),
              (!cast<Instruction>(OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
  }

  // Repeat for AVX versions of the instructions.
  let Predicates = [UseAVX] in {
    // extracted scalar math op with insert via movss/movsd
    def : Pat<(VT (Move (VT VR128:$dst),
                        (VT (scalar_to_vector
                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                                 RC:$src))))),
              (!cast<Instruction>("V"#OpcPrefix#rr_Int) VT:$dst,
               (VT (COPY_TO_REGCLASS RC:$src, VR128)))>;
    def : Pat<(VT (Move (VT VR128:$dst),
                        (VT (scalar_to_vector
                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                                 (ld_frag addr:$src)))))),
              (!cast<Instruction>("V"#OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
  }
}

// Instantiate the scalar-math patterns for the four basic FP binops, in both
// single precision (f32 under UseSSE1) and double precision (f64, UseSSE2).
defm : scalar_math_patterns<fadd, "ADDSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
defm : scalar_math_patterns<fsub, "SUBSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
defm : scalar_math_patterns<fmul, "MULSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
defm : scalar_math_patterns<fdiv, "DIVSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;

defm : scalar_math_patterns<fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
defm : scalar_math_patterns<fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
defm : scalar_math_patterns<fmul, "MULSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
defm : scalar_math_patterns<fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;

/// Unop Arithmetic
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a
/// scalar) and leaves the top elements undefined.
///
/// And, we have a special variant form for a full-vector intrinsic form.

/// sse_fp_unop_s - SSE1 unops in scalar form
/// For the non-AVX defs, we need $src1 to be tied to $dst because
/// the HW instructions are 2 operand / destructive.
multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                         ValueType ScalarVT, X86MemOperand x86memop,
                         Operand intmemop, SDNode OpNode, Domain d,
                         X86FoldableSchedWrite sched, Predicate target> {
  // Plain FR32/FR64 forms, selected directly from OpNode; codegen-only
  // because the assembler-visible defs are the _Int ones below.
  let isCodeGenOnly = 1, hasSideEffects = 0 in {
  def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1),
            !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
            [(set RC:$dst, (OpNode RC:$src1))], d>, Sched<[sched]>,
            Requires<[target]>;
  // Load-folding form only under OptForSize (partial-register-update
  // avoidance; see the comment in sse_fp_unop_s_intr below).
  let mayLoad = 1 in
  def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1),
            !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
            [(set RC:$dst, (OpNode (load addr:$src1)))], d>,
Sched<[sched.Folded]>, 2750*0b57cec5SDimitry Andric Requires<[target, OptForSize]>; 2751*0b57cec5SDimitry Andric } 2752*0b57cec5SDimitry Andric 2753*0b57cec5SDimitry Andric let hasSideEffects = 0, Constraints = "$src1 = $dst", ExeDomain = d in { 2754*0b57cec5SDimitry Andric def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 2755*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>, 2756*0b57cec5SDimitry Andric Sched<[sched]>; 2757*0b57cec5SDimitry Andric let mayLoad = 1 in 2758*0b57cec5SDimitry Andric def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, intmemop:$src2), 2759*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>, 2760*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 2761*0b57cec5SDimitry Andric } 2762*0b57cec5SDimitry Andric 2763*0b57cec5SDimitry Andric} 2764*0b57cec5SDimitry Andric 2765*0b57cec5SDimitry Andricmulticlass sse_fp_unop_s_intr<RegisterClass RC, ValueType vt, 2766*0b57cec5SDimitry Andric ComplexPattern int_cpat, Intrinsic Intr, 2767*0b57cec5SDimitry Andric Predicate target, string Suffix> { 2768*0b57cec5SDimitry Andric let Predicates = [target] in { 2769*0b57cec5SDimitry Andric // These are unary operations, but they are modeled as having 2 source operands 2770*0b57cec5SDimitry Andric // because the high elements of the destination are unchanged in SSE. 2771*0b57cec5SDimitry Andric def : Pat<(Intr VR128:$src), 2772*0b57cec5SDimitry Andric (!cast<Instruction>(NAME#r_Int) VR128:$src, VR128:$src)>; 2773*0b57cec5SDimitry Andric } 2774*0b57cec5SDimitry Andric // We don't want to fold scalar loads into these instructions unless 2775*0b57cec5SDimitry Andric // optimizing for size. This is because the folded instruction will have a 2776*0b57cec5SDimitry Andric // partial register update, while the unfolded sequence will not, e.g. 
  // movss mem, %xmm0
  // rcpss %xmm0, %xmm0
  // which has a clobber before the rcp, vs.
  // rcpss mem, %xmm0
  let Predicates = [target, OptForSize] in {
    def : Pat<(Intr int_cpat:$src2),
               (!cast<Instruction>(NAME#m_Int)
                      (vt (IMPLICIT_DEF)), addr:$src2)>;
  }
}

// AVX analogue of sse_fp_unop_s_intr: same register/load-folding patterns,
// but targeting the three-operand V* _Int instructions.
multiclass avx_fp_unop_s_intr<RegisterClass RC, ValueType vt, ComplexPattern int_cpat,
                              Intrinsic Intr, Predicate target> {
  let Predicates = [target] in {
   def : Pat<(Intr VR128:$src),
             (!cast<Instruction>(NAME#r_Int) VR128:$src,
                                 VR128:$src)>;
  }
  // Load folding only under OptForSize, for the same partial-register-update
  // reason documented in sse_fp_unop_s_intr above.
  let Predicates = [target, OptForSize] in {
    def : Pat<(Intr int_cpat:$src2),
              (!cast<Instruction>(NAME#m_Int)
                    (vt (IMPLICIT_DEF)), addr:$src2)>;
  }
}

/// avx_fp_unop_s - AVX scalar unops: three-operand VEX encodings, with the
/// first source supplying the preserved upper elements of the destination.
multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                         ValueType ScalarVT, X86MemOperand x86memop,
                         Operand intmemop, SDNode OpNode, Domain d,
                         X86FoldableSchedWrite sched, Predicate target> {
  // FR32/FR64 forms; codegen-only, with no patterns — selection is done by
  // the Pat defs at the end of this multiclass.
  let isCodeGenOnly = 1, hasSideEffects = 0 in {
  def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
            [], d>, Sched<[sched]>;
  let mayLoad = 1 in
  def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
            [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  // Whole-XMM "_Int" forms used by avx_fp_unop_s_intr.
  let hasSideEffects = 0, ExeDomain = d in {
  def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
                (ins VR128:$src1, VR128:$src2),
                !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                []>, Sched<[sched]>;
  let mayLoad = 1 in
  def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
                (ins VR128:$src1, intmemop:$src2),
                !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // We don't want to fold scalar loads into these instructions unless
  // optimizing for size. This is because the folded instruction will have a
  // partial register update, while the unfolded sequence will not, e.g.
  // vmovss mem, %xmm0
  // vrcpss %xmm0, %xmm0, %xmm0
  // which has a clobber before the rcp, vs.
  // vrcpss mem, %xmm0, %xmm0
  // TODO: In theory, we could fold the load, and avoid the stall caused by
  // the partial register store, either in BreakFalseDeps or with smarter RA.
  // Select the plain scalar node; the unused first operand (preserved upper
  // elements) is fed with IMPLICIT_DEF.
  let Predicates = [target] in {
   def : Pat<(OpNode RC:$src),  (!cast<Instruction>(NAME#r)
             (ScalarVT (IMPLICIT_DEF)), RC:$src)>;
  }
  // Load folding only under OptForSize (see the comment above).
  let Predicates = [target, OptForSize] in {
    def : Pat<(ScalarVT (OpNode (load addr:$src))),
              (!cast<Instruction>(NAME#m) (ScalarVT (IMPLICIT_DEF)),
              addr:$src)>;
  }
}

/// sse1_fp_unop_p - SSE1 unops in packed form.
multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched, list<Predicate> prds> {
// VEX-encoded 128-bit and 256-bit "ps" forms, gated on the caller-supplied
// predicate list.
let Predicates = prds in {
  def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       !strconcat("v", OpcodeStr,
                                  "ps\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>,
                       VEX, Sched<[sched.XMM]>, VEX_WIG;
  def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       !strconcat("v", OpcodeStr,
                                  "ps\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))]>,
                       VEX, Sched<[sched.XMM.Folded]>, VEX_WIG;
  def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                        !strconcat("v", OpcodeStr,
                                   "ps\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>,
                        VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
  def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                        !strconcat("v", OpcodeStr,
                                   "ps\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (OpNode (loadv8f32 addr:$src)))]>,
                        VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG;
}

 // Non-VEX 128-bit forms (memopv4f32: alignment-requiring load).
 def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
               !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
               [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>,
               Sched<[sched.XMM]>;
 def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
               !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
               [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>,
               Sched<[sched.XMM.Folded]>;
}

/// sse2_fp_unop_p - SSE2 unops in vector forms.
multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
                          SDNode OpNode, X86SchedWriteWidths sched> {
// VEX-encoded 128-bit and 256-bit "pd" forms.
let Predicates = [HasAVX, NoVLX] in {
  def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       !strconcat("v", OpcodeStr,
                                  "pd\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>,
                       VEX, Sched<[sched.XMM]>, VEX_WIG;
  def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       !strconcat("v", OpcodeStr,
                                  "pd\t{$src, $dst|$dst, $src}"),
                       [(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))]>,
                       VEX, Sched<[sched.XMM.Folded]>, VEX_WIG;
  def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                        !strconcat("v", OpcodeStr,
                                   "pd\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>,
                        VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
  def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                        !strconcat("v", OpcodeStr,
                                   "pd\t{$src, $dst|$dst, $src}"),
                        [(set VR256:$dst, (OpNode (loadv4f64 addr:$src)))]>,
                        VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG;
}

 // Non-VEX 128-bit forms (memopv2f64: alignment-requiring load).
 def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
               !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
               [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>,
               Sched<[sched.XMM]>;
 def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
               !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
               [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>,
               Sched<[sched.XMM.Folded]>;
}

// Wires an SSE1 scalar-unop intrinsic (int_x86_sse_<name>_ss) to both the
// legacy SS and the AVX V*SS _Int instructions via the *_intr helpers above.
multiclass sse1_fp_unop_s_intr<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched, Predicate AVXTarget> {
  defm SS        :  sse_fp_unop_s_intr<FR32, v4f32, sse_load_f32,
                      !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss),
                      UseSSE1, "SS">, XS;
  defm V#NAME#SS  : avx_fp_unop_s_intr<FR32, v4f32, sse_load_f32,
                      !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss),
                      AVXTarget>,
                      XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable;
}

// Instantiates both the legacy SS and AVX V*SS scalar-unop instructions.
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched, Predicate AVXTarget> {
  defm SS        :  sse_fp_unop_s<opc, OpcodeStr##ss, FR32, f32, f32mem,
                      ssmem, OpNode, SSEPackedSingle, sched.Scl, UseSSE1>, XS;
  defm V#NAME#SS  : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, f32,
                      f32mem, ssmem, OpNode, SSEPackedSingle, sched.Scl, AVXTarget>,
                      XS, VEX_4V, VEX_LIG, VEX_WIG;
}

// Double-precision counterpart: legacy SD and AVX V*SD forms.
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched, Predicate AVXTarget> {
  defm SD         : sse_fp_unop_s<opc, OpcodeStr##sd, FR64, f64, f64mem,
                         sdmem, OpNode, SSEPackedDouble, sched.Scl, UseSSE2>, XD;
  defm V#NAME#SD  : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, f64,
                         f64mem, sdmem, OpNode, SSEPackedDouble, sched.Scl, AVXTarget>,
                         XD, VEX_4V, VEX_LIG, VEX_WIG;
}

// Square root.
defm SQRT  : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt, UseAVX>,
             sse1_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>,
             sse2_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt64, UseAVX>,
             sse2_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt64>;

// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>,
             sse1_fp_unop_s_intr<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>,
             sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, [HasAVX]>;
defm RCP   : sse1_fp_unop_s<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>,
             sse1_fp_unop_s_intr<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>,
             sse1_fp_unop_p<0x53, "rcp", X86frcp, SchedWriteFRcp, [HasAVX]>;

// There is no f64 version of the reciprocal approximation instructions.
// There is no f64 version of the reciprocal approximation instructions.

/// scalar_unary_math_patterns - Fold (Move dst, (scalar_to_vector (OpNode
/// (extractelt src, 0)))) into the corresponding *_Int scalar instruction,
/// for both the base (SSE) and AVX instruction names.
multiclass scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix, SDNode Move,
                                      ValueType VT, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    def : Pat<(VT (Move VT:$dst, (scalar_to_vector
                                  (OpNode (extractelt VT:$src, 0))))),
              (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src)>;
  }

  // Repeat for AVX versions of the instructions.
  let Predicates = [UseAVX] in {
    def : Pat<(VT (Move VT:$dst, (scalar_to_vector
                                  (OpNode (extractelt VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
  }
}

defm : scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32, UseSSE1>;
defm : scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64, UseSSE2>;

/// scalar_unary_math_intr_patterns - Same as above but matching the intrinsic
/// node directly instead of the scalar_to_vector/extractelt idiom.
multiclass scalar_unary_math_intr_patterns<Intrinsic Intr, string OpcPrefix,
                                           SDNode Move, ValueType VT,
                                           Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
              (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src)>;
  }

  // Repeat for AVX versions of the instructions.
  let Predicates = [HasAVX] in {
    def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
              (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
  }
}

defm : scalar_unary_math_intr_patterns<int_x86_sse_rcp_ss, "RCPSS", X86Movss,
                                       v4f32, UseSSE1>;
defm : scalar_unary_math_intr_patterns<int_x86_sse_rsqrt_ss, "RSQRTSS", X86Movss,
                                       v4f32, UseSSE1>;


//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Non-temporal stores
//===----------------------------------------------------------------------===//

let AddedComplexity = 400 in { // Prefer non-temporal versions
let Predicates = [HasAVX, NoVLX] in {
let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
                      (ins f128mem:$dst, VR128:$src),
                      "movntps\t{$src, $dst|$dst, $src}",
                      [(alignednontemporalstore (v4f32 VR128:$src),
                                                addr:$dst)]>, VEX, VEX_WIG;
def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
                      (ins f128mem:$dst, VR128:$src),
                      "movntpd\t{$src, $dst|$dst, $src}",
                      [(alignednontemporalstore (v2f64 VR128:$src),
                                                addr:$dst)]>, VEX, VEX_WIG;
} // SchedRW

let SchedRW = [SchedWriteFMoveLSNT.YMM.MR] in {
def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
                       (ins f256mem:$dst, VR256:$src),
                       "movntps\t{$src, $dst|$dst, $src}",
                       [(alignednontemporalstore (v8f32 VR256:$src),
                                                 addr:$dst)]>, VEX, VEX_L, VEX_WIG;
def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
                       (ins f256mem:$dst, VR256:$src),
                       "movntpd\t{$src, $dst|$dst, $src}",
                       [(alignednontemporalstore (v4f64 VR256:$src),
                                                 addr:$dst)]>, VEX, VEX_L, VEX_WIG;
} // SchedRW

let ExeDomain = SSEPackedInt in {
def VMOVNTDQmr  : VPDI<0xE7, MRMDestMem, (outs),
                       (ins i128mem:$dst, VR128:$src),
                       "movntdq\t{$src, $dst|$dst, $src}",
                       [(alignednontemporalstore (v2i64 VR128:$src),
                                                 addr:$dst)]>, VEX, VEX_WIG,
                       Sched<[SchedWriteVecMoveLSNT.XMM.MR]>;
def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
                       (ins i256mem:$dst, VR256:$src),
                       "movntdq\t{$src, $dst|$dst, $src}",
                       [(alignednontemporalstore (v4i64 VR256:$src),
                                                 addr:$dst)]>, VEX, VEX_L, VEX_WIG,
                       Sched<[SchedWriteVecMoveLSNT.YMM.MR]>;
} // ExeDomain
} // Predicates

let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movntps\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movntpd\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
} // SchedRW

let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLSNT.XMM.MR] in
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movntdq\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>;

let SchedRW = [WriteStoreNT] in {
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                 "movnti{l}\t{$src, $dst|$dst, $src}",
                 [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
                 PS, Requires<[HasSSE2]>;
def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
                     "movnti{q}\t{$src, $dst|$dst, $src}",
                     [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
                     PS, Requires<[HasSSE2]>;
} // SchedRW = [WriteStoreNT]

// Select VMOVNTDQ for the remaining 256-bit and 128-bit integer element
// types as well.
let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(alignednontemporalstore (v8i32 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
}

} // AddedComplexity

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Prefetch and memory fence
//===----------------------------------------------------------------------===//

// Prefetch intrinsic.
let Predicates = [HasSSEPrefetch], SchedRW = [WriteLoad] in {
def PREFETCHT0  : I<0x18, MRM1m, (outs), (ins i8mem:$src),
    "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>, TB;
def PREFETCHT1  : I<0x18, MRM2m, (outs), (ins i8mem:$src),
    "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>, TB;
def PREFETCHT2  : I<0x18, MRM3m, (outs), (ins i8mem:$src),
    "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>, TB;
def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src),
    "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>, TB;
}

// FIXME: How should flush instruction be modeled?
let SchedRW = [WriteLoad] in {
// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
                "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
                PS, Requires<[HasSSE2]>;
}

let SchedRW = [WriteNop] in {
// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
def PAUSE : I<0x90, RawFrm, (outs), (ins),
              "pause", [(int_x86_sse2_pause)]>, OBXS;
}

let SchedRW = [WriteFence] in {
// Load, store, and memory fence
// TODO: As with mfence, we may want to ease the availablity of sfence/lfence
// to include any 64-bit target.
def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
             PS, Requires<[HasSSE1]>;
def LFENCE : I<0xAE, MRM_E8, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>,
             PS, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>,
             PS, Requires<[HasMFence]>;
} // SchedRW

def : Pat<(X86MFence), (MFENCE)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Load/Store XCSR register
//===----------------------------------------------------------------------===//

let mayLoad=1, hasSideEffects=1 in
def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
                    "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
                    VEX, Sched<[WriteLDMXCSR]>, VEX_WIG;
let mayStore=1, hasSideEffects=1 in
def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                    "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
                    VEX, Sched<[WriteSTMXCSR]>, VEX_WIG;

let mayLoad=1, hasSideEffects=1 in
def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src),
                "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
                TB, Sched<[WriteLDMXCSR]>;
let mayStore=1, hasSideEffects=1 in
def STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
                TB, Sched<[WriteSTMXCSR]>;

//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in { // SSE integer instructions

let hasSideEffects = 0 in {
def VMOVDQArr  : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, VEX_WIG;
def VMOVDQUrr  : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "movdqu\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, VEX_WIG;
def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, VEX_WIG;
def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      "movdqu\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, VEX_WIG;
}

// For Disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def VMOVDQArr_REV  : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                          "movdqa\t{$src, $dst|$dst, $src}", []>,
                          Sched<[SchedWriteVecMoveLS.XMM.RR]>,
                          VEX, VEX_WIG, FoldGenData<"VMOVDQArr">;
def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                          "movdqa\t{$src, $dst|$dst, $src}", []>,
                          Sched<[SchedWriteVecMoveLS.YMM.RR]>,
                          VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQAYrr">;
def VMOVDQUrr_REV  : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                          "movdqu\t{$src, $dst|$dst, $src}", []>,
                          Sched<[SchedWriteVecMoveLS.XMM.RR]>,
                          VEX, VEX_WIG, FoldGenData<"VMOVDQUrr">;
def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                          "movdqu\t{$src, $dst|$dst, $src}", []>,
                          Sched<[SchedWriteVecMoveLS.YMM.RR]>,
                          VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQUYrr">;
}

let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
    hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in {
def VMOVDQArm  : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                      "movdqa\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (alignedloadv2i64 addr:$src))]>,
                      Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                      VEX, VEX_L, VEX_WIG;
def VMOVDQUrm  : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv2i64 addr:$src))]>,
                   Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                   XS, VEX, VEX_WIG;
def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}", []>,
                   Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                   XS, VEX, VEX_L, VEX_WIG;
}

let mayStore = 1, hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in {
def VMOVDQAmr  : VPDI<0x7F, MRMDestMem, (outs),
                      (ins i128mem:$dst, VR128:$src),
                      "movdqa\t{$src, $dst|$dst, $src}",
                      [(alignedstore (v2i64 VR128:$src), addr:$dst)]>,
                      Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_WIG;
def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
                      (ins i256mem:$dst, VR256:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.YMM.MR]>, VEX, VEX_L, VEX_WIG;
def VMOVDQUmr  : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}",
                   [(store (v2i64 VR128:$src), addr:$dst)]>,
                   Sched<[SchedWriteVecMoveLS.XMM.MR]>, XS, VEX, VEX_WIG;
def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}",[]>,
                   Sched<[SchedWriteVecMoveLS.YMM.MR]>, XS, VEX, VEX_L, VEX_WIG;
}

let SchedRW = [SchedWriteVecMoveLS.XMM.RR] in {
let hasSideEffects = 0 in {
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "movdqa\t{$src, $dst|$dst, $src}", []>;

def MOVDQUrr :   I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "movdqu\t{$src, $dst|$dst, $src}", []>,
                   XS, Requires<[UseSSE2]>;
}

// For Disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                       "movdqa\t{$src, $dst|$dst, $src}", []>,
                       FoldGenData<"MOVDQArr">;

def MOVDQUrr_REV :   I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                       "movdqu\t{$src, $dst|$dst, $src}", []>,
                       XS, Requires<[UseSSE2]>, FoldGenData<"MOVDQUrr">;
}
} // SchedRW

let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
    hasSideEffects = 0, SchedRW = [SchedWriteVecMoveLS.XMM.RM] in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "movdqa\t{$src, $dst|$dst, $src}",
                   [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
def MOVDQUrm :   I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "movdqu\t{$src, $dst|$dst, $src}",
                   [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
                   XS, Requires<[UseSSE2]>;
}

let mayStore = 1, hasSideEffects = 0,
    SchedRW = [SchedWriteVecMoveLS.XMM.MR] in {
def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                   "movdqa\t{$src, $dst|$dst, $src}",
                   [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
def MOVDQUmr :   I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                   "movdqu\t{$src, $dst|$dst, $src}",
                   [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
                   XS, Requires<[UseSSE2]>;
}

} // ExeDomain = SSEPackedInt

// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"vmovdqa.s\t{$src, $dst|$dst, $src}",
                (VMOVDQArr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovdqa.s\t{$src, $dst|$dst, $src}",
                (VMOVDQAYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovdqu.s\t{$src, $dst|$dst, $src}",
                (VMOVDQUrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovdqu.s\t{$src, $dst|$dst, $src}",
                (VMOVDQUYrr_REV VR256:$dst, VR256:$src), 0>;

// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"movdqa.s\t{$src, $dst|$dst, $src}",
                (MOVDQArr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movdqu.s\t{$src, $dst|$dst, $src}",
                (MOVDQUrr_REV VR128:$dst, VR128:$src), 0>;

let Predicates = [HasAVX, NoVLX] in {
  // Additional patterns for other integer sizes.
  def : Pat<(alignedloadv4i32 addr:$src),
            (VMOVDQArm addr:$src)>;
  def : Pat<(alignedloadv8i16 addr:$src),
            (VMOVDQArm addr:$src)>;
  def : Pat<(alignedloadv16i8 addr:$src),
            (VMOVDQArm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),
            (VMOVDQUrm addr:$src)>;
  def : Pat<(loadv8i16 addr:$src),
            (VMOVDQUrm addr:$src)>;
  def : Pat<(loadv16i8 addr:$src),
            (VMOVDQUrm addr:$src)>;

  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (VMOVDQAmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (VMOVDQAmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (VMOVDQAmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (VMOVDQUmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (VMOVDQUmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (VMOVDQUmr addr:$dst, VR128:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//
3349*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in { // SSE integer instructions 3350*0b57cec5SDimitry Andric 3351*0b57cec5SDimitry Andric/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types 3352*0b57cec5SDimitry Andricmulticlass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, 3353*0b57cec5SDimitry Andric ValueType DstVT, ValueType SrcVT, RegisterClass RC, 3354*0b57cec5SDimitry Andric PatFrag memop_frag, X86MemOperand x86memop, 3355*0b57cec5SDimitry Andric X86FoldableSchedWrite sched, bit Is2Addr = 1> { 3356*0b57cec5SDimitry Andric let isCommutable = 1 in 3357*0b57cec5SDimitry Andric def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), 3358*0b57cec5SDimitry Andric (ins RC:$src1, RC:$src2), 3359*0b57cec5SDimitry Andric !if(Is2Addr, 3360*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3361*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3362*0b57cec5SDimitry Andric [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>, 3363*0b57cec5SDimitry Andric Sched<[sched]>; 3364*0b57cec5SDimitry Andric def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), 3365*0b57cec5SDimitry Andric (ins RC:$src1, x86memop:$src2), 3366*0b57cec5SDimitry Andric !if(Is2Addr, 3367*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3368*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3369*0b57cec5SDimitry Andric [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), 3370*0b57cec5SDimitry Andric (memop_frag addr:$src2))))]>, 3371*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 3372*0b57cec5SDimitry Andric} 3373*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt 3374*0b57cec5SDimitry Andric 3375*0b57cec5SDimitry Andricdefm PADDB : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8, 3376*0b57cec5SDimitry Andric SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3377*0b57cec5SDimitry Andricdefm PADDW : 
PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16, 3378*0b57cec5SDimitry Andric SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3379*0b57cec5SDimitry Andricdefm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32, 3380*0b57cec5SDimitry Andric SchedWriteVecALU, 1, NoVLX>; 3381*0b57cec5SDimitry Andricdefm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64, 3382*0b57cec5SDimitry Andric SchedWriteVecALU, 1, NoVLX>; 3383*0b57cec5SDimitry Andricdefm PADDSB : PDI_binop_all<0xEC, "paddsb", saddsat, v16i8, v32i8, 3384*0b57cec5SDimitry Andric SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3385*0b57cec5SDimitry Andricdefm PADDSW : PDI_binop_all<0xED, "paddsw", saddsat, v8i16, v16i16, 3386*0b57cec5SDimitry Andric SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3387*0b57cec5SDimitry Andricdefm PADDUSB : PDI_binop_all<0xDC, "paddusb", uaddsat, v16i8, v32i8, 3388*0b57cec5SDimitry Andric SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3389*0b57cec5SDimitry Andricdefm PADDUSW : PDI_binop_all<0xDD, "paddusw", uaddsat, v8i16, v16i16, 3390*0b57cec5SDimitry Andric SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3391*0b57cec5SDimitry Andricdefm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16, 3392*0b57cec5SDimitry Andric SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>; 3393*0b57cec5SDimitry Andricdefm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16, 3394*0b57cec5SDimitry Andric SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>; 3395*0b57cec5SDimitry Andricdefm PMULHW : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16, 3396*0b57cec5SDimitry Andric SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>; 3397*0b57cec5SDimitry Andricdefm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8, 3398*0b57cec5SDimitry Andric SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; 3399*0b57cec5SDimitry Andricdefm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16, 3400*0b57cec5SDimitry Andric SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; 3401*0b57cec5SDimitry Andricdefm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32, 3402*0b57cec5SDimitry Andric SchedWriteVecALU, 
0, NoVLX>; 3403*0b57cec5SDimitry Andricdefm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64, 3404*0b57cec5SDimitry Andric SchedWriteVecALU, 0, NoVLX>; 3405*0b57cec5SDimitry Andricdefm PSUBSB : PDI_binop_all<0xE8, "psubsb", ssubsat, v16i8, v32i8, 3406*0b57cec5SDimitry Andric SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; 3407*0b57cec5SDimitry Andricdefm PSUBSW : PDI_binop_all<0xE9, "psubsw", ssubsat, v8i16, v16i16, 3408*0b57cec5SDimitry Andric SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; 3409*0b57cec5SDimitry Andricdefm PSUBUSB : PDI_binop_all<0xD8, "psubusb", usubsat, v16i8, v32i8, 3410*0b57cec5SDimitry Andric SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; 3411*0b57cec5SDimitry Andricdefm PSUBUSW : PDI_binop_all<0xD9, "psubusw", usubsat, v8i16, v16i16, 3412*0b57cec5SDimitry Andric SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; 3413*0b57cec5SDimitry Andricdefm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8, 3414*0b57cec5SDimitry Andric SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3415*0b57cec5SDimitry Andricdefm PMINSW : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16, 3416*0b57cec5SDimitry Andric SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3417*0b57cec5SDimitry Andricdefm PMAXUB : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8, 3418*0b57cec5SDimitry Andric SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3419*0b57cec5SDimitry Andricdefm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16, 3420*0b57cec5SDimitry Andric SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3421*0b57cec5SDimitry Andricdefm PAVGB : PDI_binop_all<0xE0, "pavgb", X86avg, v16i8, v32i8, 3422*0b57cec5SDimitry Andric SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3423*0b57cec5SDimitry Andricdefm PAVGW : PDI_binop_all<0xE3, "pavgw", X86avg, v8i16, v16i16, 3424*0b57cec5SDimitry Andric SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3425*0b57cec5SDimitry Andricdefm PMULUDQ : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64, 3426*0b57cec5SDimitry Andric SchedWriteVecIMul, 1, NoVLX>; 3427*0b57cec5SDimitry Andric 3428*0b57cec5SDimitry Andriclet Predicates 
= [HasAVX, NoVLX_Or_NoBWI] in 3429*0b57cec5SDimitry Andricdefm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16, VR128, 3430*0b57cec5SDimitry Andric load, i128mem, SchedWriteVecIMul.XMM, 0>, 3431*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 3432*0b57cec5SDimitry Andric 3433*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in 3434*0b57cec5SDimitry Andricdefm VPMADDWDY : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v8i32, v16i16, 3435*0b57cec5SDimitry Andric VR256, load, i256mem, SchedWriteVecIMul.YMM, 3436*0b57cec5SDimitry Andric 0>, VEX_4V, VEX_L, VEX_WIG; 3437*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in 3438*0b57cec5SDimitry Andricdefm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128, 3439*0b57cec5SDimitry Andric memop, i128mem, SchedWriteVecIMul.XMM>; 3440*0b57cec5SDimitry Andric 3441*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in 3442*0b57cec5SDimitry Andricdefm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8, VR128, 3443*0b57cec5SDimitry Andric load, i128mem, SchedWritePSADBW.XMM, 0>, 3444*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 3445*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in 3446*0b57cec5SDimitry Andricdefm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8, VR256, 3447*0b57cec5SDimitry Andric load, i256mem, SchedWritePSADBW.YMM, 0>, 3448*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 3449*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in 3450*0b57cec5SDimitry Andricdefm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128, 3451*0b57cec5SDimitry Andric memop, i128mem, SchedWritePSADBW.XMM>; 3452*0b57cec5SDimitry Andric 3453*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 3454*0b57cec5SDimitry Andric// SSE2 - Packed Integer Logical Instructions 3455*0b57cec5SDimitry 
Andric//===---------------------------------------------------------------------===// 3456*0b57cec5SDimitry Andric 3457*0b57cec5SDimitry Andricmulticlass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm, 3458*0b57cec5SDimitry Andric string OpcodeStr, SDNode OpNode, 3459*0b57cec5SDimitry Andric SDNode OpNode2, RegisterClass RC, 3460*0b57cec5SDimitry Andric X86FoldableSchedWrite sched, 3461*0b57cec5SDimitry Andric X86FoldableSchedWrite schedImm, 3462*0b57cec5SDimitry Andric ValueType DstVT, ValueType SrcVT, 3463*0b57cec5SDimitry Andric PatFrag ld_frag, bit Is2Addr = 1> { 3464*0b57cec5SDimitry Andric // src2 is always 128-bit 3465*0b57cec5SDimitry Andric def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), 3466*0b57cec5SDimitry Andric (ins RC:$src1, VR128:$src2), 3467*0b57cec5SDimitry Andric !if(Is2Addr, 3468*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3469*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3470*0b57cec5SDimitry Andric [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))]>, 3471*0b57cec5SDimitry Andric Sched<[sched]>; 3472*0b57cec5SDimitry Andric def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), 3473*0b57cec5SDimitry Andric (ins RC:$src1, i128mem:$src2), 3474*0b57cec5SDimitry Andric !if(Is2Addr, 3475*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3476*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3477*0b57cec5SDimitry Andric [(set RC:$dst, (DstVT (OpNode RC:$src1, 3478*0b57cec5SDimitry Andric (SrcVT (ld_frag addr:$src2)))))]>, 3479*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 3480*0b57cec5SDimitry Andric def ri : PDIi8<opc2, ImmForm, (outs RC:$dst), 3481*0b57cec5SDimitry Andric (ins RC:$src1, u8imm:$src2), 3482*0b57cec5SDimitry Andric !if(Is2Addr, 3483*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3484*0b57cec5SDimitry Andric 
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3485*0b57cec5SDimitry Andric [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 imm:$src2))))]>, 3486*0b57cec5SDimitry Andric Sched<[schedImm]>; 3487*0b57cec5SDimitry Andric} 3488*0b57cec5SDimitry Andric 3489*0b57cec5SDimitry Andricmulticlass PDI_binop_rmi_all<bits<8> opc, bits<8> opc2, Format ImmForm, 3490*0b57cec5SDimitry Andric string OpcodeStr, SDNode OpNode, 3491*0b57cec5SDimitry Andric SDNode OpNode2, ValueType DstVT128, 3492*0b57cec5SDimitry Andric ValueType DstVT256, ValueType SrcVT, 3493*0b57cec5SDimitry Andric X86SchedWriteWidths sched, 3494*0b57cec5SDimitry Andric X86SchedWriteWidths schedImm, Predicate prd> { 3495*0b57cec5SDimitry Andriclet Predicates = [HasAVX, prd] in 3496*0b57cec5SDimitry Andric defm V#NAME : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr), 3497*0b57cec5SDimitry Andric OpNode, OpNode2, VR128, sched.XMM, schedImm.XMM, 3498*0b57cec5SDimitry Andric DstVT128, SrcVT, load, 0>, VEX_4V, VEX_WIG; 3499*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, prd] in 3500*0b57cec5SDimitry Andric defm V#NAME#Y : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr), 3501*0b57cec5SDimitry Andric OpNode, OpNode2, VR256, sched.YMM, schedImm.YMM, 3502*0b57cec5SDimitry Andric DstVT256, SrcVT, load, 0>, VEX_4V, VEX_L, 3503*0b57cec5SDimitry Andric VEX_WIG; 3504*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in 3505*0b57cec5SDimitry Andric defm NAME : PDI_binop_rmi<opc, opc2, ImmForm, OpcodeStr, OpNode, OpNode2, 3506*0b57cec5SDimitry Andric VR128, sched.XMM, schedImm.XMM, DstVT128, SrcVT, 3507*0b57cec5SDimitry Andric memop>; 3508*0b57cec5SDimitry Andric} 3509*0b57cec5SDimitry Andric 3510*0b57cec5SDimitry Andricmulticlass PDI_binop_ri<bits<8> opc, Format ImmForm, string OpcodeStr, 3511*0b57cec5SDimitry Andric SDNode OpNode, RegisterClass RC, ValueType VT, 3512*0b57cec5SDimitry Andric X86FoldableSchedWrite sched, bit Is2Addr = 1> { 3513*0b57cec5SDimitry Andric def ri : 
PDIi8<opc, ImmForm, (outs RC:$dst), (ins RC:$src1, u8imm:$src2), 3514*0b57cec5SDimitry Andric !if(Is2Addr, 3515*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3516*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3517*0b57cec5SDimitry Andric [(set RC:$dst, (VT (OpNode RC:$src1, (i8 imm:$src2))))]>, 3518*0b57cec5SDimitry Andric Sched<[sched]>; 3519*0b57cec5SDimitry Andric} 3520*0b57cec5SDimitry Andric 3521*0b57cec5SDimitry Andricmulticlass PDI_binop_ri_all<bits<8> opc, Format ImmForm, string OpcodeStr, 3522*0b57cec5SDimitry Andric SDNode OpNode, X86SchedWriteWidths sched> { 3523*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in 3524*0b57cec5SDimitry Andric defm V#NAME : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode, 3525*0b57cec5SDimitry Andric VR128, v16i8, sched.XMM, 0>, VEX_4V, VEX_WIG; 3526*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in 3527*0b57cec5SDimitry Andric defm V#NAME#Y : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode, 3528*0b57cec5SDimitry Andric VR256, v32i8, sched.YMM, 0>, 3529*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 3530*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in 3531*0b57cec5SDimitry Andric defm NAME : PDI_binop_ri<opc, ImmForm, OpcodeStr, OpNode, VR128, v16i8, 3532*0b57cec5SDimitry Andric sched.XMM>; 3533*0b57cec5SDimitry Andric} 3534*0b57cec5SDimitry Andric 3535*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in { 3536*0b57cec5SDimitry Andric defm PSLLW : PDI_binop_rmi_all<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli, 3537*0b57cec5SDimitry Andric v8i16, v16i16, v8i16, SchedWriteVecShift, 3538*0b57cec5SDimitry Andric SchedWriteVecShiftImm, NoVLX_Or_NoBWI>; 3539*0b57cec5SDimitry Andric defm PSLLD : PDI_binop_rmi_all<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli, 3540*0b57cec5SDimitry Andric v4i32, v8i32, v4i32, SchedWriteVecShift, 3541*0b57cec5SDimitry Andric 
SchedWriteVecShiftImm, NoVLX>; 3542*0b57cec5SDimitry Andric defm PSLLQ : PDI_binop_rmi_all<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli, 3543*0b57cec5SDimitry Andric v2i64, v4i64, v2i64, SchedWriteVecShift, 3544*0b57cec5SDimitry Andric SchedWriteVecShiftImm, NoVLX>; 3545*0b57cec5SDimitry Andric 3546*0b57cec5SDimitry Andric defm PSRLW : PDI_binop_rmi_all<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli, 3547*0b57cec5SDimitry Andric v8i16, v16i16, v8i16, SchedWriteVecShift, 3548*0b57cec5SDimitry Andric SchedWriteVecShiftImm, NoVLX_Or_NoBWI>; 3549*0b57cec5SDimitry Andric defm PSRLD : PDI_binop_rmi_all<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli, 3550*0b57cec5SDimitry Andric v4i32, v8i32, v4i32, SchedWriteVecShift, 3551*0b57cec5SDimitry Andric SchedWriteVecShiftImm, NoVLX>; 3552*0b57cec5SDimitry Andric defm PSRLQ : PDI_binop_rmi_all<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli, 3553*0b57cec5SDimitry Andric v2i64, v4i64, v2i64, SchedWriteVecShift, 3554*0b57cec5SDimitry Andric SchedWriteVecShiftImm, NoVLX>; 3555*0b57cec5SDimitry Andric 3556*0b57cec5SDimitry Andric defm PSRAW : PDI_binop_rmi_all<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai, 3557*0b57cec5SDimitry Andric v8i16, v16i16, v8i16, SchedWriteVecShift, 3558*0b57cec5SDimitry Andric SchedWriteVecShiftImm, NoVLX_Or_NoBWI>; 3559*0b57cec5SDimitry Andric defm PSRAD : PDI_binop_rmi_all<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai, 3560*0b57cec5SDimitry Andric v4i32, v8i32, v4i32, SchedWriteVecShift, 3561*0b57cec5SDimitry Andric SchedWriteVecShiftImm, NoVLX>; 3562*0b57cec5SDimitry Andric 3563*0b57cec5SDimitry Andric defm PSLLDQ : PDI_binop_ri_all<0x73, MRM7r, "pslldq", X86vshldq, 3564*0b57cec5SDimitry Andric SchedWriteShuffle>; 3565*0b57cec5SDimitry Andric defm PSRLDQ : PDI_binop_ri_all<0x73, MRM3r, "psrldq", X86vshrdq, 3566*0b57cec5SDimitry Andric SchedWriteShuffle>; 3567*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt 3568*0b57cec5SDimitry Andric 3569*0b57cec5SDimitry 
Andric//===---------------------------------------------------------------------===// 3570*0b57cec5SDimitry Andric// SSE2 - Packed Integer Comparison Instructions 3571*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 3572*0b57cec5SDimitry Andric 3573*0b57cec5SDimitry Andricdefm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8, 3574*0b57cec5SDimitry Andric SchedWriteVecALU, 1, TruePredicate>; 3575*0b57cec5SDimitry Andricdefm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16, 3576*0b57cec5SDimitry Andric SchedWriteVecALU, 1, TruePredicate>; 3577*0b57cec5SDimitry Andricdefm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32, 3578*0b57cec5SDimitry Andric SchedWriteVecALU, 1, TruePredicate>; 3579*0b57cec5SDimitry Andricdefm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8, 3580*0b57cec5SDimitry Andric SchedWriteVecALU, 0, TruePredicate>; 3581*0b57cec5SDimitry Andricdefm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16, 3582*0b57cec5SDimitry Andric SchedWriteVecALU, 0, TruePredicate>; 3583*0b57cec5SDimitry Andricdefm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32, 3584*0b57cec5SDimitry Andric SchedWriteVecALU, 0, TruePredicate>; 3585*0b57cec5SDimitry Andric 3586*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 3587*0b57cec5SDimitry Andric// SSE2 - Packed Integer Shuffle Instructions 3588*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 3589*0b57cec5SDimitry Andric 3590*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in { 3591*0b57cec5SDimitry Andricmulticlass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256, 3592*0b57cec5SDimitry Andric SDNode OpNode, X86SchedWriteWidths sched, 3593*0b57cec5SDimitry Andric Predicate prd> { 3594*0b57cec5SDimitry Andriclet Predicates = [HasAVX, prd] in { 
3595*0b57cec5SDimitry Andric def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst), 3596*0b57cec5SDimitry Andric (ins VR128:$src1, u8imm:$src2), 3597*0b57cec5SDimitry Andric !strconcat("v", OpcodeStr, 3598*0b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3599*0b57cec5SDimitry Andric [(set VR128:$dst, 3600*0b57cec5SDimitry Andric (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))]>, 3601*0b57cec5SDimitry Andric VEX, Sched<[sched.XMM]>, VEX_WIG; 3602*0b57cec5SDimitry Andric def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), 3603*0b57cec5SDimitry Andric (ins i128mem:$src1, u8imm:$src2), 3604*0b57cec5SDimitry Andric !strconcat("v", OpcodeStr, 3605*0b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3606*0b57cec5SDimitry Andric [(set VR128:$dst, 3607*0b57cec5SDimitry Andric (vt128 (OpNode (load addr:$src1), 3608*0b57cec5SDimitry Andric (i8 imm:$src2))))]>, VEX, 3609*0b57cec5SDimitry Andric Sched<[sched.XMM.Folded]>, VEX_WIG; 3610*0b57cec5SDimitry Andric} 3611*0b57cec5SDimitry Andric 3612*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, prd] in { 3613*0b57cec5SDimitry Andric def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst), 3614*0b57cec5SDimitry Andric (ins VR256:$src1, u8imm:$src2), 3615*0b57cec5SDimitry Andric !strconcat("v", OpcodeStr, 3616*0b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3617*0b57cec5SDimitry Andric [(set VR256:$dst, 3618*0b57cec5SDimitry Andric (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))]>, 3619*0b57cec5SDimitry Andric VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG; 3620*0b57cec5SDimitry Andric def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), 3621*0b57cec5SDimitry Andric (ins i256mem:$src1, u8imm:$src2), 3622*0b57cec5SDimitry Andric !strconcat("v", OpcodeStr, 3623*0b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3624*0b57cec5SDimitry Andric [(set VR256:$dst, 3625*0b57cec5SDimitry Andric (vt256 (OpNode (load addr:$src1), 3626*0b57cec5SDimitry Andric (i8 
imm:$src2))))]>, VEX, VEX_L, 3627*0b57cec5SDimitry Andric Sched<[sched.YMM.Folded]>, VEX_WIG; 3628*0b57cec5SDimitry Andric} 3629*0b57cec5SDimitry Andric 3630*0b57cec5SDimitry Andriclet Predicates = [UseSSE2] in { 3631*0b57cec5SDimitry Andric def ri : Ii8<0x70, MRMSrcReg, 3632*0b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2), 3633*0b57cec5SDimitry Andric !strconcat(OpcodeStr, 3634*0b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3635*0b57cec5SDimitry Andric [(set VR128:$dst, 3636*0b57cec5SDimitry Andric (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))]>, 3637*0b57cec5SDimitry Andric Sched<[sched.XMM]>; 3638*0b57cec5SDimitry Andric def mi : Ii8<0x70, MRMSrcMem, 3639*0b57cec5SDimitry Andric (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), 3640*0b57cec5SDimitry Andric !strconcat(OpcodeStr, 3641*0b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3642*0b57cec5SDimitry Andric [(set VR128:$dst, 3643*0b57cec5SDimitry Andric (vt128 (OpNode (memop addr:$src1), 3644*0b57cec5SDimitry Andric (i8 imm:$src2))))]>, 3645*0b57cec5SDimitry Andric Sched<[sched.XMM.Folded]>; 3646*0b57cec5SDimitry Andric} 3647*0b57cec5SDimitry Andric} 3648*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt 3649*0b57cec5SDimitry Andric 3650*0b57cec5SDimitry Andricdefm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd, 3651*0b57cec5SDimitry Andric SchedWriteShuffle, NoVLX>, PD; 3652*0b57cec5SDimitry Andricdefm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw, 3653*0b57cec5SDimitry Andric SchedWriteShuffle, NoVLX_Or_NoBWI>, XS; 3654*0b57cec5SDimitry Andricdefm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw, 3655*0b57cec5SDimitry Andric SchedWriteShuffle, NoVLX_Or_NoBWI>, XD; 3656*0b57cec5SDimitry Andric 3657*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 3658*0b57cec5SDimitry Andric// Packed Integer Pack Instructions (SSE & AVX) 3659*0b57cec5SDimitry 
Andric//===---------------------------------------------------------------------===// 3660*0b57cec5SDimitry Andric 3661*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in { 3662*0b57cec5SDimitry Andricmulticlass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT, 3663*0b57cec5SDimitry Andric ValueType ArgVT, SDNode OpNode, RegisterClass RC, 3664*0b57cec5SDimitry Andric X86MemOperand x86memop, X86FoldableSchedWrite sched, 3665*0b57cec5SDimitry Andric PatFrag ld_frag, bit Is2Addr = 1> { 3666*0b57cec5SDimitry Andric def rr : PDI<opc, MRMSrcReg, 3667*0b57cec5SDimitry Andric (outs RC:$dst), (ins RC:$src1, RC:$src2), 3668*0b57cec5SDimitry Andric !if(Is2Addr, 3669*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3670*0b57cec5SDimitry Andric !strconcat(OpcodeStr, 3671*0b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3672*0b57cec5SDimitry Andric [(set RC:$dst, 3673*0b57cec5SDimitry Andric (OutVT (OpNode (ArgVT RC:$src1), RC:$src2)))]>, 3674*0b57cec5SDimitry Andric Sched<[sched]>; 3675*0b57cec5SDimitry Andric def rm : PDI<opc, MRMSrcMem, 3676*0b57cec5SDimitry Andric (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 3677*0b57cec5SDimitry Andric !if(Is2Addr, 3678*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3679*0b57cec5SDimitry Andric !strconcat(OpcodeStr, 3680*0b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3681*0b57cec5SDimitry Andric [(set RC:$dst, 3682*0b57cec5SDimitry Andric (OutVT (OpNode (ArgVT RC:$src1), 3683*0b57cec5SDimitry Andric (ld_frag addr:$src2))))]>, 3684*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 3685*0b57cec5SDimitry Andric} 3686*0b57cec5SDimitry Andric 3687*0b57cec5SDimitry Andricmulticlass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT, 3688*0b57cec5SDimitry Andric ValueType ArgVT, SDNode OpNode, RegisterClass RC, 3689*0b57cec5SDimitry Andric X86MemOperand x86memop, X86FoldableSchedWrite sched, 
3690*0b57cec5SDimitry Andric PatFrag ld_frag, bit Is2Addr = 1> { 3691*0b57cec5SDimitry Andric def rr : SS48I<opc, MRMSrcReg, 3692*0b57cec5SDimitry Andric (outs RC:$dst), (ins RC:$src1, RC:$src2), 3693*0b57cec5SDimitry Andric !if(Is2Addr, 3694*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3695*0b57cec5SDimitry Andric !strconcat(OpcodeStr, 3696*0b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3697*0b57cec5SDimitry Andric [(set RC:$dst, 3698*0b57cec5SDimitry Andric (OutVT (OpNode (ArgVT RC:$src1), RC:$src2)))]>, 3699*0b57cec5SDimitry Andric Sched<[sched]>; 3700*0b57cec5SDimitry Andric def rm : SS48I<opc, MRMSrcMem, 3701*0b57cec5SDimitry Andric (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 3702*0b57cec5SDimitry Andric !if(Is2Addr, 3703*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3704*0b57cec5SDimitry Andric !strconcat(OpcodeStr, 3705*0b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3706*0b57cec5SDimitry Andric [(set RC:$dst, 3707*0b57cec5SDimitry Andric (OutVT (OpNode (ArgVT RC:$src1), 3708*0b57cec5SDimitry Andric (ld_frag addr:$src2))))]>, 3709*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 3710*0b57cec5SDimitry Andric} 3711*0b57cec5SDimitry Andric 3712*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in { 3713*0b57cec5SDimitry Andric defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, VR128, 3714*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, load, 0>, 3715*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 3716*0b57cec5SDimitry Andric defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, VR128, 3717*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, load, 0>, 3718*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 3719*0b57cec5SDimitry Andric 3720*0b57cec5SDimitry Andric defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus, VR128, 3721*0b57cec5SDimitry Andric i128mem, 
SchedWriteShuffle.XMM, load, 0>, 3722*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 3723*0b57cec5SDimitry Andric defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128, 3724*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, load, 0>, 3725*0b57cec5SDimitry Andric VEX_4V; 3726*0b57cec5SDimitry Andric} 3727*0b57cec5SDimitry Andric 3728*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { 3729*0b57cec5SDimitry Andric defm VPACKSSWBY : sse2_pack<0x63, "vpacksswb", v32i8, v16i16, X86Packss, VR256, 3730*0b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 3731*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 3732*0b57cec5SDimitry Andric defm VPACKSSDWY : sse2_pack<0x6B, "vpackssdw", v16i16, v8i32, X86Packss, VR256, 3733*0b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 3734*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 3735*0b57cec5SDimitry Andric 3736*0b57cec5SDimitry Andric defm VPACKUSWBY : sse2_pack<0x67, "vpackuswb", v32i8, v16i16, X86Packus, VR256, 3737*0b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 3738*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 3739*0b57cec5SDimitry Andric defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, VR256, 3740*0b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 3741*0b57cec5SDimitry Andric VEX_4V, VEX_L; 3742*0b57cec5SDimitry Andric} 3743*0b57cec5SDimitry Andric 3744*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { 3745*0b57cec5SDimitry Andric defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss, VR128, 3746*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 3747*0b57cec5SDimitry Andric defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss, VR128, 3748*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 3749*0b57cec5SDimitry Andric 3750*0b57cec5SDimitry Andric defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus, VR128, 
3751*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 3752*0b57cec5SDimitry Andric 3753*0b57cec5SDimitry Andric defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus, VR128, 3754*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 3755*0b57cec5SDimitry Andric} 3756*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt 3757*0b57cec5SDimitry Andric 3758*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 3759*0b57cec5SDimitry Andric// SSE2 - Packed Integer Unpack Instructions 3760*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 3761*0b57cec5SDimitry Andric 3762*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedInt in { 3763*0b57cec5SDimitry Andricmulticlass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, 3764*0b57cec5SDimitry Andric SDNode OpNode, RegisterClass RC, X86MemOperand x86memop, 3765*0b57cec5SDimitry Andric X86FoldableSchedWrite sched, PatFrag ld_frag, 3766*0b57cec5SDimitry Andric bit Is2Addr = 1> { 3767*0b57cec5SDimitry Andric def rr : PDI<opc, MRMSrcReg, 3768*0b57cec5SDimitry Andric (outs RC:$dst), (ins RC:$src1, RC:$src2), 3769*0b57cec5SDimitry Andric !if(Is2Addr, 3770*0b57cec5SDimitry Andric !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), 3771*0b57cec5SDimitry Andric !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3772*0b57cec5SDimitry Andric [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>, 3773*0b57cec5SDimitry Andric Sched<[sched]>; 3774*0b57cec5SDimitry Andric def rm : PDI<opc, MRMSrcMem, 3775*0b57cec5SDimitry Andric (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 3776*0b57cec5SDimitry Andric !if(Is2Addr, 3777*0b57cec5SDimitry Andric !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), 3778*0b57cec5SDimitry Andric !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3779*0b57cec5SDimitry Andric [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>, 
3780*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 3781*0b57cec5SDimitry Andric} 3782*0b57cec5SDimitry Andric 3783*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in { 3784*0b57cec5SDimitry Andric defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, VR128, 3785*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, load, 0>, 3786*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 3787*0b57cec5SDimitry Andric defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, VR128, 3788*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, load, 0>, 3789*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 3790*0b57cec5SDimitry Andric defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, VR128, 3791*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, load, 0>, 3792*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 3793*0b57cec5SDimitry Andric defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, VR128, 3794*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, load, 0>, 3795*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 3796*0b57cec5SDimitry Andric} 3797*0b57cec5SDimitry Andric 3798*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 3799*0b57cec5SDimitry Andric defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, VR128, 3800*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, load, 0>, 3801*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 3802*0b57cec5SDimitry Andric defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, VR128, 3803*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, load, 0>, 3804*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 3805*0b57cec5SDimitry Andric defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, VR128, 3806*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, load, 0>, 3807*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 3808*0b57cec5SDimitry Andric defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, VR128, 
3809*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, load, 0>, 3810*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 3811*0b57cec5SDimitry Andric} 3812*0b57cec5SDimitry Andric 3813*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { 3814*0b57cec5SDimitry Andric defm VPUNPCKLBWY : sse2_unpack<0x60, "vpunpcklbw", v32i8, X86Unpckl, VR256, 3815*0b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 3816*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 3817*0b57cec5SDimitry Andric defm VPUNPCKLWDY : sse2_unpack<0x61, "vpunpcklwd", v16i16, X86Unpckl, VR256, 3818*0b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 3819*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 3820*0b57cec5SDimitry Andric defm VPUNPCKHBWY : sse2_unpack<0x68, "vpunpckhbw", v32i8, X86Unpckh, VR256, 3821*0b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 3822*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 3823*0b57cec5SDimitry Andric defm VPUNPCKHWDY : sse2_unpack<0x69, "vpunpckhwd", v16i16, X86Unpckh, VR256, 3824*0b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 3825*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 3826*0b57cec5SDimitry Andric} 3827*0b57cec5SDimitry Andric 3828*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in { 3829*0b57cec5SDimitry Andric defm VPUNPCKLDQY : sse2_unpack<0x62, "vpunpckldq", v8i32, X86Unpckl, VR256, 3830*0b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 3831*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 3832*0b57cec5SDimitry Andric defm VPUNPCKLQDQY : sse2_unpack<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, VR256, 3833*0b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 3834*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 3835*0b57cec5SDimitry Andric defm VPUNPCKHDQY : sse2_unpack<0x6A, "vpunpckhdq", v8i32, X86Unpckh, VR256, 3836*0b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 3837*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 
3838*0b57cec5SDimitry Andric defm VPUNPCKHQDQY : sse2_unpack<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, VR256, 3839*0b57cec5SDimitry Andric i256mem, SchedWriteShuffle.YMM, load, 0>, 3840*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 3841*0b57cec5SDimitry Andric} 3842*0b57cec5SDimitry Andric 3843*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { 3844*0b57cec5SDimitry Andric defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, VR128, 3845*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 3846*0b57cec5SDimitry Andric defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, VR128, 3847*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 3848*0b57cec5SDimitry Andric defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, VR128, 3849*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 3850*0b57cec5SDimitry Andric defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, VR128, 3851*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 3852*0b57cec5SDimitry Andric 3853*0b57cec5SDimitry Andric defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, VR128, 3854*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 3855*0b57cec5SDimitry Andric defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, VR128, 3856*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 3857*0b57cec5SDimitry Andric defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, VR128, 3858*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 3859*0b57cec5SDimitry Andric defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, VR128, 3860*0b57cec5SDimitry Andric i128mem, SchedWriteShuffle.XMM, memop>; 3861*0b57cec5SDimitry Andric} 3862*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedInt 3863*0b57cec5SDimitry Andric 3864*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 
// SSE2 - Packed Integer Extract and Insert
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in {
// PINSRW/VPINSRW (opcode 0xC4): insert a 16-bit value from a GPR (rr) or an
// i16 memory operand (rm) into the v8i16 element selected by the u8 immediate.
// Is2Addr picks the 2-operand SSE asm string vs. the 3-operand AVX one.
multiclass sse2_pinsrw<bit Is2Addr = 1> {
  def rr : Ii8<0xC4, MRMSrcReg,
       (outs VR128:$dst), (ins VR128:$src1,
        GR32orGR64:$src2, u8imm:$src3),
       !if(Is2Addr,
           "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
           "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
       [(set VR128:$dst,
         (X86pinsrw VR128:$src1, GR32orGR64:$src2, imm:$src3))]>,
       Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
  // Memory form folds an extending i16 load as the inserted element.
  def rm : Ii8<0xC4, MRMSrcMem,
       (outs VR128:$dst), (ins VR128:$src1,
        i16mem:$src2, u8imm:$src3),
       !if(Is2Addr,
           "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
           "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
       [(set VR128:$dst,
         (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
                    imm:$src3))]>,
       Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

// Extract: PEXTRW (opcode 0xC5) moves the immediate-selected word element of
// an XMM register into a GPR.
let Predicates = [HasAVX, NoBWI] in
def VPEXTRWrr : Ii8<0xC5, MRMSrcReg,
                    (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
                    "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
                                            imm:$src2))]>,
                PD, VEX, VEX_WIG, Sched<[WriteVecExtract]>;
def PEXTRWrr : PDIi8<0xC5, MRMSrcReg,
               (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
               "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
                                       imm:$src2))]>,
               Sched<[WriteVecExtract]>;

// Insert: AVX form is 3-operand (Is2Addr = 0); SSE form ties $src1 to $dst.
let Predicates = [HasAVX, NoBWI] in
defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V, VEX_WIG;

let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in
defm PINSRW : sse2_pinsrw, PD;

} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// SSE2 - Packed Mask Creation
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in {

// PMOVMSKB (opcode 0xD7): gather the sign bits of each byte lane into a GPR.
def VPMOVMSKBrr  : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
                        (ins VR128:$src),
                        "pmovmskb\t{$src, $dst|$dst, $src}",
                        [(set GR32orGR64:$dst, (X86movmsk (v16i8 VR128:$src)))]>,
                   Sched<[WriteVecMOVMSK]>, VEX, VEX_WIG;

// 256-bit (32-lane) form requires AVX2.
let Predicates = [HasAVX2] in {
def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
                        (ins VR256:$src),
                        "pmovmskb\t{$src, $dst|$dst, $src}",
                        [(set GR32orGR64:$dst, (X86movmsk (v32i8 VR256:$src)))]>,
                   Sched<[WriteVecMOVMSKY]>, VEX, VEX_L, VEX_WIG;
}

def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src),
                     "pmovmskb\t{$src, $dst|$dst, $src}",
                     [(set GR32orGR64:$dst, (X86movmsk (v16i8 VR128:$src)))]>,
                 Sched<[WriteVecMOVMSK]>;

} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// SSE2 - Conditional Store
//===---------------------------------------------------------------------===//

// MASKMOVDQU (opcode 0xF7) stores byte lanes of $src selected by the sign bits
// of $mask to the address implicitly held in EDI (32-bit) or RDI (64-bit),
// which is why each mode gets its own def with the matching implicit Use.
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in {
let Uses = [EDI], Predicates = [HasAVX,Not64BitMode] in
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
                       (ins VR128:$src, VR128:$mask),
                       "maskmovdqu\t{$mask, $src|$src, $mask}",
                       [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
                  VEX, VEX_WIG;
let Uses = [RDI], Predicates = [HasAVX,In64BitMode] in
def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
                         (ins VR128:$src, VR128:$mask),
                         "maskmovdqu\t{$mask, $src|$src, $mask}",
                         [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>,
                    VEX, VEX_WIG;

let Uses = [EDI], Predicates = [UseSSE2,Not64BitMode] in
def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
                     "maskmovdqu\t{$mask, $src|$src, $mask}",
                     [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
let Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in
def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
                       "maskmovdqu\t{$mask, $src|$src, $mask}",
                       [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;

} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// SSE2 - Move Doubleword/Quadword
//===---------------------------------------------------------------------===//

//===---------------------------------------------------------------------===//
// Move Int Doubleword to Packed Double Int
//
// MOVD/MOVQ (opcode 0x6E): move a 32/64-bit GPR or memory value into the low
// element of an XMM register (scalar_to_vector).
let ExeDomain = SSEPackedInt in {
def VMOVDI2PDIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v4i32 (scalar_to_vector GR32:$src)))]>,
                   VEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVDI2PDIrm : VS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                   VEX, Sched<[WriteVecLoad]>;
def VMOV64toPQIrr : VRS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                          "movq\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst,
                            (v2i64 (scalar_to_vector GR64:$src)))]>,
                    VEX, Sched<[WriteVecMoveFromGpr]>;
// Disassembler-only memory form: no pattern, so no ISel use.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIrm : VRS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                          "movq\t{$src, $dst|$dst, $src}", []>,
                    VEX, Sched<[WriteVecLoad]>;
// GR64 -> FR64 bitcast move (codegen-only alias of the same encoding).
let isCodeGenOnly = 1 in
def VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                         [(set FR64:$dst, (bitconvert GR64:$src))]>,
                   VEX, Sched<[WriteVecMoveFromGpr]>;

// Non-VEX (legacy SSE2) counterparts of the defs above.
def MOVDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))]>,
                  Sched<[WriteVecMoveFromGpr]>;
def MOVDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                  Sched<[WriteVecLoad]>;
def MOV64toPQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v2i64 (scalar_to_vector GR64:$src)))]>,
                  Sched<[WriteVecMoveFromGpr]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def MOV64toPQIrm : RS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                        "movq\t{$src, $dst|$dst, $src}", []>,
                   Sched<[WriteVecLoad]>;
let isCodeGenOnly = 1 in
def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
                       "movq\t{$src, $dst|$dst, $src}",
                       [(set FR64:$dst, (bitconvert GR64:$src))]>,
                  Sched<[WriteVecMoveFromGpr]>;
} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// Move Int Doubleword to Single Scalar
//
// GR32 -> FR32 bitcast moves; codegen-only since they share the 0x6E encoding.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
  def VMOVDI2SSrr  : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
                          "movd\t{$src, $dst|$dst, $src}",
                          [(set FR32:$dst, (bitconvert GR32:$src))]>,
                     VEX, Sched<[WriteVecMoveFromGpr]>;

  def MOVDI2SSrr  : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set FR32:$dst, (bitconvert GR32:$src))]>,
                    Sched<[WriteVecMoveFromGpr]>;

} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int to Packed Double Int
//
// MOVD (opcode 0x7E, dest forms): extract element 0 of a v4i32 to a GPR (rr)
// or store it to memory (mr).
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIrr  : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst, (extractelt (v4i32 VR128:$src),
                                                      (iPTR 0)))]>, VEX,
                    Sched<[WriteVecMoveToGpr]>;
def VMOVPDI2DImr  : VS2I<0x7E, MRMDestMem, (outs),
                         (ins i32mem:$dst, VR128:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(store (i32 (extractelt (v4i32 VR128:$src),
                                                  (iPTR 0))), addr:$dst)]>,
                    VEX, Sched<[WriteVecStore]>;
def MOVPDI2DIrr   : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set GR32:$dst, (extractelt (v4i32 VR128:$src),
                                                     (iPTR 0)))]>,
                    Sched<[WriteVecMoveToGpr]>;
def MOVPDI2DImr   : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(store (i32 (extractelt (v4i32 VR128:$src),
                                                 (iPTR 0))), addr:$dst)]>,
                    Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int first element to Doubleword Int
//
// MOVQ (opcode 0x7E, dest forms): extract element 0 of a v2i64 to a 64-bit GPR.
let ExeDomain = SSEPackedInt in {
let SchedRW = [WriteVecMoveToGpr] in {
def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
                          "movq\t{$src, $dst|$dst, $src}",
                          [(set GR64:$dst, (extractelt (v2i64 VR128:$src),
                                                       (iPTR 0)))]>,
                    VEX;

def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set GR64:$dst, (extractelt (v2i64 VR128:$src),
                                                     (iPTR 0)))]>;
} //SchedRW

// Disassembler-only memory-destination forms: no ISel patterns.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64mr : VRS2I<0x7E, MRMDestMem, (outs),
                          (ins i64mem:$dst, VR128:$src),
                          "movq\t{$src, $dst|$dst, $src}", []>,
                    VEX, Sched<[WriteVecStore]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def MOVPQIto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}", []>,
                   Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// Bitcast FR64 <-> GR64
//
// FR64 -> GR64 bitcast moves; codegen-only since they reuse the 0x7E encoding.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
  def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
                           "movq\t{$src, $dst|$dst, $src}",
                           [(set GR64:$dst, (bitconvert FR64:$src))]>,
                     VEX, Sched<[WriteVecMoveToGpr]>;

  def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (bitconvert FR64:$src))]>,
                    Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

//===---------------------------------------------------------------------===//
// Move Scalar Single to Double Int
//
// FR32 -> GR32 bitcast moves (codegen-only counterparts of MOVD dest form).
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
  def VMOVSS2DIrr  : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                          "movd\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst, (bitconvert FR32:$src))]>,
                     VEX, Sched<[WriteVecMoveToGpr]>;
  def MOVSS2DIrr  : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set GR32:$dst, (bitconvert FR32:$src))]>,
                    Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

// Select movd/movq for zero-extending scalar-to-vector moves (X86vzmovl /
// X86vzload): the hardware move already clears the upper lanes.
let Predicates = [UseAVX] in {
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIrr GR64:$src)>;

  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  // These instructions also write zeros in the high part of a 256-bit register.
  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
            (VMOVDI2PDIrm addr:$src)>;
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v4i32 (VMOVDI2PDIrm addr:$src)), sub_xmm)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (MOVDI2PDIrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (MOV64toPQIrr GR64:$src)>;
  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
            (MOVDI2PDIrm addr:$src)>;
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (MOVDI2PDIrm addr:$src)>;
}

// Before the MC layer of LLVM existed, clang emitted "movd" assembly instead of
// "movq" due to MacOS parsing limitation.
// In order to parse old assembly, we add these aliases (emit-disabled, the
// trailing 0 means they are accepted by the parser but never printed).
def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
                (MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
                (MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
// Allow "vmovd" but print "vmovq" since we don't need compatibility for AVX.
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIrr VR128:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64rr GR64:$dst, VR128:$src), 0>;

//===---------------------------------------------------------------------===//
// SSE2 - Move Quadword
//===---------------------------------------------------------------------===//

//===---------------------------------------------------------------------===//
// Move Quadword Int to Packed Quadword Int
//
// MOVQ load form (opcode 0x7E with XS prefix): load 64 bits from memory into
// the low element of an XMM register.
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLoad] in {
def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                     "vmovq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
                   VEX, Requires<[UseAVX]>, VEX_WIG;
def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                    "movq\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst,
                      (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                  XS, Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
} // ExeDomain, SchedRW

//===---------------------------------------------------------------------===//
// Move Packed Quadword Int to Quadword Int
//
// MOVQ store form (opcode 0xD6): store element 0 of a v2i64 to memory.
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecStore] in {
def VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(store (i64 (extractelt (v2i64 VR128:$src),
                                                 (iPTR 0))), addr:$dst)]>,
                   VEX, VEX_WIG;
def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                      "movq\t{$src, $dst|$dst, $src}",
                      [(store (i64 (extractelt (v2i64 VR128:$src),
                                               (iPTR 0))), addr:$dst)]>;
} // ExeDomain, SchedRW

// For disassembler only
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVPQI2QIrr : VS2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_WIG;
def MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                      "movq\t{$src, $dst|$dst, $src}", []>;
}

// ".s" suffix selects the store-form (0xD6) register-register encoding.
def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIrr VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
                (MOVPQI2QIrr VR128:$dst, VR128:$src), 0>;

let Predicates = [UseAVX] in {
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIrm addr:$src)), sub_xmm)>;

  def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
            (VMOVPQI2QImr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(v2i64 (X86vzload64 addr:$src)), (MOVQI2PQIrm addr:$src)>;

  def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
            (MOVPQI2QImr addr:$dst, VR128:$src)>;
}

//===---------------------------------------------------------------------===//
// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
// IA32 document. movq xmm1, xmm2 does clear the high bits.
//
// MOVQ reg-reg (XS-prefixed 0x7E): copies the low 64 bits and zeros the rest
// (X86vzmovl).
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
                       XS, VEX, Requires<[UseAVX]>, VEX_WIG;
def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
                      XS, Requires<[UseSSE2]>;
} // ExeDomain, SchedRW

// Reuse the integer vzmovl move for the f64 vector type.
let Predicates = [UseAVX] in {
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
            (VMOVZPQILo2PQIrr VR128:$src)>;
}
let Predicates = [UseSSE2] in {
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
            (MOVZPQILo2PQIrr VR128:$src)>;
}

// 256-bit vzmovl: operate on the low xmm half and re-insert it; the VEX
// 128-bit move zeros the upper ymm half (SUBREG_TO_REG models that).
let Predicates = [UseAVX] in {
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIrr
                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIrr
                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)))),
             sub_xmm)>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

// Shared skeleton for MOVSHDUP/MOVSLDUP: reg-reg and load-folding forms of a
// unary single-FP shuffle parameterized by opcode, SDNode, and vector width.
multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
                              ValueType vt, RegisterClass RC, PatFrag mem_frag,
                              X86MemOperand x86memop, X86FoldableSchedWrite sched> {
def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set RC:$dst, (vt (OpNode RC:$src)))]>,
         Sched<[sched]>;
def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>,
         Sched<[sched.Folded]>;
}

let Predicates = [HasAVX, NoVLX] in {
  defm VMOVSHDUP  : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
                                       v4f32, VR128, loadv4f32, f128mem,
                                       SchedWriteFShuffle.XMM>, VEX, VEX_WIG;
  defm VMOVSLDUP  : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
                                       v4f32, VR128, loadv4f32, f128mem,
                                       SchedWriteFShuffle.XMM>, VEX, VEX_WIG;
  defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
                                       v8f32, VR256, loadv8f32, f256mem,
                                       SchedWriteFShuffle.YMM>, VEX, VEX_L, VEX_WIG;
  defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
                                       v8f32, VR256, loadv8f32, f256mem,
                                       SchedWriteFShuffle.YMM>, VEX, VEX_L, VEX_WIG;
}
defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
                                   memopv4f32, f128mem, SchedWriteFShuffle.XMM>;
defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
                                   memopv4f32, f128mem, SchedWriteFShuffle.XMM>;

// Also match the integer (v4i32/v8i32) forms of the shuffle nodes.
let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v4i32 (X86Movshdup VR128:$src)),
            (VMOVSHDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movshdup (load addr:$src))),
            (VMOVSHDUPrm addr:$src)>;
  def : Pat<(v4i32 (X86Movsldup VR128:$src)),
            (VMOVSLDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movsldup (load addr:$src))),
            (VMOVSLDUPrm addr:$src)>;
  def : Pat<(v8i32 (X86Movshdup VR256:$src)),
            (VMOVSHDUPYrr VR256:$src)>;
  def : Pat<(v8i32 (X86Movshdup (load addr:$src))),
            (VMOVSHDUPYrm addr:$src)>;
  def : Pat<(v8i32 (X86Movsldup VR256:$src)),
            (VMOVSLDUPYrr VR256:$src)>;
  def : Pat<(v8i32 (X86Movsldup (load addr:$src))),
            (VMOVSLDUPYrm addr:$src)>;
}

let Predicates = [UseSSE3] in {
  def : Pat<(v4i32 (X86Movshdup VR128:$src)),
            (MOVSHDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movshdup (memop addr:$src))),
            (MOVSHDUPrm addr:$src)>;
  def : Pat<(v4i32 (X86Movsldup VR128:$src)),
            (MOVSLDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movsldup (memop addr:$src))),
            (MOVSLDUPrm addr:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Replicate Double FP - MOVDDUP
//===---------------------------------------------------------------------===//

// MOVDDUP (opcode 0x12): duplicate the low f64; the memory form only loads
// 64 bits (f64mem) and broadcasts it.
multiclass sse3_replicate_dfp<string OpcodeStr, X86SchedWriteWidths sched> {
def rr  : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set VR128:$dst, (v2f64 (X86Movddup VR128:$src)))]>,
          Sched<[sched.XMM]>;
def rm  : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set VR128:$dst,
                 (v2f64 (X86Movddup
                         (scalar_to_vector (loadf64 addr:$src)))))]>,
          Sched<[sched.XMM.Folded]>;
}

// FIXME: Merge with above classes when there are patterns for the ymm version
multiclass sse3_replicate_dfp_y<string OpcodeStr, X86SchedWriteWidths sched> {
def rr  : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>,
          Sched<[sched.YMM]>;
def rm  : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set VR256:$dst,
                 (v4f64 (X86Movddup (loadv4f64 addr:$src))))]>,
          Sched<[sched.YMM.Folded]>;
}

let Predicates = [HasAVX, NoVLX] in {
  defm VMOVDDUP  : sse3_replicate_dfp<"vmovddup", SchedWriteFShuffle>,
                   VEX, VEX_WIG;
  defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup", SchedWriteFShuffle>,
                   VEX, VEX_L, VEX_WIG;
}

defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>;


// Fold full-vector and zero-extending 64-bit loads into the rm form.
let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(X86Movddup (v2f64 (nonvolatile_load addr:$src))),
            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
  def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
}

let Predicates = [UseSSE3] in {
  // No need for aligned memory as this only loads 64-bits.
  def : Pat<(X86Movddup (v2f64 (nonvolatile_load addr:$src))),
            (MOVDDUPrm addr:$src)>;
  def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
            (MOVDDUPrm addr:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Move Unaligned Integer
//===---------------------------------------------------------------------===//

// LDDQU (opcode 0xF0): unaligned 128/256-bit integer load, selected only via
// its intrinsic.
let Predicates = [HasAVX] in {
  def VLDDQUrm  : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                       "vlddqu\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
                  Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
  def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                       "vlddqu\t{$src, $dst|$dst, $src}",
                       [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>,
                  Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, VEX_L, VEX_WIG;
} // Predicates

def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "lddqu\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
              Sched<[SchedWriteVecMoveLS.XMM.RM]>;

//===---------------------------------------------------------------------===//
// SSE3 - Arithmetic
Andric//===---------------------------------------------------------------------===// 4420*0b57cec5SDimitry Andric 4421*0b57cec5SDimitry Andricmulticlass sse3_addsub<string OpcodeStr, ValueType vt, RegisterClass RC, 4422*0b57cec5SDimitry Andric X86MemOperand x86memop, X86FoldableSchedWrite sched, 4423*0b57cec5SDimitry Andric PatFrag ld_frag, bit Is2Addr = 1> { 4424*0b57cec5SDimitry Andric def rr : I<0xD0, MRMSrcReg, 4425*0b57cec5SDimitry Andric (outs RC:$dst), (ins RC:$src1, RC:$src2), 4426*0b57cec5SDimitry Andric !if(Is2Addr, 4427*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4428*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4429*0b57cec5SDimitry Andric [(set RC:$dst, (vt (X86Addsub RC:$src1, RC:$src2)))]>, 4430*0b57cec5SDimitry Andric Sched<[sched]>; 4431*0b57cec5SDimitry Andric def rm : I<0xD0, MRMSrcMem, 4432*0b57cec5SDimitry Andric (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 4433*0b57cec5SDimitry Andric !if(Is2Addr, 4434*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4435*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4436*0b57cec5SDimitry Andric [(set RC:$dst, (vt (X86Addsub RC:$src1, (ld_frag addr:$src2))))]>, 4437*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 4438*0b57cec5SDimitry Andric} 4439*0b57cec5SDimitry Andric 4440*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in { 4441*0b57cec5SDimitry Andric let ExeDomain = SSEPackedSingle in { 4442*0b57cec5SDimitry Andric defm VADDSUBPS : sse3_addsub<"vaddsubps", v4f32, VR128, f128mem, 4443*0b57cec5SDimitry Andric SchedWriteFAddSizes.PS.XMM, loadv4f32, 0>, 4444*0b57cec5SDimitry Andric XD, VEX_4V, VEX_WIG; 4445*0b57cec5SDimitry Andric defm VADDSUBPSY : sse3_addsub<"vaddsubps", v8f32, VR256, f256mem, 4446*0b57cec5SDimitry Andric SchedWriteFAddSizes.PS.YMM, loadv8f32, 0>, 4447*0b57cec5SDimitry Andric XD, VEX_4V, VEX_L, VEX_WIG; 
4448*0b57cec5SDimitry Andric } 4449*0b57cec5SDimitry Andric let ExeDomain = SSEPackedDouble in { 4450*0b57cec5SDimitry Andric defm VADDSUBPD : sse3_addsub<"vaddsubpd", v2f64, VR128, f128mem, 4451*0b57cec5SDimitry Andric SchedWriteFAddSizes.PD.XMM, loadv2f64, 0>, 4452*0b57cec5SDimitry Andric PD, VEX_4V, VEX_WIG; 4453*0b57cec5SDimitry Andric defm VADDSUBPDY : sse3_addsub<"vaddsubpd", v4f64, VR256, f256mem, 4454*0b57cec5SDimitry Andric SchedWriteFAddSizes.PD.YMM, loadv4f64, 0>, 4455*0b57cec5SDimitry Andric PD, VEX_4V, VEX_L, VEX_WIG; 4456*0b57cec5SDimitry Andric } 4457*0b57cec5SDimitry Andric} 4458*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst", Predicates = [UseSSE3] in { 4459*0b57cec5SDimitry Andric let ExeDomain = SSEPackedSingle in 4460*0b57cec5SDimitry Andric defm ADDSUBPS : sse3_addsub<"addsubps", v4f32, VR128, f128mem, 4461*0b57cec5SDimitry Andric SchedWriteFAddSizes.PS.XMM, memopv4f32>, XD; 4462*0b57cec5SDimitry Andric let ExeDomain = SSEPackedDouble in 4463*0b57cec5SDimitry Andric defm ADDSUBPD : sse3_addsub<"addsubpd", v2f64, VR128, f128mem, 4464*0b57cec5SDimitry Andric SchedWriteFAddSizes.PD.XMM, memopv2f64>, PD; 4465*0b57cec5SDimitry Andric} 4466*0b57cec5SDimitry Andric 4467*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 4468*0b57cec5SDimitry Andric// SSE3 Instructions 4469*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 4470*0b57cec5SDimitry Andric 4471*0b57cec5SDimitry Andric// Horizontal ops 4472*0b57cec5SDimitry Andricmulticlass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, 4473*0b57cec5SDimitry Andric X86MemOperand x86memop, SDNode OpNode, 4474*0b57cec5SDimitry Andric X86FoldableSchedWrite sched, PatFrag ld_frag, 4475*0b57cec5SDimitry Andric bit Is2Addr = 1> { 4476*0b57cec5SDimitry Andric def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), 4477*0b57cec5SDimitry Andric !if(Is2Addr, 
4478*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4479*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4480*0b57cec5SDimitry Andric [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>, 4481*0b57cec5SDimitry Andric Sched<[sched]>; 4482*0b57cec5SDimitry Andric 4483*0b57cec5SDimitry Andric def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 4484*0b57cec5SDimitry Andric !if(Is2Addr, 4485*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4486*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4487*0b57cec5SDimitry Andric [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>, 4488*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 4489*0b57cec5SDimitry Andric} 4490*0b57cec5SDimitry Andricmulticlass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, 4491*0b57cec5SDimitry Andric X86MemOperand x86memop, SDNode OpNode, 4492*0b57cec5SDimitry Andric X86FoldableSchedWrite sched, PatFrag ld_frag, 4493*0b57cec5SDimitry Andric bit Is2Addr = 1> { 4494*0b57cec5SDimitry Andric def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), 4495*0b57cec5SDimitry Andric !if(Is2Addr, 4496*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4497*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4498*0b57cec5SDimitry Andric [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>, 4499*0b57cec5SDimitry Andric Sched<[sched]>; 4500*0b57cec5SDimitry Andric 4501*0b57cec5SDimitry Andric def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 4502*0b57cec5SDimitry Andric !if(Is2Addr, 4503*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4504*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4505*0b57cec5SDimitry Andric [(set 
RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>, 4506*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 4507*0b57cec5SDimitry Andric} 4508*0b57cec5SDimitry Andric 4509*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in { 4510*0b57cec5SDimitry Andric let ExeDomain = SSEPackedSingle in { 4511*0b57cec5SDimitry Andric defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem, 4512*0b57cec5SDimitry Andric X86fhadd, WriteFHAdd, loadv4f32, 0>, VEX_4V, VEX_WIG; 4513*0b57cec5SDimitry Andric defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem, 4514*0b57cec5SDimitry Andric X86fhsub, WriteFHAdd, loadv4f32, 0>, VEX_4V, VEX_WIG; 4515*0b57cec5SDimitry Andric defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem, 4516*0b57cec5SDimitry Andric X86fhadd, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG; 4517*0b57cec5SDimitry Andric defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem, 4518*0b57cec5SDimitry Andric X86fhsub, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG; 4519*0b57cec5SDimitry Andric } 4520*0b57cec5SDimitry Andric let ExeDomain = SSEPackedDouble in { 4521*0b57cec5SDimitry Andric defm VHADDPD : S3_Int<0x7C, "vhaddpd", v2f64, VR128, f128mem, 4522*0b57cec5SDimitry Andric X86fhadd, WriteFHAdd, loadv2f64, 0>, VEX_4V, VEX_WIG; 4523*0b57cec5SDimitry Andric defm VHSUBPD : S3_Int<0x7D, "vhsubpd", v2f64, VR128, f128mem, 4524*0b57cec5SDimitry Andric X86fhsub, WriteFHAdd, loadv2f64, 0>, VEX_4V, VEX_WIG; 4525*0b57cec5SDimitry Andric defm VHADDPDY : S3_Int<0x7C, "vhaddpd", v4f64, VR256, f256mem, 4526*0b57cec5SDimitry Andric X86fhadd, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG; 4527*0b57cec5SDimitry Andric defm VHSUBPDY : S3_Int<0x7D, "vhsubpd", v4f64, VR256, f256mem, 4528*0b57cec5SDimitry Andric X86fhsub, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG; 4529*0b57cec5SDimitry Andric } 4530*0b57cec5SDimitry Andric} 4531*0b57cec5SDimitry Andric 4532*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { 
4533*0b57cec5SDimitry Andric let ExeDomain = SSEPackedSingle in { 4534*0b57cec5SDimitry Andric defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd, 4535*0b57cec5SDimitry Andric WriteFHAdd, memopv4f32>; 4536*0b57cec5SDimitry Andric defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub, 4537*0b57cec5SDimitry Andric WriteFHAdd, memopv4f32>; 4538*0b57cec5SDimitry Andric } 4539*0b57cec5SDimitry Andric let ExeDomain = SSEPackedDouble in { 4540*0b57cec5SDimitry Andric defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd, 4541*0b57cec5SDimitry Andric WriteFHAdd, memopv2f64>; 4542*0b57cec5SDimitry Andric defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub, 4543*0b57cec5SDimitry Andric WriteFHAdd, memopv2f64>; 4544*0b57cec5SDimitry Andric } 4545*0b57cec5SDimitry Andric} 4546*0b57cec5SDimitry Andric 4547*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 4548*0b57cec5SDimitry Andric// SSSE3 - Packed Absolute Instructions 4549*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 4550*0b57cec5SDimitry Andric 4551*0b57cec5SDimitry Andric/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. 
4552*0b57cec5SDimitry Andricmulticlass SS3I_unop_rm<bits<8> opc, string OpcodeStr, ValueType vt, 4553*0b57cec5SDimitry Andric SDNode OpNode, X86SchedWriteWidths sched, PatFrag ld_frag> { 4554*0b57cec5SDimitry Andric def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst), 4555*0b57cec5SDimitry Andric (ins VR128:$src), 4556*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4557*0b57cec5SDimitry Andric [(set VR128:$dst, (vt (OpNode VR128:$src)))]>, 4558*0b57cec5SDimitry Andric Sched<[sched.XMM]>; 4559*0b57cec5SDimitry Andric 4560*0b57cec5SDimitry Andric def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst), 4561*0b57cec5SDimitry Andric (ins i128mem:$src), 4562*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4563*0b57cec5SDimitry Andric [(set VR128:$dst, 4564*0b57cec5SDimitry Andric (vt (OpNode (ld_frag addr:$src))))]>, 4565*0b57cec5SDimitry Andric Sched<[sched.XMM.Folded]>; 4566*0b57cec5SDimitry Andric} 4567*0b57cec5SDimitry Andric 4568*0b57cec5SDimitry Andric/// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. 
4569*0b57cec5SDimitry Andricmulticlass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt, 4570*0b57cec5SDimitry Andric SDNode OpNode, X86SchedWriteWidths sched> { 4571*0b57cec5SDimitry Andric def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst), 4572*0b57cec5SDimitry Andric (ins VR256:$src), 4573*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4574*0b57cec5SDimitry Andric [(set VR256:$dst, (vt (OpNode VR256:$src)))]>, 4575*0b57cec5SDimitry Andric Sched<[sched.YMM]>; 4576*0b57cec5SDimitry Andric 4577*0b57cec5SDimitry Andric def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst), 4578*0b57cec5SDimitry Andric (ins i256mem:$src), 4579*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4580*0b57cec5SDimitry Andric [(set VR256:$dst, 4581*0b57cec5SDimitry Andric (vt (OpNode (load addr:$src))))]>, 4582*0b57cec5SDimitry Andric Sched<[sched.YMM.Folded]>; 4583*0b57cec5SDimitry Andric} 4584*0b57cec5SDimitry Andric 4585*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in { 4586*0b57cec5SDimitry Andric defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, SchedWriteVecALU, 4587*0b57cec5SDimitry Andric load>, VEX, VEX_WIG; 4588*0b57cec5SDimitry Andric defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, SchedWriteVecALU, 4589*0b57cec5SDimitry Andric load>, VEX, VEX_WIG; 4590*0b57cec5SDimitry Andric} 4591*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 4592*0b57cec5SDimitry Andric defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, SchedWriteVecALU, 4593*0b57cec5SDimitry Andric load>, VEX, VEX_WIG; 4594*0b57cec5SDimitry Andric} 4595*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { 4596*0b57cec5SDimitry Andric defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs, SchedWriteVecALU>, 4597*0b57cec5SDimitry Andric VEX, VEX_L, VEX_WIG; 4598*0b57cec5SDimitry Andric defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs, SchedWriteVecALU>, 4599*0b57cec5SDimitry Andric VEX, 
VEX_L, VEX_WIG; 4600*0b57cec5SDimitry Andric} 4601*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in { 4602*0b57cec5SDimitry Andric defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs, SchedWriteVecALU>, 4603*0b57cec5SDimitry Andric VEX, VEX_L, VEX_WIG; 4604*0b57cec5SDimitry Andric} 4605*0b57cec5SDimitry Andric 4606*0b57cec5SDimitry Andricdefm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, SchedWriteVecALU, 4607*0b57cec5SDimitry Andric memop>; 4608*0b57cec5SDimitry Andricdefm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, SchedWriteVecALU, 4609*0b57cec5SDimitry Andric memop>; 4610*0b57cec5SDimitry Andricdefm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, SchedWriteVecALU, 4611*0b57cec5SDimitry Andric memop>; 4612*0b57cec5SDimitry Andric 4613*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 4614*0b57cec5SDimitry Andric// SSSE3 - Packed Binary Operator Instructions 4615*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 4616*0b57cec5SDimitry Andric 4617*0b57cec5SDimitry Andric/// SS3I_binop_rm - Simple SSSE3 bin op 4618*0b57cec5SDimitry Andricmulticlass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 4619*0b57cec5SDimitry Andric ValueType DstVT, ValueType OpVT, RegisterClass RC, 4620*0b57cec5SDimitry Andric PatFrag memop_frag, X86MemOperand x86memop, 4621*0b57cec5SDimitry Andric X86FoldableSchedWrite sched, bit Is2Addr = 1> { 4622*0b57cec5SDimitry Andric let isCommutable = 1 in 4623*0b57cec5SDimitry Andric def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst), 4624*0b57cec5SDimitry Andric (ins RC:$src1, RC:$src2), 4625*0b57cec5SDimitry Andric !if(Is2Addr, 4626*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4627*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4628*0b57cec5SDimitry Andric [(set RC:$dst, (DstVT (OpNode (OpVT RC:$src1), RC:$src2)))]>, 
4629*0b57cec5SDimitry Andric Sched<[sched]>; 4630*0b57cec5SDimitry Andric def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst), 4631*0b57cec5SDimitry Andric (ins RC:$src1, x86memop:$src2), 4632*0b57cec5SDimitry Andric !if(Is2Addr, 4633*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4634*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4635*0b57cec5SDimitry Andric [(set RC:$dst, 4636*0b57cec5SDimitry Andric (DstVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))]>, 4637*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 4638*0b57cec5SDimitry Andric} 4639*0b57cec5SDimitry Andric 4640*0b57cec5SDimitry Andric/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. 4641*0b57cec5SDimitry Andricmulticlass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, 4642*0b57cec5SDimitry Andric Intrinsic IntId128, X86FoldableSchedWrite sched, 4643*0b57cec5SDimitry Andric PatFrag ld_frag, bit Is2Addr = 1> { 4644*0b57cec5SDimitry Andric let isCommutable = 1 in 4645*0b57cec5SDimitry Andric def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst), 4646*0b57cec5SDimitry Andric (ins VR128:$src1, VR128:$src2), 4647*0b57cec5SDimitry Andric !if(Is2Addr, 4648*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4649*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4650*0b57cec5SDimitry Andric [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, 4651*0b57cec5SDimitry Andric Sched<[sched]>; 4652*0b57cec5SDimitry Andric def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst), 4653*0b57cec5SDimitry Andric (ins VR128:$src1, i128mem:$src2), 4654*0b57cec5SDimitry Andric !if(Is2Addr, 4655*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4656*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4657*0b57cec5SDimitry Andric [(set VR128:$dst, 4658*0b57cec5SDimitry Andric 
(IntId128 VR128:$src1, (ld_frag addr:$src2)))]>, 4659*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 4660*0b57cec5SDimitry Andric} 4661*0b57cec5SDimitry Andric 4662*0b57cec5SDimitry Andricmulticlass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr, 4663*0b57cec5SDimitry Andric Intrinsic IntId256, 4664*0b57cec5SDimitry Andric X86FoldableSchedWrite sched> { 4665*0b57cec5SDimitry Andric let isCommutable = 1 in 4666*0b57cec5SDimitry Andric def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst), 4667*0b57cec5SDimitry Andric (ins VR256:$src1, VR256:$src2), 4668*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4669*0b57cec5SDimitry Andric [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>, 4670*0b57cec5SDimitry Andric Sched<[sched]>; 4671*0b57cec5SDimitry Andric def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst), 4672*0b57cec5SDimitry Andric (ins VR256:$src1, i256mem:$src2), 4673*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4674*0b57cec5SDimitry Andric [(set VR256:$dst, 4675*0b57cec5SDimitry Andric (IntId256 VR256:$src1, (load addr:$src2)))]>, 4676*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 4677*0b57cec5SDimitry Andric} 4678*0b57cec5SDimitry Andric 4679*0b57cec5SDimitry Andriclet ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in { 4680*0b57cec5SDimitry Andriclet isCommutable = 0 in { 4681*0b57cec5SDimitry Andric defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8, 4682*0b57cec5SDimitry Andric VR128, load, i128mem, 4683*0b57cec5SDimitry Andric SchedWriteVarShuffle.XMM, 0>, VEX_4V, VEX_WIG; 4684*0b57cec5SDimitry Andric defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16, 4685*0b57cec5SDimitry Andric v16i8, VR128, load, i128mem, 4686*0b57cec5SDimitry Andric SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG; 4687*0b57cec5SDimitry Andric} 4688*0b57cec5SDimitry Andricdefm VPMULHRSW : SS3I_binop_rm<0x0B, 
"vpmulhrsw", X86mulhrs, v8i16, v8i16, 4689*0b57cec5SDimitry Andric VR128, load, i128mem, 4690*0b57cec5SDimitry Andric SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG; 4691*0b57cec5SDimitry Andric} 4692*0b57cec5SDimitry Andric 4693*0b57cec5SDimitry Andriclet ImmT = NoImm, Predicates = [HasAVX] in { 4694*0b57cec5SDimitry Andriclet isCommutable = 0 in { 4695*0b57cec5SDimitry Andric defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128, 4696*0b57cec5SDimitry Andric load, i128mem, 4697*0b57cec5SDimitry Andric SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG; 4698*0b57cec5SDimitry Andric defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128, 4699*0b57cec5SDimitry Andric load, i128mem, 4700*0b57cec5SDimitry Andric SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG; 4701*0b57cec5SDimitry Andric defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128, 4702*0b57cec5SDimitry Andric load, i128mem, 4703*0b57cec5SDimitry Andric SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG; 4704*0b57cec5SDimitry Andric defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128, 4705*0b57cec5SDimitry Andric load, i128mem, 4706*0b57cec5SDimitry Andric SchedWritePHAdd.XMM, 0>, VEX_4V; 4707*0b57cec5SDimitry Andric defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", 4708*0b57cec5SDimitry Andric int_x86_ssse3_psign_b_128, 4709*0b57cec5SDimitry Andric SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG; 4710*0b57cec5SDimitry Andric defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", 4711*0b57cec5SDimitry Andric int_x86_ssse3_psign_w_128, 4712*0b57cec5SDimitry Andric SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG; 4713*0b57cec5SDimitry Andric defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", 4714*0b57cec5SDimitry Andric int_x86_ssse3_psign_d_128, 4715*0b57cec5SDimitry Andric SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG; 4716*0b57cec5SDimitry Andric defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", 4717*0b57cec5SDimitry Andric int_x86_ssse3_phadd_sw_128, 
4718*0b57cec5SDimitry Andric SchedWritePHAdd.XMM, load, 0>, VEX_4V, VEX_WIG; 4719*0b57cec5SDimitry Andric defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", 4720*0b57cec5SDimitry Andric int_x86_ssse3_phsub_sw_128, 4721*0b57cec5SDimitry Andric SchedWritePHAdd.XMM, load, 0>, VEX_4V, VEX_WIG; 4722*0b57cec5SDimitry Andric} 4723*0b57cec5SDimitry Andric} 4724*0b57cec5SDimitry Andric 4725*0b57cec5SDimitry Andriclet ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { 4726*0b57cec5SDimitry Andriclet isCommutable = 0 in { 4727*0b57cec5SDimitry Andric defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8, 4728*0b57cec5SDimitry Andric VR256, load, i256mem, 4729*0b57cec5SDimitry Andric SchedWriteVarShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; 4730*0b57cec5SDimitry Andric defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16, 4731*0b57cec5SDimitry Andric v32i8, VR256, load, i256mem, 4732*0b57cec5SDimitry Andric SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; 4733*0b57cec5SDimitry Andric} 4734*0b57cec5SDimitry Andricdefm VPMULHRSWY : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16, 4735*0b57cec5SDimitry Andric VR256, load, i256mem, 4736*0b57cec5SDimitry Andric SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; 4737*0b57cec5SDimitry Andric} 4738*0b57cec5SDimitry Andric 4739*0b57cec5SDimitry Andriclet ImmT = NoImm, Predicates = [HasAVX2] in { 4740*0b57cec5SDimitry Andriclet isCommutable = 0 in { 4741*0b57cec5SDimitry Andric defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16, 4742*0b57cec5SDimitry Andric VR256, load, i256mem, 4743*0b57cec5SDimitry Andric SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; 4744*0b57cec5SDimitry Andric defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256, 4745*0b57cec5SDimitry Andric load, i256mem, 4746*0b57cec5SDimitry Andric SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; 4747*0b57cec5SDimitry Andric defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, 
v16i16, v16i16, 4748*0b57cec5SDimitry Andric VR256, load, i256mem, 4749*0b57cec5SDimitry Andric SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; 4750*0b57cec5SDimitry Andric defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256, 4751*0b57cec5SDimitry Andric load, i256mem, 4752*0b57cec5SDimitry Andric SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L; 4753*0b57cec5SDimitry Andric defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b, 4754*0b57cec5SDimitry Andric SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG; 4755*0b57cec5SDimitry Andric defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w, 4756*0b57cec5SDimitry Andric SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG; 4757*0b57cec5SDimitry Andric defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", int_x86_avx2_psign_d, 4758*0b57cec5SDimitry Andric SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG; 4759*0b57cec5SDimitry Andric defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw", 4760*0b57cec5SDimitry Andric int_x86_avx2_phadd_sw, 4761*0b57cec5SDimitry Andric SchedWritePHAdd.YMM>, VEX_4V, VEX_L, VEX_WIG; 4762*0b57cec5SDimitry Andric defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", 4763*0b57cec5SDimitry Andric int_x86_avx2_phsub_sw, 4764*0b57cec5SDimitry Andric SchedWritePHAdd.YMM>, VEX_4V, VEX_L, VEX_WIG; 4765*0b57cec5SDimitry Andric} 4766*0b57cec5SDimitry Andric} 4767*0b57cec5SDimitry Andric 4768*0b57cec5SDimitry Andric// None of these have i8 immediate fields. 
4769*0b57cec5SDimitry Andriclet ImmT = NoImm, Constraints = "$src1 = $dst" in { 4770*0b57cec5SDimitry Andriclet isCommutable = 0 in { 4771*0b57cec5SDimitry Andric defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, v8i16, VR128, 4772*0b57cec5SDimitry Andric memop, i128mem, SchedWritePHAdd.XMM>; 4773*0b57cec5SDimitry Andric defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, v4i32, VR128, 4774*0b57cec5SDimitry Andric memop, i128mem, SchedWritePHAdd.XMM>; 4775*0b57cec5SDimitry Andric defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, v8i16, VR128, 4776*0b57cec5SDimitry Andric memop, i128mem, SchedWritePHAdd.XMM>; 4777*0b57cec5SDimitry Andric defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, v4i32, VR128, 4778*0b57cec5SDimitry Andric memop, i128mem, SchedWritePHAdd.XMM>; 4779*0b57cec5SDimitry Andric defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", int_x86_ssse3_psign_b_128, 4780*0b57cec5SDimitry Andric SchedWriteVecALU.XMM, memop>; 4781*0b57cec5SDimitry Andric defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", int_x86_ssse3_psign_w_128, 4782*0b57cec5SDimitry Andric SchedWriteVecALU.XMM, memop>; 4783*0b57cec5SDimitry Andric defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", int_x86_ssse3_psign_d_128, 4784*0b57cec5SDimitry Andric SchedWriteVecALU.XMM, memop>; 4785*0b57cec5SDimitry Andric defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, v16i8, VR128, 4786*0b57cec5SDimitry Andric memop, i128mem, SchedWriteVarShuffle.XMM>; 4787*0b57cec5SDimitry Andric defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", 4788*0b57cec5SDimitry Andric int_x86_ssse3_phadd_sw_128, 4789*0b57cec5SDimitry Andric SchedWritePHAdd.XMM, memop>; 4790*0b57cec5SDimitry Andric defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", 4791*0b57cec5SDimitry Andric int_x86_ssse3_phsub_sw_128, 4792*0b57cec5SDimitry Andric SchedWritePHAdd.XMM, memop>; 4793*0b57cec5SDimitry Andric defm PMADDUBSW : SS3I_binop_rm<0x04, "pmaddubsw", X86vpmaddubsw, v8i16, 4794*0b57cec5SDimitry Andric 
v16i8, VR128, memop, i128mem, 4795*0b57cec5SDimitry Andric SchedWriteVecIMul.XMM>; 4796*0b57cec5SDimitry Andric} 4797*0b57cec5SDimitry Andricdefm PMULHRSW : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, v8i16, 4798*0b57cec5SDimitry Andric VR128, memop, i128mem, SchedWriteVecIMul.XMM>; 4799*0b57cec5SDimitry Andric} 4800*0b57cec5SDimitry Andric 4801*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 4802*0b57cec5SDimitry Andric// SSSE3 - Packed Align Instruction Patterns 4803*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 4804*0b57cec5SDimitry Andric 4805*0b57cec5SDimitry Andricmulticlass ssse3_palignr<string asm, ValueType VT, RegisterClass RC, 4806*0b57cec5SDimitry Andric PatFrag memop_frag, X86MemOperand x86memop, 4807*0b57cec5SDimitry Andric X86FoldableSchedWrite sched, bit Is2Addr = 1> { 4808*0b57cec5SDimitry Andric let hasSideEffects = 0 in { 4809*0b57cec5SDimitry Andric def rri : SS3AI<0x0F, MRMSrcReg, (outs RC:$dst), 4810*0b57cec5SDimitry Andric (ins RC:$src1, RC:$src2, u8imm:$src3), 4811*0b57cec5SDimitry Andric !if(Is2Addr, 4812*0b57cec5SDimitry Andric !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 4813*0b57cec5SDimitry Andric !strconcat(asm, 4814*0b57cec5SDimitry Andric "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 4815*0b57cec5SDimitry Andric [(set RC:$dst, (VT (X86PAlignr RC:$src1, RC:$src2, (i8 imm:$src3))))]>, 4816*0b57cec5SDimitry Andric Sched<[sched]>; 4817*0b57cec5SDimitry Andric let mayLoad = 1 in 4818*0b57cec5SDimitry Andric def rmi : SS3AI<0x0F, MRMSrcMem, (outs RC:$dst), 4819*0b57cec5SDimitry Andric (ins RC:$src1, x86memop:$src2, u8imm:$src3), 4820*0b57cec5SDimitry Andric !if(Is2Addr, 4821*0b57cec5SDimitry Andric !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 4822*0b57cec5SDimitry Andric !strconcat(asm, 4823*0b57cec5SDimitry Andric "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 
4824*0b57cec5SDimitry Andric [(set RC:$dst, (VT (X86PAlignr RC:$src1, 4825*0b57cec5SDimitry Andric (memop_frag addr:$src2), 4826*0b57cec5SDimitry Andric (i8 imm:$src3))))]>, 4827*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 4828*0b57cec5SDimitry Andric } 4829*0b57cec5SDimitry Andric} 4830*0b57cec5SDimitry Andric 4831*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in 4832*0b57cec5SDimitry Andric defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, load, i128mem, 4833*0b57cec5SDimitry Andric SchedWriteShuffle.XMM, 0>, VEX_4V, VEX_WIG; 4834*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in 4835*0b57cec5SDimitry Andric defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, load, i256mem, 4836*0b57cec5SDimitry Andric SchedWriteShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; 4837*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in 4838*0b57cec5SDimitry Andric defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memop, i128mem, 4839*0b57cec5SDimitry Andric SchedWriteShuffle.XMM>; 4840*0b57cec5SDimitry Andric 4841*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 4842*0b57cec5SDimitry Andric// SSSE3 - Thread synchronization 4843*0b57cec5SDimitry Andric//===---------------------------------------------------------------------===// 4844*0b57cec5SDimitry Andric 4845*0b57cec5SDimitry Andriclet SchedRW = [WriteSystem] in { 4846*0b57cec5SDimitry Andriclet Uses = [EAX, ECX, EDX] in 4847*0b57cec5SDimitry Andricdef MONITOR32rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, 4848*0b57cec5SDimitry Andric TB, Requires<[HasSSE3, Not64BitMode]>; 4849*0b57cec5SDimitry Andriclet Uses = [RAX, ECX, EDX] in 4850*0b57cec5SDimitry Andricdef MONITOR64rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, 4851*0b57cec5SDimitry Andric TB, Requires<[HasSSE3, In64BitMode]>; 4852*0b57cec5SDimitry Andric 4853*0b57cec5SDimitry Andriclet Uses = [ECX, EAX] in 
4854*0b57cec5SDimitry Andricdef MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", 4855*0b57cec5SDimitry Andric [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>; 4856*0b57cec5SDimitry Andric} // SchedRW 4857*0b57cec5SDimitry Andric 4858*0b57cec5SDimitry Andricdef : InstAlias<"mwait\t{%eax, %ecx|ecx, eax}", (MWAITrr)>, Requires<[Not64BitMode]>; 4859*0b57cec5SDimitry Andricdef : InstAlias<"mwait\t{%rax, %rcx|rcx, rax}", (MWAITrr)>, Requires<[In64BitMode]>; 4860*0b57cec5SDimitry Andric 4861*0b57cec5SDimitry Andricdef : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITOR32rrr)>, 4862*0b57cec5SDimitry Andric Requires<[Not64BitMode]>; 4863*0b57cec5SDimitry Andricdef : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITOR64rrr)>, 4864*0b57cec5SDimitry Andric Requires<[In64BitMode]>; 4865*0b57cec5SDimitry Andric 4866*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 4867*0b57cec5SDimitry Andric// SSE4.1 - Packed Move with Sign/Zero Extend 4868*0b57cec5SDimitry Andric// NOTE: Any Extend is promoted to Zero Extend in X86ISelDAGToDAG.cpp 4869*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 4870*0b57cec5SDimitry Andric 4871*0b57cec5SDimitry Andricmulticlass SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp, 4872*0b57cec5SDimitry Andric RegisterClass OutRC, RegisterClass InRC, 4873*0b57cec5SDimitry Andric X86FoldableSchedWrite sched> { 4874*0b57cec5SDimitry Andric def rr : SS48I<opc, MRMSrcReg, (outs OutRC:$dst), (ins InRC:$src), 4875*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 4876*0b57cec5SDimitry Andric Sched<[sched]>; 4877*0b57cec5SDimitry Andric 4878*0b57cec5SDimitry Andric def rm : SS48I<opc, MRMSrcMem, (outs OutRC:$dst), (ins MemOp:$src), 4879*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 4880*0b57cec5SDimitry Andric Sched<[sched.Folded]>; 
}

// Instantiate the SSE (NAME), AVX ("v"-prefixed, VEX) and AVX2 (256-bit,
// "Y"-suffixed, VEX.L) variants of one pmovsx/pmovzx opcode.
multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,
                              X86MemOperand MemOp, X86MemOperand MemYOp,
                              Predicate prd> {
  defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128,
                               SchedWriteShuffle.XMM>;
  let Predicates = [HasAVX, prd] in
    defm V#NAME : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
                                   VR128, VR128, SchedWriteShuffle.XMM>,
                                   VEX, VEX_WIG;
  let Predicates = [HasAVX2, prd] in
    defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
                                     VR256, VR128, WriteShuffle256>,
                                     VEX, VEX_L, VEX_WIG;
}

// Emit both the sign-extend and zero-extend family for one element-width
// combination; the zero-extend opcode is always sign-extend opcode + 0x10.
multiclass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
                          X86MemOperand MemYOp, Predicate prd> {
  defm PMOVSX#NAME : SS41I_pmovx_rm_all<opc, !strconcat("pmovsx", OpcodeStr),
                                        MemOp, MemYOp, prd>;
  defm PMOVZX#NAME : SS41I_pmovx_rm_all<!add(opc, 0x10),
                                        !strconcat("pmovzx", OpcodeStr),
                                        MemOp, MemYOp, prd>;
}

defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem, NoVLX_Or_NoBWI>;
defm WD : SS41I_pmovx_rm<0x23, "wd", i64mem, i128mem, NoVLX>;
defm DQ : SS41I_pmovx_rm<0x25, "dq", i64mem, i128mem, NoVLX>;

defm BD : SS41I_pmovx_rm<0x21, "bd", i32mem, i64mem, NoVLX>;
defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem, NoVLX>;

defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem, NoVLX>;

// AVX2 Patterns
multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
                                     SDNode ExtOp, SDNode InVecOp> {
  // Register-Register patterns
  let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
  }
  let Predicates = [HasAVX2, NoVLX] in {
  def : Pat<(v8i32 (InVecOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BDYrr) VR128:$src)>;
  def : Pat<(v4i64 (InVecOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BQYrr) VR128:$src)>;

  def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
            (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
  def : Pat<(v4i64 (InVecOp (v8i16 VR128:$src))),
            (!cast<I>(OpcPrefix#WQYrr) VR128:$src)>;

  def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
            (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
  }

  // Simple Register-Memory patterns
  let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;

  def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
  }

  let Predicates = [HasAVX2, NoVLX] in {
  def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
  def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;

  def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
  def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;

  def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
  }

  // AVX2 Register-Memory patterns
  let Predicates = [HasAVX2, NoVLX] in {
  def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;

  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (v16i8 (X86vzload64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (v16i8 (X86vzload64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (v8i16 (X86vzload64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
  }
}

// Instantiate for both sign extension (sext/sext_invec) and zero extension
// (zext/zext_invec).
defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", sext, sext_invec>;
defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", zext, zext_invec>;

// SSE4.1/AVX patterns.
// 128-bit pmovsx/pmovzx selection patterns shared between SSE4.1 and AVX;
// ExtTy ("s" or "z") picks the matching sign/zero extload PatFrags.
multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
                                SDNode ExtOp> {
  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  def : Pat<(v8i16 (ExtOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BWrr) VR128:$src)>;
  }
  let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v4i32 (ExtOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BDrr) VR128:$src)>;
  def : Pat<(v2i64 (ExtOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BQrr) VR128:$src)>;

  def : Pat<(v4i32 (ExtOp (v8i16 VR128:$src))),
            (!cast<I>(OpcPrefix#WDrr) VR128:$src)>;
  def : Pat<(v2i64 (ExtOp (v8i16 VR128:$src))),
            (!cast<I>(OpcPrefix#WQrr) VR128:$src)>;

  def : Pat<(v2i64 (ExtOp (v4i32 VR128:$src))),
            (!cast<I>(OpcPrefix#DQrr) VR128:$src)>;
  }
  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  def : Pat<(v8i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  }
  let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
  def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;

  def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
  def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;

  def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  }
  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  def : Pat<(v8i16 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  }
  let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;

  def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;

  def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;

  def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;

  def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (loadv4i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  }
}

defm : SS41I_pmovx_patterns<"VPMOVSX", "s", sext_invec>;
defm : SS41I_pmovx_patterns<"VPMOVZX", "z", zext_invec>;

let Predicates = [UseSSE41] in {
  defm : SS41I_pmovx_patterns<"PMOVSX", "s", sext_invec>;
  defm : SS41I_pmovx_patterns<"PMOVZX", "z", zext_invec>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Extract Instructions
//===----------------------------------------------------------------------===//

/// SS41I_extract8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem
multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1),
                                                     imm:$src2))]>,
                   Sched<[WriteVecExtract]>;
  let hasSideEffects = 0, mayStore = 1 in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins i8mem:$dst, VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))),
                           addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}

let Predicates = [HasAVX, NoBWI] in
  defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX, VEX_WIG;

defm PEXTRB : SS41I_extract8<0x14, "pextrb">;


/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
  // Register form exists only for the disassembler (isCodeGenOnly +
  // ForceDisassemble); FoldGenData ties it to the corresponding rr
  // instruction, which is defined elsewhere in this file.
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rr_REV : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                       (ins VR128:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
                       Sched<[WriteVecExtract]>, FoldGenData<NAME#rr>;

  let hasSideEffects = 0, mayStore = 1 in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins i16mem:$dst, VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), imm:$src2))),
                           addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}

let Predicates = [HasAVX, NoBWI] in
  defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX, VEX_WIG;

defm PEXTRW : SS41I_extract16<0x15, "pextrw">;


/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
                   (ins VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set GR32:$dst,
                     (extractelt (v4i32 VR128:$src1), imm:$src2))]>,
                   Sched<[WriteVecExtract]>;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins i32mem:$dst, VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
                           addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}

let Predicates = [HasAVX, NoDQI] in
  defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;

defm PEXTRD : SS41I_extract32<0x16, "pextrd">;

/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
                   (ins VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set GR64:$dst,
                     (extractelt (v2i64 VR128:$src1), imm:$src2))]>,
                   Sched<[WriteVecExtract]>;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins i64mem:$dst, VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
                           addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}

let Predicates = [HasAVX, NoDQI] in
  defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;

defm PEXTRQ : SS41I_extract64<0x16, "pextrq">, REX_W;

/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
/// destination
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set GR32orGR64:$dst,
                     (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
                   Sched<[WriteVecExtract]>;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins f32mem:$dst, VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
                           addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}

let ExeDomain = SSEPackedSingle in {
  let Predicates = [UseAVX] in
    defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX, VEX_WIG;
  defm EXTRACTPS   : SS41I_extractf32<0x17, "extractps">;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Insert Instructions
//===----------------------------------------------------------------------===//

// Is2Addr selects the 2-operand SSE asm string (destination tied to $src1
// via the Constraints at the instantiation sites) vs. the 3-operand AVX form.
multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, GR32orGR64:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86pinsrb VR128:$src1, GR32orGR64:$src2, imm:$src3))]>,
      Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i8mem:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), imm:$src3))]>,
      Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

let Predicates = [HasAVX, NoBWI] in
  defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V, VEX_WIG;
let Constraints = "$src1 = $dst" in
  defm PINSRB  : SS41I_insert8<0x20, "pinsrb">;

multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, GR32:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
      Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i32mem:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), imm:$src3)))]>,
      Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

let Predicates = [HasAVX, NoDQI] in
  defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
  defm PINSRD : SS41I_insert32<0x22, "pinsrd">;

multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, GR64:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
      Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i64mem:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), imm:$src3)))]>,
      Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

let Predicates = [HasAVX, NoDQI] in
  defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
let Constraints = "$src1 = $dst" in
  defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;

// insertps has a few different modes, there's the first two here below which
// are optimized inserts that won't zero arbitrary elements in the destination
// vector. The next one matches the intrinsic and could zero arbitrary elements
// in the target vector.
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, VR128:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86insertps VR128:$src1, VR128:$src2, imm:$src3))]>,
      Sched<[SchedWriteFShuffle.XMM]>;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, f32mem:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86insertps VR128:$src1,
                     (v4f32 (scalar_to_vector (loadf32
                       addr:$src2))),
                     imm:$src3))]>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

let ExeDomain = SSEPackedSingle in {
  let Predicates = [UseAVX] in
    defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>,
                     VEX_4V, VEX_WIG;
  let Constraints = "$src1 = $dst" in
    defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1>;
}

let Predicates = [UseAVX] in {
  // If we're inserting an element from a vbroadcast of a load, fold the
  // load into the X86insertps instruction.
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1),
                                (X86VBroadcast (loadf32 addr:$src2)), imm:$src3)),
            (VINSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1),
                                (X86VBroadcast (loadv4f32 addr:$src2)), imm:$src3)),
            (VINSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Round Instructions
//===----------------------------------------------------------------------===//

// Packed round (roundps/roundpd style): immediate selects the rounding mode.
multiclass sse41_fp_unop_p<bits<8> opc, string OpcodeStr,
                           X86MemOperand x86memop, RegisterClass RC,
                           ValueType VT, PatFrag mem_frag, SDNode OpNode,
                           X86FoldableSchedWrite sched> {
  // Vector intrinsic operation, reg
  def r : SS4AIi8<opc, MRMSrcReg,
                  (outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set RC:$dst, (VT (OpNode RC:$src1, imm:$src2)))]>,
                  Sched<[sched]>;

  // Vector intrinsic operation, mem
  def m : SS4AIi8<opc, MRMSrcMem,
                  (outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set RC:$dst,
                        (VT (OpNode (mem_frag addr:$src1),imm:$src2)))]>,
                  Sched<[sched.Folded]>;
}

// AVX 3-operand scalar round forms.  No ISel patterns here ([]) and
// hasSideEffects = 0: these defs only provide encodings/asm.
multiclass avx_fp_unop_rm<bits<8> opcss, bits<8> opcsd,
                          string OpcodeStr, X86FoldableSchedWrite sched> {
let ExeDomain = SSEPackedSingle, hasSideEffects = 0 in {
  def SSr : SS4AIi8<opcss, MRMSrcReg,
        (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32u8imm:$src3),
        !strconcat(OpcodeStr,
            "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        []>, Sched<[sched]>;

  let mayLoad = 1 in
  def SSm : SS4AIi8<opcss, MRMSrcMem,
        (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, i32u8imm:$src3),
        !strconcat(OpcodeStr,
            "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, hasSideEffects = 0

let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
  def SDr : SS4AIi8<opcsd, MRMSrcReg,
        (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32u8imm:$src3),
        !strconcat(OpcodeStr,
            "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        []>, Sched<[sched]>;

  let mayLoad = 1 in
  def SDm : SS4AIi8<opcsd, MRMSrcMem,
        (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, i32u8imm:$src3),
        !strconcat(OpcodeStr,
            "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, hasSideEffects = 0
}

// SSE4.1 2-operand scalar round forms; like avx_fp_unop_rm these carry no
// ISel patterns.
multiclass sse41_fp_unop_s<bits<8> opcss, bits<8> opcsd,
                           string OpcodeStr, X86FoldableSchedWrite sched> {
let ExeDomain = SSEPackedSingle, hasSideEffects = 0 in {
  def SSr : SS4AIi8<opcss, MRMSrcReg,
        (outs FR32:$dst), (ins FR32:$src1, i32u8imm:$src2),
        !strconcat(OpcodeStr,
                   "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
        []>, Sched<[sched]>;

  let mayLoad = 1 in
  def SSm : SS4AIi8<opcss, MRMSrcMem,
        (outs FR32:$dst), (ins f32mem:$src1, i32u8imm:$src2),
        !strconcat(OpcodeStr,
                   "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
        []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, hasSideEffects = 0

let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
  def SDr : SS4AIi8<opcsd, MRMSrcReg,
        (outs FR64:$dst), (ins FR64:$src1, i32u8imm:$src2),
        !strconcat(OpcodeStr,
                   "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
        []>, Sched<[sched]>;

  let mayLoad = 1 in
  def SDm : SS4AIi8<opcsd, MRMSrcMem,
        (outs FR64:$dst), (ins f64mem:$src1, i32u8imm:$src2),
        !strconcat(OpcodeStr,
                   "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
        []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, hasSideEffects = 0
}

multiclass sse41_fp_binop_s<bits<8> opcss, bits<8> opcsd,
                            string OpcodeStr, X86FoldableSchedWrite sched,
                            ValueType VT32, ValueType VT64,
                            SDNode OpNode, bit Is2Addr = 1> {
let ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 in {
  def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
        (outs VR128:$dst), (ins VR128:$src1,
VR128:$src2, i32u8imm:$src3), 5437*0b57cec5SDimitry Andric !if(Is2Addr, 5438*0b57cec5SDimitry Andric !strconcat(OpcodeStr, 5439*0b57cec5SDimitry Andric "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5440*0b57cec5SDimitry Andric !strconcat(OpcodeStr, 5441*0b57cec5SDimitry Andric "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5442*0b57cec5SDimitry Andric [(set VR128:$dst, (VT32 (OpNode VR128:$src1, VR128:$src2, imm:$src3)))]>, 5443*0b57cec5SDimitry Andric Sched<[sched]>; 5444*0b57cec5SDimitry Andric 5445*0b57cec5SDimitry Andric def SSm_Int : SS4AIi8<opcss, MRMSrcMem, 5446*0b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32u8imm:$src3), 5447*0b57cec5SDimitry Andric !if(Is2Addr, 5448*0b57cec5SDimitry Andric !strconcat(OpcodeStr, 5449*0b57cec5SDimitry Andric "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5450*0b57cec5SDimitry Andric !strconcat(OpcodeStr, 5451*0b57cec5SDimitry Andric "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5452*0b57cec5SDimitry Andric [(set VR128:$dst, 5453*0b57cec5SDimitry Andric (OpNode VR128:$src1, sse_load_f32:$src2, imm:$src3))]>, 5454*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 5455*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 5456*0b57cec5SDimitry Andric 5457*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in { 5458*0b57cec5SDimitry Andric def SDr_Int : SS4AIi8<opcsd, MRMSrcReg, 5459*0b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3), 5460*0b57cec5SDimitry Andric !if(Is2Addr, 5461*0b57cec5SDimitry Andric !strconcat(OpcodeStr, 5462*0b57cec5SDimitry Andric "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5463*0b57cec5SDimitry Andric !strconcat(OpcodeStr, 5464*0b57cec5SDimitry Andric "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5465*0b57cec5SDimitry Andric [(set VR128:$dst, (VT64 (OpNode VR128:$src1, VR128:$src2, imm:$src3)))]>, 5466*0b57cec5SDimitry 
Andric Sched<[sched]>; 5467*0b57cec5SDimitry Andric 5468*0b57cec5SDimitry Andric def SDm_Int : SS4AIi8<opcsd, MRMSrcMem, 5469*0b57cec5SDimitry Andric (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32u8imm:$src3), 5470*0b57cec5SDimitry Andric !if(Is2Addr, 5471*0b57cec5SDimitry Andric !strconcat(OpcodeStr, 5472*0b57cec5SDimitry Andric "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5473*0b57cec5SDimitry Andric !strconcat(OpcodeStr, 5474*0b57cec5SDimitry Andric "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5475*0b57cec5SDimitry Andric [(set VR128:$dst, 5476*0b57cec5SDimitry Andric (OpNode VR128:$src1, sse_load_f64:$src2, imm:$src3))]>, 5477*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 5478*0b57cec5SDimitry Andric} // ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 5479*0b57cec5SDimitry Andric} 5480*0b57cec5SDimitry Andric 5481*0b57cec5SDimitry Andric// FP round - roundss, roundps, roundsd, roundpd 5482*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 5483*0b57cec5SDimitry Andric let ExeDomain = SSEPackedSingle in { 5484*0b57cec5SDimitry Andric // Intrinsic form 5485*0b57cec5SDimitry Andric defm VROUNDPS : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32, 5486*0b57cec5SDimitry Andric loadv4f32, X86VRndScale, SchedWriteFRnd.XMM>, 5487*0b57cec5SDimitry Andric VEX, VEX_WIG; 5488*0b57cec5SDimitry Andric defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32, 5489*0b57cec5SDimitry Andric loadv8f32, X86VRndScale, SchedWriteFRnd.YMM>, 5490*0b57cec5SDimitry Andric VEX, VEX_L, VEX_WIG; 5491*0b57cec5SDimitry Andric } 5492*0b57cec5SDimitry Andric 5493*0b57cec5SDimitry Andric let ExeDomain = SSEPackedDouble in { 5494*0b57cec5SDimitry Andric defm VROUNDPD : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64, 5495*0b57cec5SDimitry Andric loadv2f64, X86VRndScale, SchedWriteFRnd.XMM>, 5496*0b57cec5SDimitry Andric VEX, VEX_WIG; 5497*0b57cec5SDimitry Andric defm VROUNDPDY : sse41_fp_unop_p<0x09, 
"vroundpd", f256mem, VR256, v4f64, 5498*0b57cec5SDimitry Andric loadv4f64, X86VRndScale, SchedWriteFRnd.YMM>, 5499*0b57cec5SDimitry Andric VEX, VEX_L, VEX_WIG; 5500*0b57cec5SDimitry Andric } 5501*0b57cec5SDimitry Andric} 5502*0b57cec5SDimitry Andriclet Predicates = [UseAVX] in { 5503*0b57cec5SDimitry Andric defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl, 5504*0b57cec5SDimitry Andric v4f32, v2f64, X86RndScales, 0>, 5505*0b57cec5SDimitry Andric VEX_4V, VEX_LIG, VEX_WIG; 5506*0b57cec5SDimitry Andric defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>, 5507*0b57cec5SDimitry Andric VEX_4V, VEX_LIG, VEX_WIG; 5508*0b57cec5SDimitry Andric} 5509*0b57cec5SDimitry Andric 5510*0b57cec5SDimitry Andriclet Predicates = [UseAVX] in { 5511*0b57cec5SDimitry Andric def : Pat<(X86VRndScale FR32:$src1, imm:$src2), 5512*0b57cec5SDimitry Andric (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, imm:$src2)>; 5513*0b57cec5SDimitry Andric def : Pat<(X86VRndScale FR64:$src1, imm:$src2), 5514*0b57cec5SDimitry Andric (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, imm:$src2)>; 5515*0b57cec5SDimitry Andric} 5516*0b57cec5SDimitry Andric 5517*0b57cec5SDimitry Andriclet Predicates = [UseAVX, OptForSize] in { 5518*0b57cec5SDimitry Andric def : Pat<(X86VRndScale (loadf32 addr:$src1), imm:$src2), 5519*0b57cec5SDimitry Andric (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>; 5520*0b57cec5SDimitry Andric def : Pat<(X86VRndScale (loadf64 addr:$src1), imm:$src2), 5521*0b57cec5SDimitry Andric (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>; 5522*0b57cec5SDimitry Andric} 5523*0b57cec5SDimitry Andric 5524*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in 5525*0b57cec5SDimitry Andricdefm ROUNDPS : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32, 5526*0b57cec5SDimitry Andric memopv4f32, X86VRndScale, SchedWriteFRnd.XMM>; 5527*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in 5528*0b57cec5SDimitry Andricdefm ROUNDPD : 
sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64, 5529*0b57cec5SDimitry Andric memopv2f64, X86VRndScale, SchedWriteFRnd.XMM>; 5530*0b57cec5SDimitry Andric 5531*0b57cec5SDimitry Andricdefm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl>; 5532*0b57cec5SDimitry Andric 5533*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in 5534*0b57cec5SDimitry Andricdefm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl, 5535*0b57cec5SDimitry Andric v4f32, v2f64, X86RndScales>; 5536*0b57cec5SDimitry Andric 5537*0b57cec5SDimitry Andriclet Predicates = [UseSSE41] in { 5538*0b57cec5SDimitry Andric def : Pat<(X86VRndScale FR32:$src1, imm:$src2), 5539*0b57cec5SDimitry Andric (ROUNDSSr FR32:$src1, imm:$src2)>; 5540*0b57cec5SDimitry Andric def : Pat<(X86VRndScale FR64:$src1, imm:$src2), 5541*0b57cec5SDimitry Andric (ROUNDSDr FR64:$src1, imm:$src2)>; 5542*0b57cec5SDimitry Andric} 5543*0b57cec5SDimitry Andric 5544*0b57cec5SDimitry Andriclet Predicates = [UseSSE41, OptForSize] in { 5545*0b57cec5SDimitry Andric def : Pat<(X86VRndScale (loadf32 addr:$src1), imm:$src2), 5546*0b57cec5SDimitry Andric (ROUNDSSm addr:$src1, imm:$src2)>; 5547*0b57cec5SDimitry Andric def : Pat<(X86VRndScale (loadf64 addr:$src1), imm:$src2), 5548*0b57cec5SDimitry Andric (ROUNDSDm addr:$src1, imm:$src2)>; 5549*0b57cec5SDimitry Andric} 5550*0b57cec5SDimitry Andric 5551*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 5552*0b57cec5SDimitry Andric// SSE4.1 - Packed Bit Test 5553*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 5554*0b57cec5SDimitry Andric 5555*0b57cec5SDimitry Andric// ptest instruction we'll lower to this in X86ISelLowering primarily from 5556*0b57cec5SDimitry Andric// the intel intrinsic that corresponds to this. 
5557*0b57cec5SDimitry Andriclet Defs = [EFLAGS], Predicates = [HasAVX] in { 5558*0b57cec5SDimitry Andricdef VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), 5559*0b57cec5SDimitry Andric "vptest\t{$src2, $src1|$src1, $src2}", 5560*0b57cec5SDimitry Andric [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>, 5561*0b57cec5SDimitry Andric Sched<[SchedWriteVecTest.XMM]>, VEX, VEX_WIG; 5562*0b57cec5SDimitry Andricdef VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), 5563*0b57cec5SDimitry Andric "vptest\t{$src2, $src1|$src1, $src2}", 5564*0b57cec5SDimitry Andric [(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>, 5565*0b57cec5SDimitry Andric Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>, 5566*0b57cec5SDimitry Andric VEX, VEX_WIG; 5567*0b57cec5SDimitry Andric 5568*0b57cec5SDimitry Andricdef VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2), 5569*0b57cec5SDimitry Andric "vptest\t{$src2, $src1|$src1, $src2}", 5570*0b57cec5SDimitry Andric [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>, 5571*0b57cec5SDimitry Andric Sched<[SchedWriteVecTest.YMM]>, VEX, VEX_L, VEX_WIG; 5572*0b57cec5SDimitry Andricdef VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2), 5573*0b57cec5SDimitry Andric "vptest\t{$src2, $src1|$src1, $src2}", 5574*0b57cec5SDimitry Andric [(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>, 5575*0b57cec5SDimitry Andric Sched<[SchedWriteVecTest.YMM.Folded, SchedWriteVecTest.YMM.ReadAfterFold]>, 5576*0b57cec5SDimitry Andric VEX, VEX_L, VEX_WIG; 5577*0b57cec5SDimitry Andric} 5578*0b57cec5SDimitry Andric 5579*0b57cec5SDimitry Andriclet Defs = [EFLAGS] in { 5580*0b57cec5SDimitry Andricdef PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), 5581*0b57cec5SDimitry Andric "ptest\t{$src2, $src1|$src1, $src2}", 5582*0b57cec5SDimitry Andric [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 
VR128:$src2)))]>, 5583*0b57cec5SDimitry Andric Sched<[SchedWriteVecTest.XMM]>; 5584*0b57cec5SDimitry Andricdef PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), 5585*0b57cec5SDimitry Andric "ptest\t{$src2, $src1|$src1, $src2}", 5586*0b57cec5SDimitry Andric [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>, 5587*0b57cec5SDimitry Andric Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>; 5588*0b57cec5SDimitry Andric} 5589*0b57cec5SDimitry Andric 5590*0b57cec5SDimitry Andric// The bit test instructions below are AVX only 5591*0b57cec5SDimitry Andricmulticlass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC, 5592*0b57cec5SDimitry Andric X86MemOperand x86memop, PatFrag mem_frag, ValueType vt, 5593*0b57cec5SDimitry Andric X86FoldableSchedWrite sched> { 5594*0b57cec5SDimitry Andric def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), 5595*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 5596*0b57cec5SDimitry Andric [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, 5597*0b57cec5SDimitry Andric Sched<[sched]>, VEX; 5598*0b57cec5SDimitry Andric def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), 5599*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 5600*0b57cec5SDimitry Andric [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>, 5601*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>, VEX; 5602*0b57cec5SDimitry Andric} 5603*0b57cec5SDimitry Andric 5604*0b57cec5SDimitry Andriclet Defs = [EFLAGS], Predicates = [HasAVX] in { 5605*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in { 5606*0b57cec5SDimitry Andricdefm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32, v4f32, 5607*0b57cec5SDimitry Andric SchedWriteFTest.XMM>; 5608*0b57cec5SDimitry Andricdefm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32, v8f32, 5609*0b57cec5SDimitry Andric 
SchedWriteFTest.YMM>, VEX_L; 5610*0b57cec5SDimitry Andric} 5611*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in { 5612*0b57cec5SDimitry Andricdefm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64, v2f64, 5613*0b57cec5SDimitry Andric SchedWriteFTest.XMM>; 5614*0b57cec5SDimitry Andricdefm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64, 5615*0b57cec5SDimitry Andric SchedWriteFTest.YMM>, VEX_L; 5616*0b57cec5SDimitry Andric} 5617*0b57cec5SDimitry Andric} 5618*0b57cec5SDimitry Andric 5619*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 5620*0b57cec5SDimitry Andric// SSE4.1 - Misc Instructions 5621*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 5622*0b57cec5SDimitry Andric 5623*0b57cec5SDimitry Andriclet Defs = [EFLAGS], Predicates = [HasPOPCNT] in { 5624*0b57cec5SDimitry Andric def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), 5625*0b57cec5SDimitry Andric "popcnt{w}\t{$src, $dst|$dst, $src}", 5626*0b57cec5SDimitry Andric [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>, 5627*0b57cec5SDimitry Andric Sched<[WritePOPCNT]>, OpSize16, XS; 5628*0b57cec5SDimitry Andric def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), 5629*0b57cec5SDimitry Andric "popcnt{w}\t{$src, $dst|$dst, $src}", 5630*0b57cec5SDimitry Andric [(set GR16:$dst, (ctpop (loadi16 addr:$src))), 5631*0b57cec5SDimitry Andric (implicit EFLAGS)]>, 5632*0b57cec5SDimitry Andric Sched<[WritePOPCNT.Folded]>, OpSize16, XS; 5633*0b57cec5SDimitry Andric 5634*0b57cec5SDimitry Andric def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), 5635*0b57cec5SDimitry Andric "popcnt{l}\t{$src, $dst|$dst, $src}", 5636*0b57cec5SDimitry Andric [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>, 5637*0b57cec5SDimitry Andric Sched<[WritePOPCNT]>, OpSize32, XS; 5638*0b57cec5SDimitry Andric 
5639*0b57cec5SDimitry Andric def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), 5640*0b57cec5SDimitry Andric "popcnt{l}\t{$src, $dst|$dst, $src}", 5641*0b57cec5SDimitry Andric [(set GR32:$dst, (ctpop (loadi32 addr:$src))), 5642*0b57cec5SDimitry Andric (implicit EFLAGS)]>, 5643*0b57cec5SDimitry Andric Sched<[WritePOPCNT.Folded]>, OpSize32, XS; 5644*0b57cec5SDimitry Andric 5645*0b57cec5SDimitry Andric def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), 5646*0b57cec5SDimitry Andric "popcnt{q}\t{$src, $dst|$dst, $src}", 5647*0b57cec5SDimitry Andric [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>, 5648*0b57cec5SDimitry Andric Sched<[WritePOPCNT]>, XS; 5649*0b57cec5SDimitry Andric def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), 5650*0b57cec5SDimitry Andric "popcnt{q}\t{$src, $dst|$dst, $src}", 5651*0b57cec5SDimitry Andric [(set GR64:$dst, (ctpop (loadi64 addr:$src))), 5652*0b57cec5SDimitry Andric (implicit EFLAGS)]>, 5653*0b57cec5SDimitry Andric Sched<[WritePOPCNT.Folded]>, XS; 5654*0b57cec5SDimitry Andric} 5655*0b57cec5SDimitry Andric 5656*0b57cec5SDimitry Andric// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16. 
5657*0b57cec5SDimitry Andricmulticlass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr, 5658*0b57cec5SDimitry Andric SDNode OpNode, PatFrag ld_frag, 5659*0b57cec5SDimitry Andric X86FoldableSchedWrite Sched> { 5660*0b57cec5SDimitry Andric def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), 5661*0b57cec5SDimitry Andric (ins VR128:$src), 5662*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5663*0b57cec5SDimitry Andric [(set VR128:$dst, (v8i16 (OpNode (v8i16 VR128:$src))))]>, 5664*0b57cec5SDimitry Andric Sched<[Sched]>; 5665*0b57cec5SDimitry Andric def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), 5666*0b57cec5SDimitry Andric (ins i128mem:$src), 5667*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5668*0b57cec5SDimitry Andric [(set VR128:$dst, 5669*0b57cec5SDimitry Andric (v8i16 (OpNode (ld_frag addr:$src))))]>, 5670*0b57cec5SDimitry Andric Sched<[Sched.Folded]>; 5671*0b57cec5SDimitry Andric} 5672*0b57cec5SDimitry Andric 5673*0b57cec5SDimitry Andric// PHMIN has the same profile as PSAD, thus we use the same scheduling 5674*0b57cec5SDimitry Andric// model, although the naming is misleading. 5675*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in 5676*0b57cec5SDimitry Andricdefm VPHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "vphminposuw", 5677*0b57cec5SDimitry Andric X86phminpos, load, 5678*0b57cec5SDimitry Andric WritePHMINPOS>, VEX, VEX_WIG; 5679*0b57cec5SDimitry Andricdefm PHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "phminposuw", 5680*0b57cec5SDimitry Andric X86phminpos, memop, 5681*0b57cec5SDimitry Andric WritePHMINPOS>; 5682*0b57cec5SDimitry Andric 5683*0b57cec5SDimitry Andric/// SS48I_binop_rm - Simple SSE41 binary operator. 
5684*0b57cec5SDimitry Andricmulticlass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 5685*0b57cec5SDimitry Andric ValueType OpVT, RegisterClass RC, PatFrag memop_frag, 5686*0b57cec5SDimitry Andric X86MemOperand x86memop, X86FoldableSchedWrite sched, 5687*0b57cec5SDimitry Andric bit Is2Addr = 1> { 5688*0b57cec5SDimitry Andric let isCommutable = 1 in 5689*0b57cec5SDimitry Andric def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst), 5690*0b57cec5SDimitry Andric (ins RC:$src1, RC:$src2), 5691*0b57cec5SDimitry Andric !if(Is2Addr, 5692*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5693*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5694*0b57cec5SDimitry Andric [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, 5695*0b57cec5SDimitry Andric Sched<[sched]>; 5696*0b57cec5SDimitry Andric def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst), 5697*0b57cec5SDimitry Andric (ins RC:$src1, x86memop:$src2), 5698*0b57cec5SDimitry Andric !if(Is2Addr, 5699*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5700*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5701*0b57cec5SDimitry Andric [(set RC:$dst, 5702*0b57cec5SDimitry Andric (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, 5703*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 5704*0b57cec5SDimitry Andric} 5705*0b57cec5SDimitry Andric 5706*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 5707*0b57cec5SDimitry Andric defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128, 5708*0b57cec5SDimitry Andric load, i128mem, SchedWriteVecALU.XMM, 0>, 5709*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 5710*0b57cec5SDimitry Andric defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128, 5711*0b57cec5SDimitry Andric load, i128mem, SchedWriteVecALU.XMM, 0>, 5712*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 5713*0b57cec5SDimitry Andric defm VPMAXSD 
: SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128, 5714*0b57cec5SDimitry Andric load, i128mem, SchedWriteVecALU.XMM, 0>, 5715*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 5716*0b57cec5SDimitry Andric defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128, 5717*0b57cec5SDimitry Andric load, i128mem, SchedWriteVecALU.XMM, 0>, 5718*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 5719*0b57cec5SDimitry Andric defm VPMULDQ : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v2i64, VR128, 5720*0b57cec5SDimitry Andric load, i128mem, SchedWriteVecIMul.XMM, 0>, 5721*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 5722*0b57cec5SDimitry Andric} 5723*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoBWI] in { 5724*0b57cec5SDimitry Andric defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128, 5725*0b57cec5SDimitry Andric load, i128mem, SchedWriteVecALU.XMM, 0>, 5726*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 5727*0b57cec5SDimitry Andric defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128, 5728*0b57cec5SDimitry Andric load, i128mem, SchedWriteVecALU.XMM, 0>, 5729*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 5730*0b57cec5SDimitry Andric defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128, 5731*0b57cec5SDimitry Andric load, i128mem, SchedWriteVecALU.XMM, 0>, 5732*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 5733*0b57cec5SDimitry Andric defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128, 5734*0b57cec5SDimitry Andric load, i128mem, SchedWriteVecALU.XMM, 0>, 5735*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 5736*0b57cec5SDimitry Andric} 5737*0b57cec5SDimitry Andric 5738*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in { 5739*0b57cec5SDimitry Andric defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256, 5740*0b57cec5SDimitry Andric load, i256mem, SchedWriteVecALU.YMM, 0>, 5741*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 5742*0b57cec5SDimitry Andric defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256, 
5743*0b57cec5SDimitry Andric load, i256mem, SchedWriteVecALU.YMM, 0>, 5744*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 5745*0b57cec5SDimitry Andric defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256, 5746*0b57cec5SDimitry Andric load, i256mem, SchedWriteVecALU.YMM, 0>, 5747*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 5748*0b57cec5SDimitry Andric defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256, 5749*0b57cec5SDimitry Andric load, i256mem, SchedWriteVecALU.YMM, 0>, 5750*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 5751*0b57cec5SDimitry Andric defm VPMULDQY : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v4i64, VR256, 5752*0b57cec5SDimitry Andric load, i256mem, SchedWriteVecIMul.YMM, 0>, 5753*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 5754*0b57cec5SDimitry Andric} 5755*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { 5756*0b57cec5SDimitry Andric defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256, 5757*0b57cec5SDimitry Andric load, i256mem, SchedWriteVecALU.YMM, 0>, 5758*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 5759*0b57cec5SDimitry Andric defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256, 5760*0b57cec5SDimitry Andric load, i256mem, SchedWriteVecALU.YMM, 0>, 5761*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 5762*0b57cec5SDimitry Andric defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256, 5763*0b57cec5SDimitry Andric load, i256mem, SchedWriteVecALU.YMM, 0>, 5764*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 5765*0b57cec5SDimitry Andric defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256, 5766*0b57cec5SDimitry Andric load, i256mem, SchedWriteVecALU.YMM, 0>, 5767*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 5768*0b57cec5SDimitry Andric} 5769*0b57cec5SDimitry Andric 5770*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { 5771*0b57cec5SDimitry Andric defm PMINSB : SS48I_binop_rm<0x38, "pminsb", smin, v16i8, VR128, 
5772*0b57cec5SDimitry Andric memop, i128mem, SchedWriteVecALU.XMM, 1>; 5773*0b57cec5SDimitry Andric defm PMINSD : SS48I_binop_rm<0x39, "pminsd", smin, v4i32, VR128, 5774*0b57cec5SDimitry Andric memop, i128mem, SchedWriteVecALU.XMM, 1>; 5775*0b57cec5SDimitry Andric defm PMINUD : SS48I_binop_rm<0x3B, "pminud", umin, v4i32, VR128, 5776*0b57cec5SDimitry Andric memop, i128mem, SchedWriteVecALU.XMM, 1>; 5777*0b57cec5SDimitry Andric defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", umin, v8i16, VR128, 5778*0b57cec5SDimitry Andric memop, i128mem, SchedWriteVecALU.XMM, 1>; 5779*0b57cec5SDimitry Andric defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", smax, v16i8, VR128, 5780*0b57cec5SDimitry Andric memop, i128mem, SchedWriteVecALU.XMM, 1>; 5781*0b57cec5SDimitry Andric defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", smax, v4i32, VR128, 5782*0b57cec5SDimitry Andric memop, i128mem, SchedWriteVecALU.XMM, 1>; 5783*0b57cec5SDimitry Andric defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", umax, v4i32, VR128, 5784*0b57cec5SDimitry Andric memop, i128mem, SchedWriteVecALU.XMM, 1>; 5785*0b57cec5SDimitry Andric defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", umax, v8i16, VR128, 5786*0b57cec5SDimitry Andric memop, i128mem, SchedWriteVecALU.XMM, 1>; 5787*0b57cec5SDimitry Andric defm PMULDQ : SS48I_binop_rm<0x28, "pmuldq", X86pmuldq, v2i64, VR128, 5788*0b57cec5SDimitry Andric memop, i128mem, SchedWriteVecIMul.XMM, 1>; 5789*0b57cec5SDimitry Andric} 5790*0b57cec5SDimitry Andric 5791*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in 5792*0b57cec5SDimitry Andric defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128, 5793*0b57cec5SDimitry Andric load, i128mem, SchedWritePMULLD.XMM, 0>, 5794*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 5795*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in 5796*0b57cec5SDimitry Andric defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128, 5797*0b57cec5SDimitry Andric load, i128mem, SchedWriteVecALU.XMM, 0>, 5798*0b57cec5SDimitry Andric VEX_4V, 
                                 VEX_WIG;

// 256-bit AVX2 forms of pmulld/pcmpeqq (non-destructive 3-operand VEX).
let Predicates = [HasAVX2, NoVLX] in
  defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
                                 load, i256mem, SchedWritePMULLD.YMM, 0>,
                                 VEX_4V, VEX_L, VEX_WIG;
let Predicates = [HasAVX2] in
  defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;

// Legacy SSE4.1 forms are destructive ($src1 doubles as the destination),
// hence the register constraint.
let Constraints = "$src1 = $dst" in {
  defm PMULLD  : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
                                memop, i128mem, SchedWritePMULLD.XMM, 1>;
  defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
                                memop, i128mem, SchedWriteVecALU.XMM, 1>;
}

/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate.
/// Emits the reg-reg (rri) and reg-mem (rmi) variants; selection matches the
/// target intrinsic IntId. Is2Addr=1 selects the destructive (non-VEX)
/// assembly syntax, Is2Addr=0 the 3-operand VEX syntax.
multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
                  Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
                  X86MemOperand x86memop, bit Is2Addr,
                  X86FoldableSchedWrite sched> {
  let isCommutable = 1 in
  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
        Sched<[sched]>;
  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst,
          (IntId RC:$src1, (memop_frag addr:$src2), imm:$src3))]>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
}

/// SS41I_binop_rmi - SSE 4.1 binary operator with 8-bit immediate.
/// Same shape as SS41I_binop_rmi_int, but matches a generic SDNode (OpNode)
/// producing OpVT instead of a target intrinsic.
multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                           X86MemOperand x86memop, bit Is2Addr,
                           X86FoldableSchedWrite sched> {
  let isCommutable = 1 in
  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
        Sched<[sched]>;
  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst,
          (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), imm:$src3)))]>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Immediate transforms for commuting blend operands: swapping the two blend
// sources selects the complementary lanes, so the low 2/4/8 mask bits are
// inverted, respectively.
def BlendCommuteImm2 : SDNodeXForm<imm, [{
  uint8_t Imm = N->getZExtValue() & 0x03;
  return getI8Imm(Imm ^ 0x03, SDLoc(N));
}]>;

def BlendCommuteImm4 : SDNodeXForm<imm, [{
  uint8_t Imm = N->getZExtValue() & 0x0f;
  return getI8Imm(Imm ^ 0x0f, SDLoc(N));
}]>;

def BlendCommuteImm8 : SDNodeXForm<imm, [{
  uint8_t Imm = N->getZExtValue() & 0xff;
  return getI8Imm(Imm ^ 0xff, SDLoc(N));
}]>;

// Turn a 4-bit blendi immediate to 8-bit for use with pblendw.
// Each source bit selects one lane; widening each of the 4 bits to a 2-bit
// group (0x3) lets a blendps/blendpd-style mask drive pblendw's 8 word lanes.
def BlendScaleImm4 : SDNodeXForm<imm, [{
  uint8_t Imm = N->getZExtValue();
  uint8_t NewImm = 0;
  for (unsigned i = 0; i != 4; ++i) {
    if (Imm & (1 << i))
      NewImm |= 0x3 << (i * 2);
  }
  return getI8Imm(NewImm, SDLoc(N));
}]>;

// Turn a 2-bit blendi immediate to 8-bit for use with pblendw.
// Each bit becomes a 4-bit group (0xf), i.e. one 64-bit lane -> four words.
def BlendScaleImm2 : SDNodeXForm<imm, [{
  uint8_t Imm = N->getZExtValue();
  uint8_t NewImm = 0;
  for (unsigned i = 0; i != 2; ++i) {
    if (Imm & (1 << i))
      NewImm |= 0xf << (i * 4);
  }
  return getI8Imm(NewImm, SDLoc(N));
}]>;

// Turn a 2-bit blendi immediate to 4-bit for use with pblendd.
// Each bit becomes a 2-bit group (0x3), i.e. one 64-bit lane -> two dwords.
def BlendScaleImm2to4 : SDNodeXForm<imm, [{
  uint8_t Imm = N->getZExtValue();
  uint8_t NewImm = 0;
  for (unsigned i = 0; i != 2; ++i) {
    if (Imm & (1 << i))
      NewImm |= 0x3 << (i * 2);
  }
  return getI8Imm(NewImm, SDLoc(N));
}]>;

// Turn a 4-bit blendi immediate to 8-bit for use with pblendw and invert it.
// (Scale as in BlendScaleImm4, then XOR with 0xff to account for commuted
// blend sources.)
def BlendScaleCommuteImm4 : SDNodeXForm<imm, [{
  uint8_t Imm = N->getZExtValue();
  uint8_t NewImm = 0;
  for (unsigned i = 0; i != 4; ++i) {
    if (Imm & (1 << i))
      NewImm |= 0x3 << (i * 2);
  }
  return getI8Imm(NewImm ^ 0xff, SDLoc(N));
}]>;

// Turn a 2-bit blendi immediate to 8-bit for use with pblendw and invert it.
def BlendScaleCommuteImm2 : SDNodeXForm<imm, [{
  uint8_t Imm = N->getZExtValue();
  uint8_t NewImm = 0;
  for (unsigned i = 0; i != 2; ++i) {
    if (Imm & (1 << i))
      NewImm |= 0xf << (i * 4);
  }
  return getI8Imm(NewImm ^ 0xff, SDLoc(N));
}]>;

// Turn a 2-bit blendi immediate to 4-bit for use with pblendd and invert it.
def BlendScaleCommuteImm2to4 : SDNodeXForm<imm, [{
  uint8_t Imm = N->getZExtValue();
  uint8_t NewImm = 0;
  for (unsigned i = 0; i != 2; ++i) {
    if (Imm & (1 << i))
      NewImm |= 0x3 << (i * 2);
  }
  // Only 4 mask bits are meaningful for pblendd, so invert with 0xf.
  return getI8Imm(NewImm ^ 0xf, SDLoc(N));
}]>;

// AVX encodings of mpsadbw/dpps/dppd. mpsadbw is not commutable: the
// immediate selects different block offsets for each source.
let Predicates = [HasAVX] in {
  let isCommutable = 0 in {
  defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
                                      VR128, load, i128mem, 0,
                                      SchedWriteMPSAD.XMM>, VEX_4V, VEX_WIG;
  }

  let ExeDomain = SSEPackedSingle in
  defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
                                   VR128, load, f128mem, 0,
                                   SchedWriteDPPS.XMM>, VEX_4V, VEX_WIG;
  let ExeDomain = SSEPackedDouble in
  defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
                                   VR128, load, f128mem, 0,
                                   SchedWriteDPPD.XMM>, VEX_4V, VEX_WIG;
  let ExeDomain = SSEPackedSingle in
  defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
                                    VR256, load, i256mem, 0,
                                    SchedWriteDPPS.YMM>, VEX_4V, VEX_L, VEX_WIG;
}

let Predicates = [HasAVX2] in {
  let isCommutable = 0 in {
  defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
                                       VR256, load, i256mem, 0,
                                       SchedWriteMPSAD.YMM>, VEX_4V, VEX_L, VEX_WIG;
  }
}

// Legacy destructive SSE4.1 encodings.
let Constraints = "$src1 = $dst" in {
  let isCommutable = 0 in {
  defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
                                     VR128, memop, i128mem, 1,
                                     SchedWriteMPSAD.XMM>;
  }

  let ExeDomain = SSEPackedSingle in
  defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
                                  VR128, memop, f128mem, 1,
                                  SchedWriteDPPS.XMM>;
  let ExeDomain = SSEPackedDouble in
  defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
                                  VR128, memop, f128mem, 1,
                                  SchedWriteDPPD.XMM>;
}

/// SS41I_blend_rmi - SSE 4.1 blend with 8-bit immediate.
/// Also emits a pattern that handles a load in the *first* source by
/// commuting the operands and rewriting the immediate via commuteXForm.
multiclass SS41I_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                           X86MemOperand x86memop, bit Is2Addr, Domain d,
                           X86FoldableSchedWrite sched, SDNodeXForm commuteXForm> {
let ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in {
  let isCommutable = 1 in
  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
        Sched<[sched]>;
  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst,
          (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), imm:$src3)))]>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
}

  // Pattern to commute if load is in first source.
  def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, imm:$src3)),
            (!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
                                            (commuteXForm imm:$src3))>;
}

let Predicates = [HasAVX] in {
  defm VBLENDPS : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v4f32,
                                  VR128, load, f128mem, 0, SSEPackedSingle,
                                  SchedWriteFBlend.XMM, BlendCommuteImm4>,
                                  VEX_4V, VEX_WIG;
  defm VBLENDPSY : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v8f32,
                                   VR256, load, f256mem, 0, SSEPackedSingle,
                                   SchedWriteFBlend.YMM, BlendCommuteImm8>,
                                   VEX_4V, VEX_L, VEX_WIG;
  defm VBLENDPD : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v2f64,
                                  VR128, load, f128mem, 0, SSEPackedDouble,
                                  SchedWriteFBlend.XMM, BlendCommuteImm2>,
                                  VEX_4V, VEX_WIG;
  defm VBLENDPDY : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v4f64,
                                   VR256, load, f256mem, 0, SSEPackedDouble,
                                   SchedWriteFBlend.YMM, BlendCommuteImm4>,
                                   VEX_4V, VEX_L, VEX_WIG;
  defm VPBLENDW : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v8i16,
                                  VR128, load, i128mem, 0, SSEPackedInt,
                                  SchedWriteBlend.XMM, BlendCommuteImm8>,
                                  VEX_4V, VEX_WIG;
}

let Predicates = [HasAVX2] in {
  defm VPBLENDWY : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v16i16,
                                   VR256, load, i256mem, 0, SSEPackedInt,
                                   SchedWriteBlend.YMM, BlendCommuteImm8>,
                                   VEX_4V, VEX_L, VEX_WIG;
}

// Emulate vXi32/vXi64 blends with vXf32/vXf64 or pblendw.
// ExecutionDomainFixPass will cleanup domains later on.
let Predicates = [HasAVX1Only] in {
def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3),
          (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$src3)>;
def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3),
          (VBLENDPDYrmi VR256:$src1, addr:$src2, imm:$src3)>;
def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3),
          (VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 imm:$src3))>;

// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
// it from becoming movsd via commuting under optsize.
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
          (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3),
          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>;
def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3),
          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>;

def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), imm:$src3),
          (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$src3)>;
def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), imm:$src3),
          (VBLENDPSYrmi VR256:$src1, addr:$src2, imm:$src3)>;
def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, imm:$src3),
          (VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 imm:$src3))>;

// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
// it from becoming movss via commuting under optsize.
def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
          (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), imm:$src3),
          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, imm:$src3),
          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
}

defm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32,
                               VR128, memop, f128mem, 1, SSEPackedSingle,
                               SchedWriteFBlend.XMM, BlendCommuteImm4>;
defm BLENDPD : SS41I_blend_rmi<0x0D, "blendpd", X86Blendi, v2f64,
                               VR128, memop, f128mem, 1, SSEPackedDouble,
                               SchedWriteFBlend.XMM, BlendCommuteImm2>;
defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16,
                               VR128, memop, i128mem, 1, SSEPackedInt,
                               SchedWriteBlend.XMM, BlendCommuteImm8>;

let Predicates = [UseSSE41] in {
// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
// it from becoming movss via commuting under optsize.
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
          (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), imm:$src3),
          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>;
def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, imm:$src3),
          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>;

def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
          (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), imm:$src3),
          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, imm:$src3),
          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
}

// For insertion into the zero index (low half) of a 256-bit vector, it is
// more efficient to generate a blend with immediate instead of an insert*128.
let Predicates = [HasAVX] in {
def : Pat<(insert_subvector (v4f64 VR256:$src1), (v2f64 VR128:$src2), (iPTR 0)),
          (VBLENDPDYrri VR256:$src1,
                        (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0x3)>;
def : Pat<(insert_subvector (v8f32 VR256:$src1), (v4f32 VR128:$src2), (iPTR 0)),
          (VBLENDPSYrri VR256:$src1,
                        (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0xf)>;

def : Pat<(insert_subvector (loadv4f64 addr:$src2), (v2f64 VR128:$src1), (iPTR 0)),
          (VBLENDPDYrmi (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm), addr:$src2, 0xc)>;
def : Pat<(insert_subvector (loadv8f32 addr:$src2), (v4f32 VR128:$src1), (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
}

/// SS41I_quaternary_avx - AVX SSE 4.1 with 4 operators.
/// Note the reversed operand order in the selection patterns: the blend mask
/// is the last assembly operand ($src3) but the first operand of OpNode.
multiclass SS41I_quaternary_avx<bits<8> opc, string OpcodeStr, RegisterClass RC,
                                X86MemOperand x86memop, ValueType VT,
                                PatFrag mem_frag, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  def rr : Ii8Reg<opc, MRMSrcReg, (outs RC:$dst),
                  (ins RC:$src1, RC:$src2, RC:$src3),
                  !strconcat(OpcodeStr,
                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                  [(set RC:$dst, (VT (OpNode RC:$src3, RC:$src2, RC:$src1)))],
                  SSEPackedInt>, TAPD, VEX_4V,
                  Sched<[sched]>;

  def rm : Ii8Reg<opc, MRMSrcMem, (outs RC:$dst),
                  (ins RC:$src1, x86memop:$src2, RC:$src3),
                  !strconcat(OpcodeStr,
                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                  [(set RC:$dst,
                        (OpNode RC:$src3, (mem_frag addr:$src2),
                                RC:$src1))], SSEPackedInt>, TAPD, VEX_4V,
                  Sched<[sched.Folded, sched.ReadAfterFold,
                         // x86memop:$src2
                         ReadDefault, ReadDefault, ReadDefault, ReadDefault,
                         ReadDefault,
                         // RC::$src3
                         sched.ReadAfterFold]>;
}

let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD : SS41I_quaternary_avx<0x4B, "vblendvpd", VR128, f128mem,
                                      v2f64, loadv2f64, X86Blendv,
                                      SchedWriteFVarBlend.XMM>;
defm VBLENDVPDY : SS41I_quaternary_avx<0x4B, "vblendvpd", VR256, f256mem,
                                       v4f64, loadv4f64, X86Blendv,
                                       SchedWriteFVarBlend.YMM>, VEX_L;
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
defm VBLENDVPS : SS41I_quaternary_avx<0x4A, "vblendvps", VR128, f128mem,
                                      v4f32, loadv4f32, X86Blendv,
                                      SchedWriteFVarBlend.XMM>;
defm VBLENDVPSY : SS41I_quaternary_avx<0x4A, "vblendvps", VR256, f256mem,
                                       v8f32, loadv8f32, X86Blendv,
                                       SchedWriteFVarBlend.YMM>, VEX_L;
} // ExeDomain = SSEPackedSingle
defm VPBLENDVB : SS41I_quaternary_avx<0x4C, "vpblendvb", VR128, i128mem,
                                      v16i8, loadv16i8, X86Blendv,
                                      SchedWriteVarBlend.XMM>;
}

let Predicates = [HasAVX2] in {
defm VPBLENDVBY : SS41I_quaternary_avx<0x4C, "vpblendvb", VR256, i256mem,
                                       v32i8, loadv32i8, X86Blendv,
                                       SchedWriteVarBlend.YMM>, VEX_L;
}

// Select FP variable blends for integer vector blendv as well; the domain
// will be fixed up later if profitable.
let Predicates = [HasAVX] in {
  def : Pat<(v4i32 (X86Blendv (v4i32 VR128:$mask), (v4i32 VR128:$src1),
                              (v4i32 VR128:$src2))),
            (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v2i64 (X86Blendv (v2i64 VR128:$mask), (v2i64 VR128:$src1),
                              (v2i64 VR128:$src2))),
            (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v8i32 (X86Blendv (v8i32 VR256:$mask), (v8i32 VR256:$src1),
                              (v8i32 VR256:$src2))),
            (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v4i64 (X86Blendv (v4i64 VR256:$mask), (v4i64 VR256:$src1),
                              (v4i64 VR256:$src2))),
            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
}

// Prefer a movss or movsd over a blendps when optimizing for size. these were
// changed to use blends because blends have better throughput on sandybridge
// and haswell, but movs[s/d] are 1-2 byte shorter instructions.
let Predicates = [HasAVX, OptForSpeed] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (VBLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (VPBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;

  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
            (VBLENDPSrri VR128:$src1, VR128:$src2, (i8 1))>;
  def : Pat<(v4f32 (X86Movss VR128:$src1, (loadv4f32 addr:$src2))),
            (VBLENDPSrmi VR128:$src1, addr:$src2, (i8 1))>;
  def : Pat<(v4f32 (X86Movss (loadv4f32 addr:$src2), VR128:$src1)),
            (VBLENDPSrmi VR128:$src1, addr:$src2, (i8 0xe))>;

  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
            (VBLENDPDrri VR128:$src1, VR128:$src2, (i8 1))>;
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (loadv2f64 addr:$src2))),
            (VBLENDPDrmi VR128:$src1, addr:$src2, (i8 1))>;
  def : Pat<(v2f64 (X86Movsd (loadv2f64 addr:$src2), VR128:$src1)),
            (VBLENDPDrmi VR128:$src1, addr:$src2, (i8 2))>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                          (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)),
                          (i8 1))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                          (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)),
                          (i8 3))), sub_xmm)>;
}

// Prefer a movss or movsd over a blendps when optimizing for size. these were
// changed to use blends because blends have better throughput on sandybridge
// and haswell, but movs[s/d] are 1-2 byte shorter instructions.
let Predicates = [UseSSE41, OptForSpeed] in {
  // With SSE41 we can use blends for these patterns.
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (BLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (PBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;

  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
            (BLENDPSrri VR128:$src1, VR128:$src2, (i8 1))>;
  def : Pat<(v4f32 (X86Movss VR128:$src1, (memopv4f32 addr:$src2))),
            (BLENDPSrmi VR128:$src1, addr:$src2, (i8 1))>;
  def : Pat<(v4f32 (X86Movss (memopv4f32 addr:$src2), VR128:$src1)),
            (BLENDPSrmi VR128:$src1, addr:$src2, (i8 0xe))>;

  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
            (BLENDPDrri VR128:$src1, VR128:$src2, (i8 1))>;
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (memopv2f64 addr:$src2))),
            (BLENDPDrmi VR128:$src1, addr:$src2, (i8 1))>;
  def : Pat<(v2f64 (X86Movsd (memopv2f64 addr:$src2), VR128:$src1)),
            (BLENDPDrmi VR128:$src1, addr:$src2, (i8 2))>;
}


/// SS41I_ternary - SSE 4.1 ternary operator.
/// Legacy non-VEX blendv forms: the mask is the implicit XMM0 register
/// (hence Uses = [XMM0]) and the encoding is destructive.
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
  multiclass SS41I_ternary<bits<8> opc, string OpcodeStr, ValueType VT,
                           PatFrag mem_frag, X86MemOperand x86memop,
                           SDNode OpNode, X86FoldableSchedWrite sched> {
    def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2),
                    !strconcat(OpcodeStr,
                     "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
                    [(set VR128:$dst,
                      (VT (OpNode XMM0, VR128:$src2, VR128:$src1)))]>,
                    Sched<[sched]>;

    def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins VR128:$src1, x86memop:$src2),
                    !strconcat(OpcodeStr,
                     "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
                    [(set VR128:$dst,
                      (OpNode XMM0, (mem_frag addr:$src2), VR128:$src1))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

let ExeDomain = SSEPackedDouble in
defm BLENDVPD : SS41I_ternary<0x15, "blendvpd", v2f64, memopv2f64, f128mem,
                              X86Blendv, SchedWriteFVarBlend.XMM>;
let ExeDomain = SSEPackedSingle in
defm BLENDVPS : SS41I_ternary<0x14, "blendvps", v4f32, memopv4f32, f128mem,
                              X86Blendv, SchedWriteFVarBlend.XMM>;
defm PBLENDVB : SS41I_ternary<0x10, "pblendvb", v16i8, memopv16i8, i128mem,
                              X86Blendv, SchedWriteVarBlend.XMM>;

// Aliases with the implicit xmm0 argument
def : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}",
                (BLENDVPDrr0 VR128:$dst, VR128:$src2), 0>;
def : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}",
                (BLENDVPDrm0 VR128:$dst, f128mem:$src2), 0>;
def : InstAlias<"blendvps\t{$src2, $dst|$dst, $src2}",
                (BLENDVPSrr0 VR128:$dst, VR128:$src2), 0>;
def : InstAlias<"blendvps\t{$src2, $dst|$dst, $src2}",
                (BLENDVPSrm0 VR128:$dst, f128mem:$src2), 0>;
def : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}",
                (PBLENDVBrr0 VR128:$dst, VR128:$src2), 0>;
def : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}",
                (PBLENDVBrm0 VR128:$dst, i128mem:$src2), 0>;

let Predicates = [UseSSE41] in {
  def : Pat<(v4i32 (X86Blendv (v4i32 XMM0), (v4i32 VR128:$src1),
                              (v4i32 VR128:$src2))),
            (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v2i64 (X86Blendv (v2i64 XMM0), (v2i64 VR128:$src1),
                              (v2i64 VR128:$src2))),
            (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
}

let AddedComplexity = 400 in { // Prefer non-temporal versions

let Predicates = [HasAVX, NoVLX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                        "vmovntdqa\t{$src, $dst|$dst, $src}", []>,
                        Sched<[SchedWriteVecMoveLSNT.XMM.RM]>, VEX, VEX_WIG;
let Predicates = [HasAVX2, NoVLX] in
def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                         "vmovntdqa\t{$src, $dst|$dst, $src}", []>,
                         Sched<[SchedWriteVecMoveLSNT.YMM.RM]>, VEX, VEX_L, VEX_WIG;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                       "movntdqa\t{$src, $dst|$dst, $src}", []>,
                       Sched<[SchedWriteVecMoveLSNT.XMM.RM]>;

// Match aligned non-temporal loads of every 256-bit element type to the
// single vmovntdqa encoding.
let Predicates = [HasAVX2, NoVLX] in {
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
}

let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
}

let Predicates = [UseSSE41] in {
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
}

} // AddedComplexity

//===----------------------------------------------------------------------===//
// SSE4.2 - Compare Instructions
//===----------------------------------------------------------------------===//

/// SS42I_binop_rm - Simple SSE 4.2 binary operator
multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                          X86MemOperand x86memop, X86FoldableSchedWrite sched,
                          bit Is2Addr = 1> {
  def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst),
                  (ins RC:$src1,
RC:$src2), 6405*0b57cec5SDimitry Andric !if(Is2Addr, 6406*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 6407*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 6408*0b57cec5SDimitry Andric [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, 6409*0b57cec5SDimitry Andric Sched<[sched]>; 6410*0b57cec5SDimitry Andric def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst), 6411*0b57cec5SDimitry Andric (ins RC:$src1, x86memop:$src2), 6412*0b57cec5SDimitry Andric !if(Is2Addr, 6413*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 6414*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 6415*0b57cec5SDimitry Andric [(set RC:$dst, 6416*0b57cec5SDimitry Andric (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, 6417*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 6418*0b57cec5SDimitry Andric} 6419*0b57cec5SDimitry Andric 6420*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in 6421*0b57cec5SDimitry Andric defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128, 6422*0b57cec5SDimitry Andric load, i128mem, SchedWriteVecALU.XMM, 0>, 6423*0b57cec5SDimitry Andric VEX_4V, VEX_WIG; 6424*0b57cec5SDimitry Andric 6425*0b57cec5SDimitry Andriclet Predicates = [HasAVX2] in 6426*0b57cec5SDimitry Andric defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256, 6427*0b57cec5SDimitry Andric load, i256mem, SchedWriteVecALU.YMM, 0>, 6428*0b57cec5SDimitry Andric VEX_4V, VEX_L, VEX_WIG; 6429*0b57cec5SDimitry Andric 6430*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in 6431*0b57cec5SDimitry Andric defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128, 6432*0b57cec5SDimitry Andric memop, i128mem, SchedWriteVecALU.XMM>; 6433*0b57cec5SDimitry Andric 6434*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 6435*0b57cec5SDimitry 
Andric// SSE4.2 - String/text Processing Instructions 6436*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 6437*0b57cec5SDimitry Andric 6438*0b57cec5SDimitry Andricmulticlass pcmpistrm_SS42AI<string asm> { 6439*0b57cec5SDimitry Andric def rr : SS42AI<0x62, MRMSrcReg, (outs), 6440*0b57cec5SDimitry Andric (ins VR128:$src1, VR128:$src2, u8imm:$src3), 6441*0b57cec5SDimitry Andric !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), 6442*0b57cec5SDimitry Andric []>, Sched<[WritePCmpIStrM]>; 6443*0b57cec5SDimitry Andric let mayLoad = 1 in 6444*0b57cec5SDimitry Andric def rm :SS42AI<0x62, MRMSrcMem, (outs), 6445*0b57cec5SDimitry Andric (ins VR128:$src1, i128mem:$src2, u8imm:$src3), 6446*0b57cec5SDimitry Andric !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), 6447*0b57cec5SDimitry Andric []>, Sched<[WritePCmpIStrM.Folded, WritePCmpIStrM.ReadAfterFold]>; 6448*0b57cec5SDimitry Andric} 6449*0b57cec5SDimitry Andric 6450*0b57cec5SDimitry Andriclet Defs = [XMM0, EFLAGS], hasSideEffects = 0 in { 6451*0b57cec5SDimitry Andric let Predicates = [HasAVX] in 6452*0b57cec5SDimitry Andric defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX; 6453*0b57cec5SDimitry Andric defm PCMPISTRM : pcmpistrm_SS42AI<"pcmpistrm"> ; 6454*0b57cec5SDimitry Andric} 6455*0b57cec5SDimitry Andric 6456*0b57cec5SDimitry Andricmulticlass SS42AI_pcmpestrm<string asm> { 6457*0b57cec5SDimitry Andric def rr : SS42AI<0x60, MRMSrcReg, (outs), 6458*0b57cec5SDimitry Andric (ins VR128:$src1, VR128:$src3, u8imm:$src5), 6459*0b57cec5SDimitry Andric !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), 6460*0b57cec5SDimitry Andric []>, Sched<[WritePCmpEStrM]>; 6461*0b57cec5SDimitry Andric let mayLoad = 1 in 6462*0b57cec5SDimitry Andric def rm : SS42AI<0x60, MRMSrcMem, (outs), 6463*0b57cec5SDimitry Andric (ins VR128:$src1, i128mem:$src3, u8imm:$src5), 6464*0b57cec5SDimitry Andric !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, 
$src5}"), 6465*0b57cec5SDimitry Andric []>, Sched<[WritePCmpEStrM.Folded, WritePCmpEStrM.ReadAfterFold]>; 6466*0b57cec5SDimitry Andric} 6467*0b57cec5SDimitry Andric 6468*0b57cec5SDimitry Andriclet Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in { 6469*0b57cec5SDimitry Andric let Predicates = [HasAVX] in 6470*0b57cec5SDimitry Andric defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX; 6471*0b57cec5SDimitry Andric defm PCMPESTRM : SS42AI_pcmpestrm<"pcmpestrm">; 6472*0b57cec5SDimitry Andric} 6473*0b57cec5SDimitry Andric 6474*0b57cec5SDimitry Andricmulticlass SS42AI_pcmpistri<string asm> { 6475*0b57cec5SDimitry Andric def rr : SS42AI<0x63, MRMSrcReg, (outs), 6476*0b57cec5SDimitry Andric (ins VR128:$src1, VR128:$src2, u8imm:$src3), 6477*0b57cec5SDimitry Andric !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), 6478*0b57cec5SDimitry Andric []>, Sched<[WritePCmpIStrI]>; 6479*0b57cec5SDimitry Andric let mayLoad = 1 in 6480*0b57cec5SDimitry Andric def rm : SS42AI<0x63, MRMSrcMem, (outs), 6481*0b57cec5SDimitry Andric (ins VR128:$src1, i128mem:$src2, u8imm:$src3), 6482*0b57cec5SDimitry Andric !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), 6483*0b57cec5SDimitry Andric []>, Sched<[WritePCmpIStrI.Folded, WritePCmpIStrI.ReadAfterFold]>; 6484*0b57cec5SDimitry Andric} 6485*0b57cec5SDimitry Andric 6486*0b57cec5SDimitry Andriclet Defs = [ECX, EFLAGS], hasSideEffects = 0 in { 6487*0b57cec5SDimitry Andric let Predicates = [HasAVX] in 6488*0b57cec5SDimitry Andric defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX; 6489*0b57cec5SDimitry Andric defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">; 6490*0b57cec5SDimitry Andric} 6491*0b57cec5SDimitry Andric 6492*0b57cec5SDimitry Andricmulticlass SS42AI_pcmpestri<string asm> { 6493*0b57cec5SDimitry Andric def rr : SS42AI<0x61, MRMSrcReg, (outs), 6494*0b57cec5SDimitry Andric (ins VR128:$src1, VR128:$src3, u8imm:$src5), 6495*0b57cec5SDimitry Andric !strconcat(asm, "\t{$src5, $src3, $src1|$src1, 
$src3, $src5}"), 6496*0b57cec5SDimitry Andric []>, Sched<[WritePCmpEStrI]>; 6497*0b57cec5SDimitry Andric let mayLoad = 1 in 6498*0b57cec5SDimitry Andric def rm : SS42AI<0x61, MRMSrcMem, (outs), 6499*0b57cec5SDimitry Andric (ins VR128:$src1, i128mem:$src3, u8imm:$src5), 6500*0b57cec5SDimitry Andric !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), 6501*0b57cec5SDimitry Andric []>, Sched<[WritePCmpEStrI.Folded, WritePCmpEStrI.ReadAfterFold]>; 6502*0b57cec5SDimitry Andric} 6503*0b57cec5SDimitry Andric 6504*0b57cec5SDimitry Andriclet Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in { 6505*0b57cec5SDimitry Andric let Predicates = [HasAVX] in 6506*0b57cec5SDimitry Andric defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX; 6507*0b57cec5SDimitry Andric defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">; 6508*0b57cec5SDimitry Andric} 6509*0b57cec5SDimitry Andric 6510*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 6511*0b57cec5SDimitry Andric// SSE4.2 - CRC Instructions 6512*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 6513*0b57cec5SDimitry Andric 6514*0b57cec5SDimitry Andric// No CRC instructions have AVX equivalents 6515*0b57cec5SDimitry Andric 6516*0b57cec5SDimitry Andric// crc intrinsic instruction 6517*0b57cec5SDimitry Andric// This set of instructions are only rm, the only difference is the size 6518*0b57cec5SDimitry Andric// of r and m. 
6519*0b57cec5SDimitry Andricclass SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut, 6520*0b57cec5SDimitry Andric RegisterClass RCIn, SDPatternOperator Int> : 6521*0b57cec5SDimitry Andric SS42FI<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2), 6522*0b57cec5SDimitry Andric !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"), 6523*0b57cec5SDimitry Andric [(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))]>, 6524*0b57cec5SDimitry Andric Sched<[WriteCRC32]>; 6525*0b57cec5SDimitry Andric 6526*0b57cec5SDimitry Andricclass SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut, 6527*0b57cec5SDimitry Andric X86MemOperand x86memop, SDPatternOperator Int> : 6528*0b57cec5SDimitry Andric SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2), 6529*0b57cec5SDimitry Andric !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"), 6530*0b57cec5SDimitry Andric [(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))]>, 6531*0b57cec5SDimitry Andric Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>; 6532*0b57cec5SDimitry Andric 6533*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { 6534*0b57cec5SDimitry Andric def CRC32r32m8 : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem, 6535*0b57cec5SDimitry Andric int_x86_sse42_crc32_32_8>; 6536*0b57cec5SDimitry Andric def CRC32r32r8 : SS42I_crc32r<0xF0, "crc32{b}", GR32, GR8, 6537*0b57cec5SDimitry Andric int_x86_sse42_crc32_32_8>; 6538*0b57cec5SDimitry Andric def CRC32r32m16 : SS42I_crc32m<0xF1, "crc32{w}", GR32, i16mem, 6539*0b57cec5SDimitry Andric int_x86_sse42_crc32_32_16>, OpSize16; 6540*0b57cec5SDimitry Andric def CRC32r32r16 : SS42I_crc32r<0xF1, "crc32{w}", GR32, GR16, 6541*0b57cec5SDimitry Andric int_x86_sse42_crc32_32_16>, OpSize16; 6542*0b57cec5SDimitry Andric def CRC32r32m32 : SS42I_crc32m<0xF1, "crc32{l}", GR32, i32mem, 6543*0b57cec5SDimitry Andric int_x86_sse42_crc32_32_32>, OpSize32; 6544*0b57cec5SDimitry Andric def CRC32r32r32 : SS42I_crc32r<0xF1, "crc32{l}", GR32, GR32, 
6545*0b57cec5SDimitry Andric int_x86_sse42_crc32_32_32>, OpSize32; 6546*0b57cec5SDimitry Andric def CRC32r64m64 : SS42I_crc32m<0xF1, "crc32{q}", GR64, i64mem, 6547*0b57cec5SDimitry Andric int_x86_sse42_crc32_64_64>, REX_W; 6548*0b57cec5SDimitry Andric def CRC32r64r64 : SS42I_crc32r<0xF1, "crc32{q}", GR64, GR64, 6549*0b57cec5SDimitry Andric int_x86_sse42_crc32_64_64>, REX_W; 6550*0b57cec5SDimitry Andric let hasSideEffects = 0 in { 6551*0b57cec5SDimitry Andric let mayLoad = 1 in 6552*0b57cec5SDimitry Andric def CRC32r64m8 : SS42I_crc32m<0xF0, "crc32{b}", GR64, i8mem, 6553*0b57cec5SDimitry Andric null_frag>, REX_W; 6554*0b57cec5SDimitry Andric def CRC32r64r8 : SS42I_crc32r<0xF0, "crc32{b}", GR64, GR8, 6555*0b57cec5SDimitry Andric null_frag>, REX_W; 6556*0b57cec5SDimitry Andric } 6557*0b57cec5SDimitry Andric} 6558*0b57cec5SDimitry Andric 6559*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 6560*0b57cec5SDimitry Andric// SHA-NI Instructions 6561*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 6562*0b57cec5SDimitry Andric 6563*0b57cec5SDimitry Andric// FIXME: Is there a better scheduler class for SHA than WriteVecIMul? 
6564*0b57cec5SDimitry Andricmulticlass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId, 6565*0b57cec5SDimitry Andric X86FoldableSchedWrite sched, bit UsesXMM0 = 0> { 6566*0b57cec5SDimitry Andric def rr : I<Opc, MRMSrcReg, (outs VR128:$dst), 6567*0b57cec5SDimitry Andric (ins VR128:$src1, VR128:$src2), 6568*0b57cec5SDimitry Andric !if(UsesXMM0, 6569*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"), 6570*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")), 6571*0b57cec5SDimitry Andric [!if(UsesXMM0, 6572*0b57cec5SDimitry Andric (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)), 6573*0b57cec5SDimitry Andric (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>, 6574*0b57cec5SDimitry Andric T8, Sched<[sched]>; 6575*0b57cec5SDimitry Andric 6576*0b57cec5SDimitry Andric def rm : I<Opc, MRMSrcMem, (outs VR128:$dst), 6577*0b57cec5SDimitry Andric (ins VR128:$src1, i128mem:$src2), 6578*0b57cec5SDimitry Andric !if(UsesXMM0, 6579*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"), 6580*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")), 6581*0b57cec5SDimitry Andric [!if(UsesXMM0, 6582*0b57cec5SDimitry Andric (set VR128:$dst, (IntId VR128:$src1, 6583*0b57cec5SDimitry Andric (memop addr:$src2), XMM0)), 6584*0b57cec5SDimitry Andric (set VR128:$dst, (IntId VR128:$src1, 6585*0b57cec5SDimitry Andric (memop addr:$src2))))]>, T8, 6586*0b57cec5SDimitry Andric Sched<[sched.Folded, sched.ReadAfterFold]>; 6587*0b57cec5SDimitry Andric} 6588*0b57cec5SDimitry Andric 6589*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst", Predicates = [HasSHA] in { 6590*0b57cec5SDimitry Andric def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst), 6591*0b57cec5SDimitry Andric (ins VR128:$src1, VR128:$src2, u8imm:$src3), 6592*0b57cec5SDimitry Andric "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", 6593*0b57cec5SDimitry Andric [(set VR128:$dst, 
6594*0b57cec5SDimitry Andric (int_x86_sha1rnds4 VR128:$src1, VR128:$src2, 6595*0b57cec5SDimitry Andric (i8 imm:$src3)))]>, TA, 6596*0b57cec5SDimitry Andric Sched<[SchedWriteVecIMul.XMM]>; 6597*0b57cec5SDimitry Andric def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst), 6598*0b57cec5SDimitry Andric (ins VR128:$src1, i128mem:$src2, u8imm:$src3), 6599*0b57cec5SDimitry Andric "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", 6600*0b57cec5SDimitry Andric [(set VR128:$dst, 6601*0b57cec5SDimitry Andric (int_x86_sha1rnds4 VR128:$src1, 6602*0b57cec5SDimitry Andric (memop addr:$src2), 6603*0b57cec5SDimitry Andric (i8 imm:$src3)))]>, TA, 6604*0b57cec5SDimitry Andric Sched<[SchedWriteVecIMul.XMM.Folded, 6605*0b57cec5SDimitry Andric SchedWriteVecIMul.XMM.ReadAfterFold]>; 6606*0b57cec5SDimitry Andric 6607*0b57cec5SDimitry Andric defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte, 6608*0b57cec5SDimitry Andric SchedWriteVecIMul.XMM>; 6609*0b57cec5SDimitry Andric defm SHA1MSG1 : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1, 6610*0b57cec5SDimitry Andric SchedWriteVecIMul.XMM>; 6611*0b57cec5SDimitry Andric defm SHA1MSG2 : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2, 6612*0b57cec5SDimitry Andric SchedWriteVecIMul.XMM>; 6613*0b57cec5SDimitry Andric 6614*0b57cec5SDimitry Andric let Uses=[XMM0] in 6615*0b57cec5SDimitry Andric defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2, 6616*0b57cec5SDimitry Andric SchedWriteVecIMul.XMM, 1>; 6617*0b57cec5SDimitry Andric 6618*0b57cec5SDimitry Andric defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1, 6619*0b57cec5SDimitry Andric SchedWriteVecIMul.XMM>; 6620*0b57cec5SDimitry Andric defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2, 6621*0b57cec5SDimitry Andric SchedWriteVecIMul.XMM>; 6622*0b57cec5SDimitry Andric} 6623*0b57cec5SDimitry Andric 6624*0b57cec5SDimitry Andric// Aliases with explicit %xmm0 6625*0b57cec5SDimitry Andricdef : InstAlias<"sha256rnds2\t{$src2, 
$dst|$dst, $src2}", 6626*0b57cec5SDimitry Andric (SHA256RNDS2rr VR128:$dst, VR128:$src2), 0>; 6627*0b57cec5SDimitry Andricdef : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}", 6628*0b57cec5SDimitry Andric (SHA256RNDS2rm VR128:$dst, i128mem:$src2), 0>; 6629*0b57cec5SDimitry Andric 6630*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 6631*0b57cec5SDimitry Andric// AES-NI Instructions 6632*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 6633*0b57cec5SDimitry Andric 6634*0b57cec5SDimitry Andricmulticlass AESI_binop_rm_int<bits<8> opc, string OpcodeStr, 6635*0b57cec5SDimitry Andric Intrinsic IntId, PatFrag ld_frag, 6636*0b57cec5SDimitry Andric bit Is2Addr = 0, RegisterClass RC = VR128, 6637*0b57cec5SDimitry Andric X86MemOperand MemOp = i128mem> { 6638*0b57cec5SDimitry Andric let AsmString = OpcodeStr## 6639*0b57cec5SDimitry Andric !if(Is2Addr, "\t{$src2, $dst|$dst, $src2}", 6640*0b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}") in { 6641*0b57cec5SDimitry Andric def rr : AES8I<opc, MRMSrcReg, (outs RC:$dst), 6642*0b57cec5SDimitry Andric (ins RC:$src1, RC:$src2), "", 6643*0b57cec5SDimitry Andric [(set RC:$dst, (IntId RC:$src1, RC:$src2))]>, 6644*0b57cec5SDimitry Andric Sched<[WriteAESDecEnc]>; 6645*0b57cec5SDimitry Andric def rm : AES8I<opc, MRMSrcMem, (outs RC:$dst), 6646*0b57cec5SDimitry Andric (ins RC:$src1, MemOp:$src2), "", 6647*0b57cec5SDimitry Andric [(set RC:$dst, (IntId RC:$src1, (ld_frag addr:$src2)))]>, 6648*0b57cec5SDimitry Andric Sched<[WriteAESDecEnc.Folded, WriteAESDecEnc.ReadAfterFold]>; 6649*0b57cec5SDimitry Andric } 6650*0b57cec5SDimitry Andric} 6651*0b57cec5SDimitry Andric 6652*0b57cec5SDimitry Andric// Perform One Round of an AES Encryption/Decryption Flow 6653*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX_Or_NoVAES, HasAES] in { 6654*0b57cec5SDimitry Andric defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc", 
6655*0b57cec5SDimitry Andric int_x86_aesni_aesenc, load>, VEX_4V, VEX_WIG; 6656*0b57cec5SDimitry Andric defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast", 6657*0b57cec5SDimitry Andric int_x86_aesni_aesenclast, load>, VEX_4V, VEX_WIG; 6658*0b57cec5SDimitry Andric defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec", 6659*0b57cec5SDimitry Andric int_x86_aesni_aesdec, load>, VEX_4V, VEX_WIG; 6660*0b57cec5SDimitry Andric defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast", 6661*0b57cec5SDimitry Andric int_x86_aesni_aesdeclast, load>, VEX_4V, VEX_WIG; 6662*0b57cec5SDimitry Andric} 6663*0b57cec5SDimitry Andric 6664*0b57cec5SDimitry Andriclet Predicates = [NoVLX, HasVAES] in { 6665*0b57cec5SDimitry Andric defm VAESENCY : AESI_binop_rm_int<0xDC, "vaesenc", 6666*0b57cec5SDimitry Andric int_x86_aesni_aesenc_256, load, 0, VR256, 6667*0b57cec5SDimitry Andric i256mem>, VEX_4V, VEX_L, VEX_WIG; 6668*0b57cec5SDimitry Andric defm VAESENCLASTY : AESI_binop_rm_int<0xDD, "vaesenclast", 6669*0b57cec5SDimitry Andric int_x86_aesni_aesenclast_256, load, 0, VR256, 6670*0b57cec5SDimitry Andric i256mem>, VEX_4V, VEX_L, VEX_WIG; 6671*0b57cec5SDimitry Andric defm VAESDECY : AESI_binop_rm_int<0xDE, "vaesdec", 6672*0b57cec5SDimitry Andric int_x86_aesni_aesdec_256, load, 0, VR256, 6673*0b57cec5SDimitry Andric i256mem>, VEX_4V, VEX_L, VEX_WIG; 6674*0b57cec5SDimitry Andric defm VAESDECLASTY : AESI_binop_rm_int<0xDF, "vaesdeclast", 6675*0b57cec5SDimitry Andric int_x86_aesni_aesdeclast_256, load, 0, VR256, 6676*0b57cec5SDimitry Andric i256mem>, VEX_4V, VEX_L, VEX_WIG; 6677*0b57cec5SDimitry Andric} 6678*0b57cec5SDimitry Andric 6679*0b57cec5SDimitry Andriclet Constraints = "$src1 = $dst" in { 6680*0b57cec5SDimitry Andric defm AESENC : AESI_binop_rm_int<0xDC, "aesenc", 6681*0b57cec5SDimitry Andric int_x86_aesni_aesenc, memop, 1>; 6682*0b57cec5SDimitry Andric defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast", 6683*0b57cec5SDimitry Andric int_x86_aesni_aesenclast, memop, 1>; 
6684*0b57cec5SDimitry Andric defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec", 6685*0b57cec5SDimitry Andric int_x86_aesni_aesdec, memop, 1>; 6686*0b57cec5SDimitry Andric defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast", 6687*0b57cec5SDimitry Andric int_x86_aesni_aesdeclast, memop, 1>; 6688*0b57cec5SDimitry Andric} 6689*0b57cec5SDimitry Andric 6690*0b57cec5SDimitry Andric// Perform the AES InvMixColumn Transformation 6691*0b57cec5SDimitry Andriclet Predicates = [HasAVX, HasAES] in { 6692*0b57cec5SDimitry Andric def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), 6693*0b57cec5SDimitry Andric (ins VR128:$src1), 6694*0b57cec5SDimitry Andric "vaesimc\t{$src1, $dst|$dst, $src1}", 6695*0b57cec5SDimitry Andric [(set VR128:$dst, 6696*0b57cec5SDimitry Andric (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>, 6697*0b57cec5SDimitry Andric VEX, VEX_WIG; 6698*0b57cec5SDimitry Andric def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), 6699*0b57cec5SDimitry Andric (ins i128mem:$src1), 6700*0b57cec5SDimitry Andric "vaesimc\t{$src1, $dst|$dst, $src1}", 6701*0b57cec5SDimitry Andric [(set VR128:$dst, (int_x86_aesni_aesimc (load addr:$src1)))]>, 6702*0b57cec5SDimitry Andric Sched<[WriteAESIMC.Folded]>, VEX, VEX_WIG; 6703*0b57cec5SDimitry Andric} 6704*0b57cec5SDimitry Andricdef AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), 6705*0b57cec5SDimitry Andric (ins VR128:$src1), 6706*0b57cec5SDimitry Andric "aesimc\t{$src1, $dst|$dst, $src1}", 6707*0b57cec5SDimitry Andric [(set VR128:$dst, 6708*0b57cec5SDimitry Andric (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>; 6709*0b57cec5SDimitry Andricdef AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), 6710*0b57cec5SDimitry Andric (ins i128mem:$src1), 6711*0b57cec5SDimitry Andric "aesimc\t{$src1, $dst|$dst, $src1}", 6712*0b57cec5SDimitry Andric [(set VR128:$dst, (int_x86_aesni_aesimc (memop addr:$src1)))]>, 6713*0b57cec5SDimitry Andric Sched<[WriteAESIMC.Folded]>; 6714*0b57cec5SDimitry Andric 
6715*0b57cec5SDimitry Andric// AES Round Key Generation Assist 6716*0b57cec5SDimitry Andriclet Predicates = [HasAVX, HasAES] in { 6717*0b57cec5SDimitry Andric def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), 6718*0b57cec5SDimitry Andric (ins VR128:$src1, u8imm:$src2), 6719*0b57cec5SDimitry Andric "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", 6720*0b57cec5SDimitry Andric [(set VR128:$dst, 6721*0b57cec5SDimitry Andric (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, 6722*0b57cec5SDimitry Andric Sched<[WriteAESKeyGen]>, VEX, VEX_WIG; 6723*0b57cec5SDimitry Andric def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), 6724*0b57cec5SDimitry Andric (ins i128mem:$src1, u8imm:$src2), 6725*0b57cec5SDimitry Andric "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", 6726*0b57cec5SDimitry Andric [(set VR128:$dst, 6727*0b57cec5SDimitry Andric (int_x86_aesni_aeskeygenassist (load addr:$src1), imm:$src2))]>, 6728*0b57cec5SDimitry Andric Sched<[WriteAESKeyGen.Folded]>, VEX, VEX_WIG; 6729*0b57cec5SDimitry Andric} 6730*0b57cec5SDimitry Andricdef AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), 6731*0b57cec5SDimitry Andric (ins VR128:$src1, u8imm:$src2), 6732*0b57cec5SDimitry Andric "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", 6733*0b57cec5SDimitry Andric [(set VR128:$dst, 6734*0b57cec5SDimitry Andric (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, 6735*0b57cec5SDimitry Andric Sched<[WriteAESKeyGen]>; 6736*0b57cec5SDimitry Andricdef AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), 6737*0b57cec5SDimitry Andric (ins i128mem:$src1, u8imm:$src2), 6738*0b57cec5SDimitry Andric "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", 6739*0b57cec5SDimitry Andric [(set VR128:$dst, 6740*0b57cec5SDimitry Andric (int_x86_aesni_aeskeygenassist (memop addr:$src1), imm:$src2))]>, 6741*0b57cec5SDimitry Andric Sched<[WriteAESKeyGen.Folded]>; 6742*0b57cec5SDimitry Andric 
6743*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 6744*0b57cec5SDimitry Andric// PCLMUL Instructions 6745*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 6746*0b57cec5SDimitry Andric 6747*0b57cec5SDimitry Andric// Immediate transform to help with commuting. 6748*0b57cec5SDimitry Andricdef PCLMULCommuteImm : SDNodeXForm<imm, [{ 6749*0b57cec5SDimitry Andric uint8_t Imm = N->getZExtValue(); 6750*0b57cec5SDimitry Andric return getI8Imm((uint8_t)((Imm >> 4) | (Imm << 4)), SDLoc(N)); 6751*0b57cec5SDimitry Andric}]>; 6752*0b57cec5SDimitry Andric 6753*0b57cec5SDimitry Andric// SSE carry-less Multiplication instructions 6754*0b57cec5SDimitry Andriclet Predicates = [NoAVX, HasPCLMUL] in { 6755*0b57cec5SDimitry Andric let Constraints = "$src1 = $dst" in { 6756*0b57cec5SDimitry Andric let isCommutable = 1 in 6757*0b57cec5SDimitry Andric def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), 6758*0b57cec5SDimitry Andric (ins VR128:$src1, VR128:$src2, u8imm:$src3), 6759*0b57cec5SDimitry Andric "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", 6760*0b57cec5SDimitry Andric [(set VR128:$dst, 6761*0b57cec5SDimitry Andric (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>, 6762*0b57cec5SDimitry Andric Sched<[WriteCLMul]>; 6763*0b57cec5SDimitry Andric 6764*0b57cec5SDimitry Andric def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), 6765*0b57cec5SDimitry Andric (ins VR128:$src1, i128mem:$src2, u8imm:$src3), 6766*0b57cec5SDimitry Andric "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", 6767*0b57cec5SDimitry Andric [(set VR128:$dst, 6768*0b57cec5SDimitry Andric (int_x86_pclmulqdq VR128:$src1, (memop addr:$src2), 6769*0b57cec5SDimitry Andric imm:$src3))]>, 6770*0b57cec5SDimitry Andric Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>; 6771*0b57cec5SDimitry Andric } // Constraints = "$src1 = $dst" 6772*0b57cec5SDimitry Andric 
// Commuted form: fold a load in the FIRST operand by swapping the sources
// and rotating the immediate with PCLMULCommuteImm.
def : Pat<(int_x86_pclmulqdq (memop addr:$src2), VR128:$src1,
                              (i8 imm:$src3)),
          (PCLMULQDQrm VR128:$src1, addr:$src2,
                        (PCLMULCommuteImm imm:$src3))>;
} // Predicates = [NoAVX, HasPCLMUL]

// SSE aliases
// pclmul{hq,lq}{hq,lq}dq mnemonics map onto PCLMULQDQ with a computed
// immediate: bit 0 = (HI == "hq"), bit 4 = (LO == "hq") — see the
// !add(!shl(...),...) expression below.
foreach HI = ["hq","lq"] in
foreach LO = ["hq","lq"] in {
  def : InstAlias<"pclmul" # HI # LO # "dq\t{$src, $dst|$dst, $src}",
                  (PCLMULQDQrr VR128:$dst, VR128:$src,
                   !add(!shl(!eq(LO,"hq"),4),!eq(HI,"hq"))), 0>;
  def : InstAlias<"pclmul" # HI # LO # "dq\t{$src, $dst|$dst, $src}",
                  (PCLMULQDQrm VR128:$dst, i128mem:$src,
                   !add(!shl(!eq(LO,"hq"),4),!eq(HI,"hq"))), 0>;
}

// AVX carry-less Multiplication instructions
// Emits the rr/rm forms of vpclmulqdq for a given register class, memory
// operand, load fragment and intrinsic (128-bit or 256-bit variant).
multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
                      PatFrag LdFrag, Intrinsic IntId> {
  let isCommutable = 1 in
  def rr : PCLMULIi8<0x44, MRMSrcReg, (outs RC:$dst),
            (ins RC:$src1, RC:$src2, u8imm:$src3),
            "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
            [(set RC:$dst,
              (IntId RC:$src1, RC:$src2, imm:$src3))]>,
            Sched<[WriteCLMul]>;

  def rm : PCLMULIi8<0x44, MRMSrcMem, (outs RC:$dst),
            (ins RC:$src1, MemOp:$src2, u8imm:$src3),
            "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
            [(set RC:$dst,
              (IntId RC:$src1, (LdFrag addr:$src2), imm:$src3))]>,
            Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;

  // We can commute a load in the first operand by swapping the sources and
  // rotating the immediate.
  def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 imm:$src3)),
            (!cast<Instruction>(NAME#"rm") RC:$src1, addr:$src2,
              (PCLMULCommuteImm imm:$src3))>;
}

let Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in
defm VPCLMULQDQ : vpclmulqdq<VR128, i128mem, load,
                             int_x86_pclmulqdq>, VEX_4V, VEX_WIG;

let Predicates = [NoVLX, HasVPCLMULQDQ] in
defm VPCLMULQDQY : vpclmulqdq<VR256, i256mem, load,
                              int_x86_pclmulqdq_256>, VEX_4V, VEX_L, VEX_WIG;

// One hi/lo mnemonic pair: two aliases (register and memory form) that
// resolve to the given instruction with the immediate computed exactly as
// for the SSE aliases above.
multiclass vpclmulqdq_aliases_impl<string InstStr, RegisterClass RC,
                                   X86MemOperand MemOp, string Hi, string Lo> {
  def : InstAlias<"vpclmul"##Hi##Lo##"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(InstStr # "rr") RC:$dst, RC:$src1, RC:$src2,
                        !add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))), 0>;
  def : InstAlias<"vpclmul"##Hi##Lo##"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(InstStr # "rm") RC:$dst, RC:$src1, MemOp:$src2,
                        !add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))), 0>;
}

// All four hi/lo suffix combinations for one instruction family.
multiclass vpclmulqdq_aliases<string InstStr, RegisterClass RC,
                              X86MemOperand MemOp> {
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "hq", "hq">;
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "hq", "lq">;
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "lq", "hq">;
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "lq", "lq">;
}

// AVX aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQ", VR128, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQY", VR256, i256mem>;

//===----------------------------------------------------------------------===//
// SSE4A Instructions
//===----------------------------------------------------------------------===//

let Predicates = [HasSSE4A] in {

let ExeDomain = SSEPackedInt in {
let Constraints = "$src = $dst" in {
// EXTRQI/INSERTQI take the bit-field length and index as explicit
// immediates; EXTRQ/INSERTQ take them in a second VR128 operand ($mask).
def EXTRQI  : Ii8<0x78, MRMXr, (outs VR128:$dst),
                  (ins VR128:$src, u8imm:$len, u8imm:$idx),
                  "extrq\t{$idx, $len, $src|$src, $len, $idx}",
                  [(set VR128:$dst, (X86extrqi VR128:$src, imm:$len,
                                     imm:$idx))]>,
                  PD, Sched<[SchedWriteVecALU.XMM]>;
def EXTRQ   : I<0x79, MRMSrcReg, (outs VR128:$dst),
                (ins VR128:$src, VR128:$mask),
                "extrq\t{$mask, $src|$src, $mask}",
                [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src,
                                   VR128:$mask))]>,
                PD, Sched<[SchedWriteVecALU.XMM]>;

def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
                   (ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx),
                   "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
                   [(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2,
                                      imm:$len, imm:$idx))]>,
                   XD, Sched<[SchedWriteVecALU.XMM]>;
def INSERTQ  : I<0x79, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src, VR128:$mask),
                 "insertq\t{$mask, $src|$src, $mask}",
                 [(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src,
                                    VR128:$mask))]>,
                 XD, Sched<[SchedWriteVecALU.XMM]>;
}
} // ExeDomain = SSEPackedInt

// Non-temporal (unaligned) scalar stores.
let AddedComplexity = 400 in { // Prefer non-temporal versions
let hasSideEffects = 0, mayStore = 1, SchedRW = [SchedWriteFMoveLSNT.Scl.MR] in {
// MOVNTSS/MOVNTSD carry no selection patterns themselves; the Pat<>s below
// select them from nontemporalstore after copying the scalar into VR128.
def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
                "movntss\t{$src, $dst|$dst, $src}", []>, XS;

def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                "movntsd\t{$src, $dst|$dst, $src}", []>, XD;
} // SchedRW

def : Pat<(nontemporalstore FR32:$src, addr:$dst),
          (MOVNTSS addr:$dst, (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;

def : Pat<(nontemporalstore FR64:$src, addr:$dst),
          (MOVNTSD addr:$dst, (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;

} // AddedComplexity
} // HasSSE4A

//===----------------------------------------------------------------------===//
// AVX Instructions
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VBROADCAST - Load from memory and broadcast to all elements of the
// destination operand
//
// Memory-source broadcast: load one scalar via ld_frag and splat it to all
// elements of the destination register class.
class avx_broadcast_rm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop, ValueType VT,
                       PatFrag ld_frag, SchedWrite Sched> :
  AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
        [(set RC:$dst, (VT (X86VBroadcast (ld_frag addr:$src))))]>,
  Sched<[Sched]>, VEX;

// AVX2 adds register forms
// Register-source broadcast: splat the low element of a VR128 source.
class avx2_broadcast_rr<bits<8> opc, string OpcodeStr, RegisterClass RC,
                        ValueType ResVT, ValueType OpVT, SchedWrite Sched> :
  AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
         [(set RC:$dst, (ResVT (X86VBroadcast (OpVT VR128:$src))))]>,
  Sched<[Sched]>, VEX;

let ExeDomain = SSEPackedSingle, Predicates = [HasAVX, NoVLX] in {
  def VBROADCASTSSrm  : avx_broadcast_rm<0x18, "vbroadcastss", VR128,
                                         f32mem, v4f32, loadf32,
                                         SchedWriteFShuffle.XMM.Folded>;
  def VBROADCASTSSYrm : avx_broadcast_rm<0x18, "vbroadcastss", VR256,
                                         f32mem, v8f32, loadf32,
                                         SchedWriteFShuffle.XMM.Folded>, VEX_L;
}
let ExeDomain = SSEPackedDouble, Predicates = [HasAVX, NoVLX] in
def VBROADCASTSDYrm : avx_broadcast_rm<0x19, "vbroadcastsd", VR256, f64mem,
                                       v4f64, loadf64,
                                       SchedWriteFShuffle.XMM.Folded>, VEX_L;

let ExeDomain = SSEPackedSingle, Predicates = [HasAVX2, NoVLX] in {
  def VBROADCASTSSrr  : avx2_broadcast_rr<0x18, "vbroadcastss", VR128,
                                          v4f32, v4f32, SchedWriteFShuffle.XMM>;
  def VBROADCASTSSYrr : avx2_broadcast_rr<0x18, "vbroadcastss", VR256,
                                          v8f32, v4f32, WriteFShuffle256>, VEX_L;
}
let ExeDomain = SSEPackedDouble, Predicates = [HasAVX2, NoVLX] in
def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256,
                                        v4f64, v2f64, WriteFShuffle256>, VEX_L;

// Also match broadcasts whose scalar load was wrapped in scalar_to_vector.
let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v4f32 (X86VBroadcast (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
            (VBROADCASTSSrm addr:$src)>;
  def : Pat<(v8f32 (X86VBroadcast (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
            (VBROADCASTSSYrm addr:$src)>;
  def : Pat<(v4f64 (X86VBroadcast (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
            (VBROADCASTSDYrm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// VBROADCAST*128 - Load from memory and broadcast 128-bit vector to both
// halves of a 256-bit vector.
6959*0b57cec5SDimitry Andric// 6960*0b57cec5SDimitry Andriclet mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX2] in 6961*0b57cec5SDimitry Andricdef VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst), 6962*0b57cec5SDimitry Andric (ins i128mem:$src), 6963*0b57cec5SDimitry Andric "vbroadcasti128\t{$src, $dst|$dst, $src}", []>, 6964*0b57cec5SDimitry Andric Sched<[WriteShuffleLd]>, VEX, VEX_L; 6965*0b57cec5SDimitry Andric 6966*0b57cec5SDimitry Andriclet mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX], 6967*0b57cec5SDimitry Andric ExeDomain = SSEPackedSingle in 6968*0b57cec5SDimitry Andricdef VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst), 6969*0b57cec5SDimitry Andric (ins f128mem:$src), 6970*0b57cec5SDimitry Andric "vbroadcastf128\t{$src, $dst|$dst, $src}", []>, 6971*0b57cec5SDimitry Andric Sched<[SchedWriteFShuffle.XMM.Folded]>, VEX, VEX_L; 6972*0b57cec5SDimitry Andric 6973*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 6974*0b57cec5SDimitry Andricdef : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))), 6975*0b57cec5SDimitry Andric (VBROADCASTF128 addr:$src)>; 6976*0b57cec5SDimitry Andricdef : Pat<(v8f32 (X86SubVBroadcast (loadv4f32 addr:$src))), 6977*0b57cec5SDimitry Andric (VBROADCASTF128 addr:$src)>; 6978*0b57cec5SDimitry Andric} 6979*0b57cec5SDimitry Andric 6980*0b57cec5SDimitry Andric// NOTE: We're using FP instructions here, but execution domain fixing can 6981*0b57cec5SDimitry Andric// convert to integer when profitable. 
// Integer subvector broadcasts also go through the FP VBROADCASTF128
// (see the NOTE above about execution-domain fixing).
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTF128 addr:$src)>;
def : Pat<(v8i32 (X86SubVBroadcast (loadv4i32 addr:$src))),
          (VBROADCASTF128 addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
          (VBROADCASTF128 addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
          (VBROADCASTF128 addr:$src)>;
}

//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//
// Patterns are attached separately via vinsert_lowering below.
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
                            (ins VR256:$src1, VR128:$src2, u8imm:$src3),
                            "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                            []>, Sched<[WriteFShuffle256]>, VEX_4V, VEX_L;
let mayLoad = 1 in
def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
                            (ins VR256:$src1, f128mem:$src2, u8imm:$src3),
                            "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                            []>, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
}

// To create a 256-bit all ones value, we should produce VCMPTRUEPS
// with YMM register containing zero.
// FIXME: Avoid producing vxorps to clear the fake inputs.
let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 immAllOnesV), (VCMPPSYrri (AVX_SET0), (AVX_SET0), 0xf)>;
}

// Lower vinsert128_insert (register and folded-load forms) to the named
// instruction, with the insert position converted to an immediate.
multiclass vinsert_lowering<string InstrStr, ValueType From, ValueType To,
                            PatFrag memop_frag> {
  def : Pat<(vinsert128_insert:$ins (To VR256:$src1), (From VR128:$src2),
                                    (iPTR imm)),
            (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR128:$src2,
                          (INSERT_get_vinsert128_imm VR256:$ins))>;
  def : Pat<(vinsert128_insert:$ins (To VR256:$src1),
                                    (From (memop_frag addr:$src2)),
                                    (iPTR imm)),
            (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR256:$ins))>;
}

let Predicates = [HasAVX, NoVLX] in {
  defm : vinsert_lowering<"VINSERTF128", v4f32, v8f32, loadv4f32>;
  defm : vinsert_lowering<"VINSERTF128", v2f64, v4f64, loadv2f64>;
}

// Without AVX2, integer inserts also use the FP VINSERTF128.
let Predicates = [HasAVX1Only] in {
  defm : vinsert_lowering<"VINSERTF128", v2i64, v4i64, loadv2i64>;
  defm : vinsert_lowering<"VINSERTF128", v4i32, v8i32, loadv4i32>;
  defm : vinsert_lowering<"VINSERTF128", v8i16, v16i16, loadv8i16>;
  defm : vinsert_lowering<"VINSERTF128", v16i8, v32i8, loadv16i8>;
}
7040*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 7041*0b57cec5SDimitry Andric// VEXTRACTF128 - Extract packed floating-point values 7042*0b57cec5SDimitry Andric// 7043*0b57cec5SDimitry Andriclet hasSideEffects = 0, ExeDomain = SSEPackedSingle in { 7044*0b57cec5SDimitry Andricdef VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst), 7045*0b57cec5SDimitry Andric (ins VR256:$src1, u8imm:$src2), 7046*0b57cec5SDimitry Andric "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7047*0b57cec5SDimitry Andric []>, Sched<[WriteFShuffle256]>, VEX, VEX_L; 7048*0b57cec5SDimitry Andriclet mayStore = 1 in 7049*0b57cec5SDimitry Andricdef VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs), 7050*0b57cec5SDimitry Andric (ins f128mem:$dst, VR256:$src1, u8imm:$src2), 7051*0b57cec5SDimitry Andric "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7052*0b57cec5SDimitry Andric []>, Sched<[WriteFStoreX]>, VEX, VEX_L; 7053*0b57cec5SDimitry Andric} 7054*0b57cec5SDimitry Andric 7055*0b57cec5SDimitry Andricmulticlass vextract_lowering<string InstrStr, ValueType From, ValueType To> { 7056*0b57cec5SDimitry Andric def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 7057*0b57cec5SDimitry Andric (To (!cast<Instruction>(InstrStr#rr) 7058*0b57cec5SDimitry Andric (From VR256:$src1), 7059*0b57cec5SDimitry Andric (EXTRACT_get_vextract128_imm VR128:$ext)))>; 7060*0b57cec5SDimitry Andric def : Pat<(store (To (vextract128_extract:$ext (From VR256:$src1), 7061*0b57cec5SDimitry Andric (iPTR imm))), addr:$dst), 7062*0b57cec5SDimitry Andric (!cast<Instruction>(InstrStr#mr) addr:$dst, VR256:$src1, 7063*0b57cec5SDimitry Andric (EXTRACT_get_vextract128_imm VR128:$ext))>; 7064*0b57cec5SDimitry Andric} 7065*0b57cec5SDimitry Andric 7066*0b57cec5SDimitry Andric// AVX1 patterns 7067*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 7068*0b57cec5SDimitry Andric defm : vextract_lowering<"VEXTRACTF128", v8f32, v4f32>; 
  defm : vextract_lowering<"VEXTRACTF128", v4f64, v2f64>;
}

// Without AVX2, integer extracts also use the FP VEXTRACTF128.
let Predicates = [HasAVX1Only] in {
  defm : vextract_lowering<"VEXTRACTF128", v4i64, v2i64>;
  defm : vextract_lowering<"VEXTRACTF128", v8i32, v4i32>;
  defm : vextract_lowering<"VEXTRACTF128", v16i16, v8i16>;
  defm : vextract_lowering<"VEXTRACTF128", v32i8, v16i8>;
}

//===----------------------------------------------------------------------===//
// VMASKMOV - Conditional SIMD Packed Loads and Stores
//
// Intrinsic-based masked load/store: rm/Yrm are the 128/256-bit loads
// (mask in $src1), mr/Ymr are the corresponding stores.
multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
                          Intrinsic IntLd, Intrinsic IntLd256,
                          Intrinsic IntSt, Intrinsic IntSt256> {
  def rm  : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
                  (ins VR128:$src1, f128mem:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
                  VEX_4V, Sched<[WriteFMaskedLoad]>;
  def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
                  (ins VR256:$src1, f256mem:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
                  VEX_4V, VEX_L, Sched<[WriteFMaskedLoadY]>;
  def mr  : AVX8I<opc_mr, MRMDestMem, (outs),
                  (ins f128mem:$dst, VR128:$src1, VR128:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>,
                  VEX_4V, Sched<[WriteFMaskedStore]>;
  def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
                  (ins f256mem:$dst, VR256:$src1, VR256:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
                  VEX_4V, VEX_L, Sched<[WriteFMaskedStoreY]>;
}

let ExeDomain = SSEPackedSingle in
defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
                                 int_x86_avx_maskload_ps,
                                 int_x86_avx_maskload_ps_256,
                                 int_x86_avx_maskstore_ps,
                                 int_x86_avx_maskstore_ps_256>;
let ExeDomain = SSEPackedDouble in
defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
                                 int_x86_avx_maskload_pd,
                                 int_x86_avx_maskload_pd_256,
                                 int_x86_avx_maskstore_pd,
                                 int_x86_avx_maskstore_pd_256>;

//===----------------------------------------------------------------------===//
// VPERMIL - Permute Single and Double Floating-Point Values
//

// rr/rm are the variable forms (X86VPermilpv, control in an integer vector
// register or memory); ri/mi are the immediate forms (X86VPermilpi).  Note
// that mi folds a load of the DATA operand ($src1 is memory), while rm folds
// a load of the CONTROL operand.
multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
                      RegisterClass RC, X86MemOperand x86memop_f,
                      X86MemOperand x86memop_i,
                      ValueType f_vt, ValueType i_vt,
                      X86FoldableSchedWrite sched,
                      X86FoldableSchedWrite varsched> {
  let Predicates = [HasAVX, NoVLX] in {
    def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt RC:$src2))))]>, VEX_4V,
                   Sched<[varsched]>;
    def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, x86memop_i:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1,
                                  (i_vt (load addr:$src2)))))]>, VEX_4V,
                   Sched<[varsched.Folded, sched.ReadAfterFold]>;

    def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
                     (ins RC:$src1, u8imm:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 imm:$src2))))]>, VEX,
                     Sched<[sched]>;
    def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
                     (ins x86memop_f:$src1, u8imm:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set RC:$dst,
                       (f_vt (X86VPermilpi (load addr:$src1), (i8 imm:$src2))))]>, VEX,
                     Sched<[sched.Folded]>;
  }// Predicates = [HasAVX, NoVLX]
}

let ExeDomain = SSEPackedSingle in {
  defm VPERMILPS  : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
                               v4f32, v4i32, SchedWriteFShuffle.XMM,
                               SchedWriteFVarShuffle.XMM>;
  defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
                               v8f32, v8i32, SchedWriteFShuffle.YMM,
                               SchedWriteFVarShuffle.YMM>, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
  defm VPERMILPD  : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
                               v2f64, v2i64, SchedWriteFShuffle.XMM,
                               SchedWriteFVarShuffle.XMM>;
  defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
                               v4f64, v4i64, SchedWriteFShuffle.YMM,
                               SchedWriteFVarShuffle.YMM>, VEX_L;
}

//===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
//

let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
           (ins VR256:$src1, VR256:$src2, u8imm:$src3),
           "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           [(set VR256:$dst, (v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
                              (i8 imm:$src3))))]>, VEX_4V, VEX_L,
           Sched<[WriteFShuffle256]>;
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
           (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
           "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4f64 addr:$src2),
                              (i8 imm:$src3)))]>, VEX_4V, VEX_L,
           Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
}

// Immediate transform to help with commuting.
// XOR with 0x22 flips bits 1 and 5 of the control immediate, accounting for
// the two sources having been swapped.
def Perm2XCommuteImm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N));
}]>;

let Predicates = [HasAVX] in {
// Pattern with load in other operand.
def : Pat<(v4f64 (X86VPerm2x128 (loadv4f64 addr:$src2),
                                VR256:$src1, (i8 imm:$imm))),
          (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
}

// Without AVX2, integer permutes also use the FP VPERM2F128.
let Predicates = [HasAVX1Only] in {
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
                  (loadv4i64 addr:$src2), (i8 imm:$imm))),
          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
// Pattern with load in other operand.
def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
                                VR256:$src1, (i8 imm:$imm))),
          (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
}

//===----------------------------------------------------------------------===//
// VZERO - Zero YMM registers
// Note: These instructions do not affect YMM16-YMM31.
//

let SchedRW = [WriteSystem] in {
let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
            YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
  // Zero All YMM registers
  def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
                   [(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L,
                   Requires<[HasAVX]>, VEX_WIG;

  // Zero Upper bits of YMM registers
  def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
                     [(int_x86_avx_vzeroupper)]>, PS, VEX,
                     Requires<[HasAVX]>, VEX_WIG;
} // Defs
} // SchedRW

//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//

// vcvtph2ps: half -> single conversion; register source is always VR128,
// destination width comes from RC (VR128/VR256).
multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop,
                      X86FoldableSchedWrite sched> {
  def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
             "vcvtph2ps\t{$src, $dst|$dst, $src}",
             [(set RC:$dst, (X86cvtph2ps VR128:$src))]>,
             T8PD, VEX, Sched<[sched]>;
  let hasSideEffects = 0, mayLoad = 1 in
  def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
             "vcvtph2ps\t{$src, $dst|$dst, $src}",
             [(set RC:$dst, (X86cvtph2ps (loadv8i16 addr:$src)))]>,
             T8PD, VEX, Sched<[sched.Folded]>;
}

// vcvtps2ph: single -> half conversion with rounding-control immediate;
// the mr form stores the result directly (no pattern, matched separately).
multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
                      SchedWrite RR, SchedWrite MR> {
  def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
               (ins RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set VR128:$dst, (X86cvtps2ph RC:$src1, imm:$src2))]>,
               TAPD, VEX, Sched<[RR]>;
  let hasSideEffects = 0, mayStore = 1 in
  def mr : Ii8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               TAPD, VEX, Sched<[MR]>;
}

let Predicates = [HasF16C, NoVLX] in {
  defm VCVTPH2PS  : f16c_ph2ps<VR128, f64mem, WriteCvtPH2PS>;
  defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, WriteCvtPH2PSY>, VEX_L;
  defm VCVTPS2PH  : f16c_ps2ph<VR128, f64mem, WriteCvtPS2PH,
                               WriteCvtPS2PHSt>;
  defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, WriteCvtPS2PHY,
                               WriteCvtPS2PHYSt>, VEX_L;

  // Pattern match vcvtph2ps of a scalar i64 load.
  def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTPH2PSrm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16
                    (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTPH2PSrm addr:$src)>;

  // Fold the store of a converted result into the mr forms.
  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
  def : Pat<(store (v8i16 (X86cvtps2ph VR256:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHYmr addr:$dst, VR256:$src1, imm:$src2)>;
}

// Patterns for matching conversions from float to half-float and vice versa.
let Predicates = [HasF16C, NoVLX] in {
  // Use MXCSR.RC for rounding instead of explicitly specifying the default
  // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
  // configurations we support (the default). However, falling back to MXCSR is
  // more consistent with other instructions, which are always controlled by it.
  // It's encoded as 0b100.
  def : Pat<(fp_to_f16 FR32:$src),
            (i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (v8i16 (VCVTPS2PHrr
              (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 4))), sub_16bit))>;

  def : Pat<(f16_to_fp GR16:$src),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSrr
              (v4i32 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128)))), FR32)) >;

  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32:$src))),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSrr
              (v8i16 (VCVTPS2PHrr (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 4)))), FR32)) >;
}

//===----------------------------------------------------------------------===//
// AVX2 Instructions
//===----------------------------------------------------------------------===//

/// AVX2_blend_rmi - AVX2 blend with 8-bit immediate
// rri/rmi forms plus a commute pattern (load in first source) that rewrites
// the immediate via commuteXForm.
multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, X86FoldableSchedWrite sched,
                          RegisterClass RC,
                          X86MemOperand x86memop, SDNodeXForm commuteXForm> {
  let isCommutable = 1 in
  def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
                     (ins RC:$src1, RC:$src2, u8imm:$src3),
                     !strconcat(OpcodeStr,
                                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
                     Sched<[sched]>, VEX_4V;
  def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
                     (ins RC:$src1, x86memop:$src2, u8imm:$src3),
                     !strconcat(OpcodeStr,
                                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     [(set RC:$dst,
                       (OpVT (OpNode RC:$src1, (load addr:$src2), imm:$src3)))]>,
                     Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V;

  // Pattern to commute if load is in first source.
  def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, imm:$src3)),
            (!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
                                            (commuteXForm imm:$src3))>;
}

let Predicates = [HasAVX2] in {
defm VPBLENDD : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v4i32,
                               SchedWriteBlend.XMM, VR128, i128mem,
                               BlendCommuteImm4>;
defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32,
                                SchedWriteBlend.YMM, VR256, i256mem,
                                BlendCommuteImm8>, VEX_L;

// v2i64/v4i64 blends are lowered to vpblendd with the immediate scaled up
// (each 64-bit lane becomes two 32-bit lanes) via the BlendScale* xforms.
def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3),
          (VPBLENDDYrri VR256:$src1, VR256:$src2, (BlendScaleImm4 imm:$src3))>;
def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3),
          (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3),
          (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;

def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
          (VPBLENDDrri VR128:$src1, VR128:$src2, (BlendScaleImm2to4 imm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3),
          (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleImm2to4 imm:$src3))>;
def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3),
          (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2to4 imm:$src3))>;
}

// For insertion into the zero index (low half) of a 256-bit vector, it is
// more efficient to generate a blend with immediate instead of an insert*128.
// NOTE: We're using FP instructions here, but execution domain fixing should
// take care of using integer instructions when profitable.
7373*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in { 7374*0b57cec5SDimitry Andricdef : Pat<(insert_subvector (v8i32 VR256:$src1), (v4i32 VR128:$src2), (iPTR 0)), 7375*0b57cec5SDimitry Andric (VBLENDPSYrri VR256:$src1, 7376*0b57cec5SDimitry Andric (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), 7377*0b57cec5SDimitry Andric VR128:$src2, sub_xmm), 0xf)>; 7378*0b57cec5SDimitry Andricdef : Pat<(insert_subvector (v4i64 VR256:$src1), (v2i64 VR128:$src2), (iPTR 0)), 7379*0b57cec5SDimitry Andric (VBLENDPSYrri VR256:$src1, 7380*0b57cec5SDimitry Andric (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), 7381*0b57cec5SDimitry Andric VR128:$src2, sub_xmm), 0xf)>; 7382*0b57cec5SDimitry Andricdef : Pat<(insert_subvector (v16i16 VR256:$src1), (v8i16 VR128:$src2), (iPTR 0)), 7383*0b57cec5SDimitry Andric (VBLENDPSYrri VR256:$src1, 7384*0b57cec5SDimitry Andric (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), 7385*0b57cec5SDimitry Andric VR128:$src2, sub_xmm), 0xf)>; 7386*0b57cec5SDimitry Andricdef : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)), 7387*0b57cec5SDimitry Andric (VBLENDPSYrri VR256:$src1, 7388*0b57cec5SDimitry Andric (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), 7389*0b57cec5SDimitry Andric VR128:$src2, sub_xmm), 0xf)>; 7390*0b57cec5SDimitry Andric 7391*0b57cec5SDimitry Andricdef : Pat<(insert_subvector (loadv8i32 addr:$src2), (v4i32 VR128:$src1), (iPTR 0)), 7392*0b57cec5SDimitry Andric (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), 7393*0b57cec5SDimitry Andric VR128:$src1, sub_xmm), addr:$src2, 0xf0)>; 7394*0b57cec5SDimitry Andricdef : Pat<(insert_subvector (loadv4i64 addr:$src2), (v2i64 VR128:$src1), (iPTR 0)), 7395*0b57cec5SDimitry Andric (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), 7396*0b57cec5SDimitry Andric VR128:$src1, sub_xmm), addr:$src2, 0xf0)>; 7397*0b57cec5SDimitry Andricdef : Pat<(insert_subvector (loadv16i16 addr:$src2), (v8i16 VR128:$src1), (iPTR 0)), 7398*0b57cec5SDimitry Andric (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), 
7399*0b57cec5SDimitry Andric VR128:$src1, sub_xmm), addr:$src2, 0xf0)>; 7400*0b57cec5SDimitry Andricdef : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1), (iPTR 0)), 7401*0b57cec5SDimitry Andric (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), 7402*0b57cec5SDimitry Andric VR128:$src1, sub_xmm), addr:$src2, 0xf0)>; 7403*0b57cec5SDimitry Andric} 7404*0b57cec5SDimitry Andric 7405*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 7406*0b57cec5SDimitry Andric// VPBROADCAST - Load from memory and broadcast to all elements of the 7407*0b57cec5SDimitry Andric// destination operand 7408*0b57cec5SDimitry Andric// 7409*0b57cec5SDimitry Andricmulticlass avx2_broadcast<bits<8> opc, string OpcodeStr, 7410*0b57cec5SDimitry Andric X86MemOperand x86memop, PatFrag ld_frag, 7411*0b57cec5SDimitry Andric ValueType OpVT128, ValueType OpVT256, Predicate prd> { 7412*0b57cec5SDimitry Andric let Predicates = [HasAVX2, prd] in { 7413*0b57cec5SDimitry Andric def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 7414*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 7415*0b57cec5SDimitry Andric [(set VR128:$dst, 7416*0b57cec5SDimitry Andric (OpVT128 (X86VBroadcast (OpVT128 VR128:$src))))]>, 7417*0b57cec5SDimitry Andric Sched<[SchedWriteShuffle.XMM]>, VEX; 7418*0b57cec5SDimitry Andric def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src), 7419*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 7420*0b57cec5SDimitry Andric [(set VR128:$dst, 7421*0b57cec5SDimitry Andric (OpVT128 (X86VBroadcast (ld_frag addr:$src))))]>, 7422*0b57cec5SDimitry Andric Sched<[SchedWriteShuffle.XMM.Folded]>, VEX; 7423*0b57cec5SDimitry Andric def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), 7424*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 7425*0b57cec5SDimitry Andric [(set VR256:$dst, 7426*0b57cec5SDimitry 
Andric (OpVT256 (X86VBroadcast (OpVT128 VR128:$src))))]>, 7427*0b57cec5SDimitry Andric Sched<[WriteShuffle256]>, VEX, VEX_L; 7428*0b57cec5SDimitry Andric def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src), 7429*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 7430*0b57cec5SDimitry Andric [(set VR256:$dst, 7431*0b57cec5SDimitry Andric (OpVT256 (X86VBroadcast (ld_frag addr:$src))))]>, 7432*0b57cec5SDimitry Andric Sched<[SchedWriteShuffle.XMM.Folded]>, VEX, VEX_L; 7433*0b57cec5SDimitry Andric 7434*0b57cec5SDimitry Andric // Provide aliases for broadcast from the same register class that 7435*0b57cec5SDimitry Andric // automatically does the extract. 7436*0b57cec5SDimitry Andric def : Pat<(OpVT256 (X86VBroadcast (OpVT256 VR256:$src))), 7437*0b57cec5SDimitry Andric (!cast<Instruction>(NAME#"Yrr") 7438*0b57cec5SDimitry Andric (OpVT128 (EXTRACT_SUBREG (OpVT256 VR256:$src),sub_xmm)))>; 7439*0b57cec5SDimitry Andric } 7440*0b57cec5SDimitry Andric} 7441*0b57cec5SDimitry Andric 7442*0b57cec5SDimitry Andricdefm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8, 7443*0b57cec5SDimitry Andric v16i8, v32i8, NoVLX_Or_NoBWI>; 7444*0b57cec5SDimitry Andricdefm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16, 7445*0b57cec5SDimitry Andric v8i16, v16i16, NoVLX_Or_NoBWI>; 7446*0b57cec5SDimitry Andricdefm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32, 7447*0b57cec5SDimitry Andric v4i32, v8i32, NoVLX>; 7448*0b57cec5SDimitry Andricdefm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64, 7449*0b57cec5SDimitry Andric v2i64, v4i64, NoVLX>; 7450*0b57cec5SDimitry Andric 7451*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in { 7452*0b57cec5SDimitry Andric // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD. 
7453*0b57cec5SDimitry Andric def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), 7454*0b57cec5SDimitry Andric (VPBROADCASTQrm addr:$src)>; 7455*0b57cec5SDimitry Andric def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), 7456*0b57cec5SDimitry Andric (VPBROADCASTQYrm addr:$src)>; 7457*0b57cec5SDimitry Andric 7458*0b57cec5SDimitry Andric def : Pat<(v4i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))), 7459*0b57cec5SDimitry Andric (VPBROADCASTDrm addr:$src)>; 7460*0b57cec5SDimitry Andric def : Pat<(v8i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))), 7461*0b57cec5SDimitry Andric (VPBROADCASTDYrm addr:$src)>; 7462*0b57cec5SDimitry Andric def : Pat<(v2i64 (X86VBroadcast (v2i64 (scalar_to_vector (loadi64 addr:$src))))), 7463*0b57cec5SDimitry Andric (VPBROADCASTQrm addr:$src)>; 7464*0b57cec5SDimitry Andric def : Pat<(v4i64 (X86VBroadcast (v2i64 (scalar_to_vector (loadi64 addr:$src))))), 7465*0b57cec5SDimitry Andric (VPBROADCASTQYrm addr:$src)>; 7466*0b57cec5SDimitry Andric} 7467*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { 7468*0b57cec5SDimitry Andric // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. 7469*0b57cec5SDimitry Andric // This means we'll encounter truncated i32 loads; match that here. 
7470*0b57cec5SDimitry Andric def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), 7471*0b57cec5SDimitry Andric (VPBROADCASTWrm addr:$src)>; 7472*0b57cec5SDimitry Andric def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), 7473*0b57cec5SDimitry Andric (VPBROADCASTWYrm addr:$src)>; 7474*0b57cec5SDimitry Andric def : Pat<(v8i16 (X86VBroadcast 7475*0b57cec5SDimitry Andric (i16 (trunc (i32 (extloadi16 addr:$src)))))), 7476*0b57cec5SDimitry Andric (VPBROADCASTWrm addr:$src)>; 7477*0b57cec5SDimitry Andric def : Pat<(v8i16 (X86VBroadcast 7478*0b57cec5SDimitry Andric (i16 (trunc (i32 (zextloadi16 addr:$src)))))), 7479*0b57cec5SDimitry Andric (VPBROADCASTWrm addr:$src)>; 7480*0b57cec5SDimitry Andric def : Pat<(v16i16 (X86VBroadcast 7481*0b57cec5SDimitry Andric (i16 (trunc (i32 (extloadi16 addr:$src)))))), 7482*0b57cec5SDimitry Andric (VPBROADCASTWYrm addr:$src)>; 7483*0b57cec5SDimitry Andric def : Pat<(v16i16 (X86VBroadcast 7484*0b57cec5SDimitry Andric (i16 (trunc (i32 (zextloadi16 addr:$src)))))), 7485*0b57cec5SDimitry Andric (VPBROADCASTWYrm addr:$src)>; 7486*0b57cec5SDimitry Andric} 7487*0b57cec5SDimitry Andric 7488*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in { 7489*0b57cec5SDimitry Andric // Provide aliases for broadcast from the same register class that 7490*0b57cec5SDimitry Andric // automatically does the extract. 
7491*0b57cec5SDimitry Andric def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))), 7492*0b57cec5SDimitry Andric (VBROADCASTSSYrr (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), 7493*0b57cec5SDimitry Andric sub_xmm)))>; 7494*0b57cec5SDimitry Andric def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256:$src))), 7495*0b57cec5SDimitry Andric (VBROADCASTSDYrr (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), 7496*0b57cec5SDimitry Andric sub_xmm)))>; 7497*0b57cec5SDimitry Andric} 7498*0b57cec5SDimitry Andric 7499*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in { 7500*0b57cec5SDimitry Andric // Provide fallback in case the load node that is used in the patterns above 7501*0b57cec5SDimitry Andric // is used by additional users, which prevents the pattern selection. 7502*0b57cec5SDimitry Andric def : Pat<(v4f32 (X86VBroadcast FR32:$src)), 7503*0b57cec5SDimitry Andric (VBROADCASTSSrr (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>; 7504*0b57cec5SDimitry Andric def : Pat<(v8f32 (X86VBroadcast FR32:$src)), 7505*0b57cec5SDimitry Andric (VBROADCASTSSYrr (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>; 7506*0b57cec5SDimitry Andric def : Pat<(v4f64 (X86VBroadcast FR64:$src)), 7507*0b57cec5SDimitry Andric (VBROADCASTSDYrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>; 7508*0b57cec5SDimitry Andric} 7509*0b57cec5SDimitry Andric 7510*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { 7511*0b57cec5SDimitry Andric def : Pat<(v16i8 (X86VBroadcast GR8:$src)), 7512*0b57cec5SDimitry Andric (VPBROADCASTBrr (v16i8 (COPY_TO_REGCLASS 7513*0b57cec5SDimitry Andric (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), 7514*0b57cec5SDimitry Andric GR8:$src, sub_8bit)), 7515*0b57cec5SDimitry Andric VR128)))>; 7516*0b57cec5SDimitry Andric def : Pat<(v32i8 (X86VBroadcast GR8:$src)), 7517*0b57cec5SDimitry Andric (VPBROADCASTBYrr (v16i8 (COPY_TO_REGCLASS 7518*0b57cec5SDimitry Andric (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), 7519*0b57cec5SDimitry Andric GR8:$src, sub_8bit)), 7520*0b57cec5SDimitry Andric 
VR128)))>; 7521*0b57cec5SDimitry Andric 7522*0b57cec5SDimitry Andric def : Pat<(v8i16 (X86VBroadcast GR16:$src)), 7523*0b57cec5SDimitry Andric (VPBROADCASTWrr (v8i16 (COPY_TO_REGCLASS 7524*0b57cec5SDimitry Andric (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), 7525*0b57cec5SDimitry Andric GR16:$src, sub_16bit)), 7526*0b57cec5SDimitry Andric VR128)))>; 7527*0b57cec5SDimitry Andric def : Pat<(v16i16 (X86VBroadcast GR16:$src)), 7528*0b57cec5SDimitry Andric (VPBROADCASTWYrr (v8i16 (COPY_TO_REGCLASS 7529*0b57cec5SDimitry Andric (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), 7530*0b57cec5SDimitry Andric GR16:$src, sub_16bit)), 7531*0b57cec5SDimitry Andric VR128)))>; 7532*0b57cec5SDimitry Andric} 7533*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in { 7534*0b57cec5SDimitry Andric def : Pat<(v4i32 (X86VBroadcast GR32:$src)), 7535*0b57cec5SDimitry Andric (VPBROADCASTDrr (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)))>; 7536*0b57cec5SDimitry Andric def : Pat<(v8i32 (X86VBroadcast GR32:$src)), 7537*0b57cec5SDimitry Andric (VPBROADCASTDYrr (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)))>; 7538*0b57cec5SDimitry Andric def : Pat<(v2i64 (X86VBroadcast GR64:$src)), 7539*0b57cec5SDimitry Andric (VPBROADCASTQrr (v2i64 (COPY_TO_REGCLASS GR64:$src, VR128)))>; 7540*0b57cec5SDimitry Andric def : Pat<(v4i64 (X86VBroadcast GR64:$src)), 7541*0b57cec5SDimitry Andric (VPBROADCASTQYrr (v2i64 (COPY_TO_REGCLASS GR64:$src, VR128)))>; 7542*0b57cec5SDimitry Andric} 7543*0b57cec5SDimitry Andric 7544*0b57cec5SDimitry Andric// AVX1 broadcast patterns 7545*0b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in { 7546*0b57cec5SDimitry Andricdef : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), 7547*0b57cec5SDimitry Andric (VBROADCASTSSYrm addr:$src)>; 7548*0b57cec5SDimitry Andricdef : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), 7549*0b57cec5SDimitry Andric (VBROADCASTSDYrm addr:$src)>; 7550*0b57cec5SDimitry Andricdef : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), 7551*0b57cec5SDimitry Andric 
(VBROADCASTSSrm addr:$src)>; 7552*0b57cec5SDimitry Andric} 7553*0b57cec5SDimitry Andric 7554*0b57cec5SDimitry Andric // Provide fallback in case the load node that is used in the patterns above 7555*0b57cec5SDimitry Andric // is used by additional users, which prevents the pattern selection. 7556*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 7557*0b57cec5SDimitry Andric // 128bit broadcasts: 7558*0b57cec5SDimitry Andric def : Pat<(v2f64 (X86VBroadcast f64:$src)), 7559*0b57cec5SDimitry Andric (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>; 7560*0b57cec5SDimitry Andric def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))), 7561*0b57cec5SDimitry Andric (VMOVDDUPrm addr:$src)>; 7562*0b57cec5SDimitry Andric 7563*0b57cec5SDimitry Andric def : Pat<(v2f64 (X86VBroadcast v2f64:$src)), 7564*0b57cec5SDimitry Andric (VMOVDDUPrr VR128:$src)>; 7565*0b57cec5SDimitry Andric def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))), 7566*0b57cec5SDimitry Andric (VMOVDDUPrm addr:$src)>; 7567*0b57cec5SDimitry Andric def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))), 7568*0b57cec5SDimitry Andric (VMOVDDUPrm addr:$src)>; 7569*0b57cec5SDimitry Andric} 7570*0b57cec5SDimitry Andric 7571*0b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in { 7572*0b57cec5SDimitry Andric def : Pat<(v4f32 (X86VBroadcast FR32:$src)), 7573*0b57cec5SDimitry Andric (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)>; 7574*0b57cec5SDimitry Andric def : Pat<(v8f32 (X86VBroadcast FR32:$src)), 7575*0b57cec5SDimitry Andric (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), 7576*0b57cec5SDimitry Andric (v4f32 (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)), sub_xmm), 7577*0b57cec5SDimitry Andric (v4f32 (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)), 1)>; 7578*0b57cec5SDimitry Andric def : Pat<(v4f64 (X86VBroadcast FR64:$src)), 7579*0b57cec5SDimitry Andric (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), 7580*0b57cec5SDimitry 
Andric (v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), sub_xmm), 7581*0b57cec5SDimitry Andric (v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), 1)>; 7582*0b57cec5SDimitry Andric 7583*0b57cec5SDimitry Andric def : Pat<(v4i32 (X86VBroadcast GR32:$src)), 7584*0b57cec5SDimitry Andric (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)), 0)>; 7585*0b57cec5SDimitry Andric def : Pat<(v8i32 (X86VBroadcast GR32:$src)), 7586*0b57cec5SDimitry Andric (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), 7587*0b57cec5SDimitry Andric (v4i32 (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)), 0)), sub_xmm), 7588*0b57cec5SDimitry Andric (v4i32 (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)), 0)), 1)>; 7589*0b57cec5SDimitry Andric def : Pat<(v4i64 (X86VBroadcast GR64:$src)), 7590*0b57cec5SDimitry Andric (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), 7591*0b57cec5SDimitry Andric (v4i32 (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR64:$src, VR128)), 0x44)), sub_xmm), 7592*0b57cec5SDimitry Andric (v4i32 (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR64:$src, VR128)), 0x44)), 1)>; 7593*0b57cec5SDimitry Andric 7594*0b57cec5SDimitry Andric def : Pat<(v2i64 (X86VBroadcast i64:$src)), 7595*0b57cec5SDimitry Andric (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR64:$src, VR128)), 0x44)>; 7596*0b57cec5SDimitry Andric def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))), 7597*0b57cec5SDimitry Andric (VMOVDDUPrm addr:$src)>; 7598*0b57cec5SDimitry Andric} 7599*0b57cec5SDimitry Andric 7600*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 7601*0b57cec5SDimitry Andric// VPERM - Permute instructions 7602*0b57cec5SDimitry Andric// 7603*0b57cec5SDimitry Andric 7604*0b57cec5SDimitry Andricmulticlass avx2_perm<bits<8> opc, string OpcodeStr, 7605*0b57cec5SDimitry Andric ValueType OpVT, X86FoldableSchedWrite Sched, 7606*0b57cec5SDimitry Andric X86MemOperand memOp> { 7607*0b57cec5SDimitry Andric let Predicates = [HasAVX2, 
NoVLX] in { 7608*0b57cec5SDimitry Andric def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), 7609*0b57cec5SDimitry Andric (ins VR256:$src1, VR256:$src2), 7610*0b57cec5SDimitry Andric !strconcat(OpcodeStr, 7611*0b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7612*0b57cec5SDimitry Andric [(set VR256:$dst, 7613*0b57cec5SDimitry Andric (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>, 7614*0b57cec5SDimitry Andric Sched<[Sched]>, VEX_4V, VEX_L; 7615*0b57cec5SDimitry Andric def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), 7616*0b57cec5SDimitry Andric (ins VR256:$src1, memOp:$src2), 7617*0b57cec5SDimitry Andric !strconcat(OpcodeStr, 7618*0b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7619*0b57cec5SDimitry Andric [(set VR256:$dst, 7620*0b57cec5SDimitry Andric (OpVT (X86VPermv VR256:$src1, 7621*0b57cec5SDimitry Andric (load addr:$src2))))]>, 7622*0b57cec5SDimitry Andric Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX_4V, VEX_L; 7623*0b57cec5SDimitry Andric } 7624*0b57cec5SDimitry Andric} 7625*0b57cec5SDimitry Andric 7626*0b57cec5SDimitry Andricdefm VPERMD : avx2_perm<0x36, "vpermd", v8i32, WriteVarShuffle256, i256mem>; 7627*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedSingle in 7628*0b57cec5SDimitry Andricdefm VPERMPS : avx2_perm<0x16, "vpermps", v8f32, WriteFVarShuffle256, f256mem>; 7629*0b57cec5SDimitry Andric 7630*0b57cec5SDimitry Andricmulticlass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, 7631*0b57cec5SDimitry Andric ValueType OpVT, X86FoldableSchedWrite Sched, 7632*0b57cec5SDimitry Andric X86MemOperand memOp> { 7633*0b57cec5SDimitry Andric let Predicates = [HasAVX2, NoVLX] in { 7634*0b57cec5SDimitry Andric def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst), 7635*0b57cec5SDimitry Andric (ins VR256:$src1, u8imm:$src2), 7636*0b57cec5SDimitry Andric !strconcat(OpcodeStr, 7637*0b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7638*0b57cec5SDimitry Andric [(set VR256:$dst, 
7639*0b57cec5SDimitry Andric (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>, 7640*0b57cec5SDimitry Andric Sched<[Sched]>, VEX, VEX_L; 7641*0b57cec5SDimitry Andric def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst), 7642*0b57cec5SDimitry Andric (ins memOp:$src1, u8imm:$src2), 7643*0b57cec5SDimitry Andric !strconcat(OpcodeStr, 7644*0b57cec5SDimitry Andric "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7645*0b57cec5SDimitry Andric [(set VR256:$dst, 7646*0b57cec5SDimitry Andric (OpVT (X86VPermi (mem_frag addr:$src1), 7647*0b57cec5SDimitry Andric (i8 imm:$src2))))]>, 7648*0b57cec5SDimitry Andric Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VEX_L; 7649*0b57cec5SDimitry Andric } 7650*0b57cec5SDimitry Andric} 7651*0b57cec5SDimitry Andric 7652*0b57cec5SDimitry Andricdefm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64, 7653*0b57cec5SDimitry Andric WriteShuffle256, i256mem>, VEX_W; 7654*0b57cec5SDimitry Andriclet ExeDomain = SSEPackedDouble in 7655*0b57cec5SDimitry Andricdefm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64, 7656*0b57cec5SDimitry Andric WriteFShuffle256, f256mem>, VEX_W; 7657*0b57cec5SDimitry Andric 7658*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 7659*0b57cec5SDimitry Andric// VPERM2I128 - Permute Floating-Point Values in 128-bit chunks 7660*0b57cec5SDimitry Andric// 7661*0b57cec5SDimitry Andriclet isCommutable = 1 in 7662*0b57cec5SDimitry Andricdef VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst), 7663*0b57cec5SDimitry Andric (ins VR256:$src1, VR256:$src2, u8imm:$src3), 7664*0b57cec5SDimitry Andric "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 7665*0b57cec5SDimitry Andric [(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, 7666*0b57cec5SDimitry Andric (i8 imm:$src3))))]>, Sched<[WriteShuffle256]>, 7667*0b57cec5SDimitry Andric VEX_4V, VEX_L; 7668*0b57cec5SDimitry Andricdef VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs 
VR256:$dst), 7669*0b57cec5SDimitry Andric (ins VR256:$src1, f256mem:$src2, u8imm:$src3), 7670*0b57cec5SDimitry Andric "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 7671*0b57cec5SDimitry Andric [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2), 7672*0b57cec5SDimitry Andric (i8 imm:$src3)))]>, 7673*0b57cec5SDimitry Andric Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L; 7674*0b57cec5SDimitry Andric 7675*0b57cec5SDimitry Andriclet Predicates = [HasAVX2] in 7676*0b57cec5SDimitry Andricdef : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2), 7677*0b57cec5SDimitry Andric VR256:$src1, (i8 imm:$imm))), 7678*0b57cec5SDimitry Andric (VPERM2I128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>; 7679*0b57cec5SDimitry Andric 7680*0b57cec5SDimitry Andric 7681*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 7682*0b57cec5SDimitry Andric// VINSERTI128 - Insert packed integer values 7683*0b57cec5SDimitry Andric// 7684*0b57cec5SDimitry Andriclet hasSideEffects = 0 in { 7685*0b57cec5SDimitry Andricdef VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst), 7686*0b57cec5SDimitry Andric (ins VR256:$src1, VR128:$src2, u8imm:$src3), 7687*0b57cec5SDimitry Andric "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 7688*0b57cec5SDimitry Andric []>, Sched<[WriteShuffle256]>, VEX_4V, VEX_L; 7689*0b57cec5SDimitry Andriclet mayLoad = 1 in 7690*0b57cec5SDimitry Andricdef VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst), 7691*0b57cec5SDimitry Andric (ins VR256:$src1, i128mem:$src2, u8imm:$src3), 7692*0b57cec5SDimitry Andric "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 7693*0b57cec5SDimitry Andric []>, Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L; 7694*0b57cec5SDimitry Andric} 7695*0b57cec5SDimitry Andric 7696*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in { 
7697*0b57cec5SDimitry Andric defm : vinsert_lowering<"VINSERTI128", v2i64, v4i64, loadv2i64>; 7698*0b57cec5SDimitry Andric defm : vinsert_lowering<"VINSERTI128", v4i32, v8i32, loadv4i32>; 7699*0b57cec5SDimitry Andric defm : vinsert_lowering<"VINSERTI128", v8i16, v16i16, loadv8i16>; 7700*0b57cec5SDimitry Andric defm : vinsert_lowering<"VINSERTI128", v16i8, v32i8, loadv16i8>; 7701*0b57cec5SDimitry Andric} 7702*0b57cec5SDimitry Andric 7703*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 7704*0b57cec5SDimitry Andric// VEXTRACTI128 - Extract packed integer values 7705*0b57cec5SDimitry Andric// 7706*0b57cec5SDimitry Andricdef VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst), 7707*0b57cec5SDimitry Andric (ins VR256:$src1, u8imm:$src2), 7708*0b57cec5SDimitry Andric "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 7709*0b57cec5SDimitry Andric Sched<[WriteShuffle256]>, VEX, VEX_L; 7710*0b57cec5SDimitry Andriclet hasSideEffects = 0, mayStore = 1 in 7711*0b57cec5SDimitry Andricdef VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs), 7712*0b57cec5SDimitry Andric (ins i128mem:$dst, VR256:$src1, u8imm:$src2), 7713*0b57cec5SDimitry Andric "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 7714*0b57cec5SDimitry Andric Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_L; 7715*0b57cec5SDimitry Andric 7716*0b57cec5SDimitry Andriclet Predicates = [HasAVX2, NoVLX] in { 7717*0b57cec5SDimitry Andric defm : vextract_lowering<"VEXTRACTI128", v4i64, v2i64>; 7718*0b57cec5SDimitry Andric defm : vextract_lowering<"VEXTRACTI128", v8i32, v4i32>; 7719*0b57cec5SDimitry Andric defm : vextract_lowering<"VEXTRACTI128", v16i16, v8i16>; 7720*0b57cec5SDimitry Andric defm : vextract_lowering<"VEXTRACTI128", v32i8, v16i8>; 7721*0b57cec5SDimitry Andric} 7722*0b57cec5SDimitry Andric 7723*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 7724*0b57cec5SDimitry Andric// 
VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores 7725*0b57cec5SDimitry Andric// 7726*0b57cec5SDimitry Andricmulticlass avx2_pmovmask<string OpcodeStr, 7727*0b57cec5SDimitry Andric Intrinsic IntLd128, Intrinsic IntLd256, 7728*0b57cec5SDimitry Andric Intrinsic IntSt128, Intrinsic IntSt256> { 7729*0b57cec5SDimitry Andric def rm : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst), 7730*0b57cec5SDimitry Andric (ins VR128:$src1, i128mem:$src2), 7731*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7732*0b57cec5SDimitry Andric [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>, 7733*0b57cec5SDimitry Andric VEX_4V, Sched<[WriteVecMaskedLoad]>; 7734*0b57cec5SDimitry Andric def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst), 7735*0b57cec5SDimitry Andric (ins VR256:$src1, i256mem:$src2), 7736*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7737*0b57cec5SDimitry Andric [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, 7738*0b57cec5SDimitry Andric VEX_4V, VEX_L, Sched<[WriteVecMaskedLoadY]>; 7739*0b57cec5SDimitry Andric def mr : AVX28I<0x8e, MRMDestMem, (outs), 7740*0b57cec5SDimitry Andric (ins i128mem:$dst, VR128:$src1, VR128:$src2), 7741*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7742*0b57cec5SDimitry Andric [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>, 7743*0b57cec5SDimitry Andric VEX_4V, Sched<[WriteVecMaskedStore]>; 7744*0b57cec5SDimitry Andric def Ymr : AVX28I<0x8e, MRMDestMem, (outs), 7745*0b57cec5SDimitry Andric (ins i256mem:$dst, VR256:$src1, VR256:$src2), 7746*0b57cec5SDimitry Andric !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7747*0b57cec5SDimitry Andric [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, 7748*0b57cec5SDimitry Andric VEX_4V, VEX_L, Sched<[WriteVecMaskedStoreY]>; 7749*0b57cec5SDimitry Andric} 7750*0b57cec5SDimitry Andric 7751*0b57cec5SDimitry Andricdefm VPMASKMOVD : 
avx2_pmovmask<"vpmaskmovd", 7752*0b57cec5SDimitry Andric int_x86_avx2_maskload_d, 7753*0b57cec5SDimitry Andric int_x86_avx2_maskload_d_256, 7754*0b57cec5SDimitry Andric int_x86_avx2_maskstore_d, 7755*0b57cec5SDimitry Andric int_x86_avx2_maskstore_d_256>; 7756*0b57cec5SDimitry Andricdefm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq", 7757*0b57cec5SDimitry Andric int_x86_avx2_maskload_q, 7758*0b57cec5SDimitry Andric int_x86_avx2_maskload_q_256, 7759*0b57cec5SDimitry Andric int_x86_avx2_maskstore_q, 7760*0b57cec5SDimitry Andric int_x86_avx2_maskstore_q_256>, VEX_W; 7761*0b57cec5SDimitry Andric 7762*0b57cec5SDimitry Andricmulticlass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT, 7763*0b57cec5SDimitry Andric ValueType MaskVT, string BlendStr, ValueType ZeroVT> { 7764*0b57cec5SDimitry Andric // masked store 7765*0b57cec5SDimitry Andric def: Pat<(masked_store (VT RC:$src), addr:$ptr, (MaskVT RC:$mask)), 7766*0b57cec5SDimitry Andric (!cast<Instruction>(InstrStr#"mr") addr:$ptr, RC:$mask, RC:$src)>; 7767*0b57cec5SDimitry Andric // masked load 7768*0b57cec5SDimitry Andric def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), undef)), 7769*0b57cec5SDimitry Andric (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>; 7770*0b57cec5SDimitry Andric def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), 7771*0b57cec5SDimitry Andric (VT immAllZerosV))), 7772*0b57cec5SDimitry Andric (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>; 7773*0b57cec5SDimitry Andric} 7774*0b57cec5SDimitry Andriclet Predicates = [HasAVX] in { 7775*0b57cec5SDimitry Andric defm : maskmov_lowering<"VMASKMOVPS", VR128, v4f32, v4i32, "VBLENDVPS", v4i32>; 7776*0b57cec5SDimitry Andric defm : maskmov_lowering<"VMASKMOVPD", VR128, v2f64, v2i64, "VBLENDVPD", v4i32>; 7777*0b57cec5SDimitry Andric defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8f32, v8i32, "VBLENDVPSY", v8i32>; 7778*0b57cec5SDimitry Andric defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4f64, v4i64, "VBLENDVPDY", v8i32>; 
7779*0b57cec5SDimitry Andric} 7780*0b57cec5SDimitry Andriclet Predicates = [HasAVX1Only] in { 7781*0b57cec5SDimitry Andric // load/store i32/i64 not supported use ps/pd version 7782*0b57cec5SDimitry Andric defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8i32, v8i32, "VBLENDVPSY", v8i32>; 7783*0b57cec5SDimitry Andric defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4i64, v4i64, "VBLENDVPDY", v8i32>; 7784*0b57cec5SDimitry Andric defm : maskmov_lowering<"VMASKMOVPS", VR128, v4i32, v4i32, "VBLENDVPS", v4i32>; 7785*0b57cec5SDimitry Andric defm : maskmov_lowering<"VMASKMOVPD", VR128, v2i64, v2i64, "VBLENDVPD", v4i32>; 7786*0b57cec5SDimitry Andric} 7787*0b57cec5SDimitry Andriclet Predicates = [HasAVX2] in { 7788*0b57cec5SDimitry Andric defm : maskmov_lowering<"VPMASKMOVDY", VR256, v8i32, v8i32, "VBLENDVPSY", v8i32>; 7789*0b57cec5SDimitry Andric defm : maskmov_lowering<"VPMASKMOVQY", VR256, v4i64, v4i64, "VBLENDVPDY", v8i32>; 7790*0b57cec5SDimitry Andric defm : maskmov_lowering<"VPMASKMOVD", VR128, v4i32, v4i32, "VBLENDVPS", v4i32>; 7791*0b57cec5SDimitry Andric defm : maskmov_lowering<"VPMASKMOVQ", VR128, v2i64, v2i64, "VBLENDVPD", v4i32>; 7792*0b57cec5SDimitry Andric} 7793*0b57cec5SDimitry Andric 7794*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===// 7795*0b57cec5SDimitry Andric// SubVector Broadcasts 7796*0b57cec5SDimitry Andric// Provide fallback in case the load node that is used in the patterns above 7797*0b57cec5SDimitry Andric// is used by additional users, which prevents the pattern selection. 
7798*0b57cec5SDimitry Andric 7799*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 7800*0b57cec5SDimitry Andricdef : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128:$src))), 7801*0b57cec5SDimitry Andric (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm), 7802*0b57cec5SDimitry Andric (v2f64 VR128:$src), 1)>; 7803*0b57cec5SDimitry Andricdef : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128:$src))), 7804*0b57cec5SDimitry Andric (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm), 7805*0b57cec5SDimitry Andric (v4f32 VR128:$src), 1)>; 7806*0b57cec5SDimitry Andric} 7807*0b57cec5SDimitry Andric 7808*0b57cec5SDimitry Andric// NOTE: We're using FP instructions here, but execution domain fixing can 7809*0b57cec5SDimitry Andric// convert to integer when profitable. 7810*0b57cec5SDimitry Andriclet Predicates = [HasAVX, NoVLX] in { 7811*0b57cec5SDimitry Andricdef : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128:$src))), 7812*0b57cec5SDimitry Andric (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm), 7813*0b57cec5SDimitry Andric (v2i64 VR128:$src), 1)>; 7814*0b57cec5SDimitry Andricdef : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128:$src))), 7815*0b57cec5SDimitry Andric (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm), 7816*0b57cec5SDimitry Andric (v4i32 VR128:$src), 1)>; 7817*0b57cec5SDimitry Andricdef : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128:$src))), 7818*0b57cec5SDimitry Andric (VINSERTF128rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm), 7819*0b57cec5SDimitry Andric (v8i16 VR128:$src), 1)>; 7820*0b57cec5SDimitry Andricdef : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128:$src))), 7821*0b57cec5SDimitry Andric (VINSERTF128rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm), 7822*0b57cec5SDimitry Andric (v16i8 VR128:$src), 1)>; 7823*0b57cec5SDimitry Andric} 7824*0b57cec5SDimitry Andric 7825*0b57cec5SDimitry 
//===----------------------------------------------------------------------===//
// Variable Bit Shifts
//

// AVX2 per-element variable shifts (VPSLLV*/VPSRLV*/VPSRAV*): each element of
// src1 is shifted by the count held in the corresponding element of src2.
// Instantiates the 128-bit (rr/rm) and 256-bit (Yrr/Yrm) register and
// load-folded forms.
multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType vt128, ValueType vt256> {
  def rr  : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
                   (ins VR128:$src1, VR128:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR128:$dst,
                     (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>,
                   VEX_4V, Sched<[SchedWriteVarVecShift.XMM]>;
  def rm  : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
                   (ins VR128:$src1, i128mem:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR128:$dst,
                     (vt128 (OpNode VR128:$src1,
                             (vt128 (load addr:$src2)))))]>,
                   VEX_4V, Sched<[SchedWriteVarVecShift.XMM.Folded,
                                  SchedWriteVarVecShift.XMM.ReadAfterFold]>;
  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
                   (ins VR256:$src1, VR256:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR256:$dst,
                     (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>,
                   VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM]>;
  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
                   (ins VR256:$src1, i256mem:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR256:$dst,
                     (vt256 (OpNode VR256:$src1,
                             (vt256 (load addr:$src2)))))]>,
                   VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded,
                                         SchedWriteVarVecShift.YMM.ReadAfterFold]>;
}

// NOTE: only the D form of the variable arithmetic right shift is defined
// here; AVX2 has no VPSRAVQ (64-bit arithmetic shift counts need AVX-512).
let Predicates = [HasAVX2, NoVLX] in {
  defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", X86vshlv, v4i32, v8i32>;
  defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", X86vshlv, v2i64, v4i64>, VEX_W;
  defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", X86vsrlv, v4i32, v8i32>;
  defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", X86vsrlv, v2i64, v4i64>, VEX_W;
  defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", X86vsrav, v4i32, v8i32>;
}

//===----------------------------------------------------------------------===//
// VGATHER - GATHER Operations

// FIXME: Improve scheduling of gather instructions.
// AVX2 gather: loads elements from per-element addresses under a per-element
// mask.  Each instruction has two outputs: the gathered data ($dst) and the
// written-back mask register ($mask_wb).  The 128-bit form is "rm", the
// 256-bit form "Yrm"; MTx/MTy give the mask types when they differ from the
// data types (the FP gathers).
multiclass avx2_gather<bits<8> opc, string OpcodeStr, ValueType VTx,
                       ValueType VTy, PatFrag GatherNode128,
                       PatFrag GatherNode256, RegisterClass RC256,
                       X86MemOperand memop128, X86MemOperand memop256,
                       ValueType MTx = VTx, ValueType MTy = VTy> {
  def rm  : AVX28I<opc, MRMSrcMem4VOp3, (outs VR128:$dst, VR128:$mask_wb),
                   (ins VR128:$src1, memop128:$src2, VR128:$mask),
                   !strconcat(OpcodeStr,
                              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
                   [(set (VTx VR128:$dst), (MTx VR128:$mask_wb),
                         (GatherNode128 VR128:$src1, VR128:$mask,
                                        vectoraddr:$src2))]>,
                   VEX, Sched<[WriteLoad]>;
  def Yrm : AVX28I<opc, MRMSrcMem4VOp3, (outs RC256:$dst, RC256:$mask_wb),
                   (ins RC256:$src1, memop256:$src2, RC256:$mask),
                   !strconcat(OpcodeStr,
                              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
                   [(set (VTy RC256:$dst), (MTy RC256:$mask_wb),
                         (GatherNode256 RC256:$src1, RC256:$mask,
                                        vectoraddr:$src2))]>,
                   VEX, VEX_L, Sched<[WriteLoad]>;
}

let Predicates = [HasAVX2] in {
  // The outputs are early-clobbered so neither can share a register with the
  // index operand; $src1 and $mask are tied to their respective outputs.
  let mayLoad = 1, hasSideEffects = 0, Constraints
    = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
    in {
    defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", v2i64, v4i64, mgatherv4i32,
                                  mgatherv4i32, VR256, vx128mem, vx256mem>, VEX_W;
    defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", v2i64, v4i64, mgatherv2i64,
                                  mgatherv4i64, VR256, vx128mem, vy256mem>, VEX_W;
    defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", v4i32, v8i32, mgatherv4i32,
                                  mgatherv8i32, VR256, vx128mem, vy256mem>;
    defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", v4i32, v4i32, mgatherv2i64,
                                  mgatherv4i64, VR128, vx64mem, vy128mem>;

    // FP gathers pass separate integer mask types (MTx/MTy).
    let ExeDomain = SSEPackedDouble in {
      defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", v2f64, v4f64, mgatherv4i32,
                                    mgatherv4i32, VR256, vx128mem, vx256mem,
                                    v2i64, v4i64>, VEX_W;
      defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", v2f64, v4f64, mgatherv2i64,
                                    mgatherv4i64, VR256, vx128mem, vy256mem,
                                    v2i64, v4i64>, VEX_W;
    }

    let ExeDomain = SSEPackedSingle in {
      defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", v4f32, v8f32, mgatherv4i32,
                                    mgatherv8i32, VR256, vx128mem, vy256mem,
                                    v4i32, v8i32>;
      defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", v4f32, v4f32, mgatherv2i64,
                                    mgatherv4i64, VR128, vx64mem, vy128mem,
                                    v4i32, v4i32>;
    }
  }
}

//===----------------------------------------------------------------------===//
// GFNI instructions
//===----------------------------------------------------------------------===//

// GF(2^8) byte multiply (GF2P8MULB).  Is2Addr selects the SSE-style
// two-operand asm string; the per-def asm is empty because AsmString is set
// on the enclosing let.
multiclass GF2P8MULB_rm<string OpcodeStr, ValueType OpVT,
                        RegisterClass RC, PatFrag MemOpFrag,
                        X86MemOperand X86MemOp, bit Is2Addr = 0> {
  let ExeDomain = SSEPackedInt,
      AsmString = !if(Is2Addr,
        OpcodeStr##"\t{$src2, $dst|$dst, $src2}",
        OpcodeStr##"\t{$src2, $src1, $dst|$dst, $src1, $src2}") in {
    let isCommutable = 1 in
    def rr : PDI<0xCF, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), "",
                 [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, RC:$src2)))]>,
             Sched<[SchedWriteVecALU.XMM]>, T8PD;

    def rm : PDI<0xCF, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, X86MemOp:$src2), "",
                 [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1,
                                       (MemOpFrag addr:$src2))))]>,
             Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>, T8PD;
  }
}

// GF(2^8) affine transform with an 8-bit immediate (rri register form, rmi
// load-folded form).
multiclass GF2P8AFFINE_rmi<bits<8> Op, string OpStr, ValueType OpVT,
                           SDNode OpNode, RegisterClass RC, PatFrag MemOpFrag,
                           X86MemOperand X86MemOp, bit Is2Addr = 0> {
  let AsmString = !if(Is2Addr,
      OpStr##"\t{$src3, $src2, $dst|$dst, $src2, $src3}",
      OpStr##"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in {
    def rri : Ii8<Op, MRMSrcReg, (outs RC:$dst),
                  (ins RC:$src1, RC:$src2, u8imm:$src3), "",
                  [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))],
                  SSEPackedInt>, Sched<[SchedWriteVecALU.XMM]>;
    def rmi : Ii8<Op, MRMSrcMem, (outs RC:$dst),
                  (ins RC:$src1, X86MemOp:$src2, u8imm:$src3), "",
                  [(set RC:$dst, (OpVT (OpNode RC:$src1,
                                        (MemOpFrag addr:$src2),
                                        imm:$src3)))], SSEPackedInt>,
                  Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>;
  }
}

// Instantiates one affine op three ways: the legacy SSE encoding (two-operand,
// $src1 tied to $dst) plus the VEX 128-bit and 256-bit forms.
multiclass GF2P8AFFINE_common<bits<8> Op, string OpStr, SDNode OpNode> {
  let Constraints = "$src1 = $dst",
      Predicates  = [HasGFNI, UseSSE2] in
  defm NAME : GF2P8AFFINE_rmi<Op, OpStr, v16i8, OpNode,
                              VR128, load, i128mem, 1>;
  let Predicates = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
    defm V##NAME : GF2P8AFFINE_rmi<Op, "v"##OpStr, v16i8, OpNode, VR128,
                                   load, i128mem>, VEX_4V, VEX_W;
    defm V##NAME##Y : GF2P8AFFINE_rmi<Op, "v"##OpStr, v32i8, OpNode, VR256,
                                      load, i256mem>, VEX_4V, VEX_L, VEX_W;
  }
}

// GF2P8MULB
let Constraints = "$src1 = $dst",
    Predicates  = [HasGFNI, UseSSE2] in
defm GF2P8MULB : GF2P8MULB_rm<"gf2p8mulb", v16i8, VR128, memop,
                              i128mem, 1>;
let Predicates = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
  defm VGF2P8MULB : GF2P8MULB_rm<"vgf2p8mulb", v16i8, VR128, load,
                                 i128mem>, VEX_4V;
  defm VGF2P8MULBY : GF2P8MULB_rm<"vgf2p8mulb", v32i8, VR256, load,
                                  i256mem>, VEX_4V, VEX_L;
}
// GF2P8AFFINEINVQB, GF2P8AFFINEQB
let isCommutable = 0 in {
  defm GF2P8AFFINEINVQB : GF2P8AFFINE_common<0xCF, "gf2p8affineinvqb",
                                             X86GF2P8affineinvqb>, TAPD;
  defm GF2P8AFFINEQB : GF2P8AFFINE_common<0xCE, "gf2p8affineqb",
                                          X86GF2P8affineqb>, TAPD;
}