//===-- X86InstrXOP.td - XOP Instruction Set ---------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes XOP (eXtended OPerations)
//
//===----------------------------------------------------------------------===//

// One-source 128-bit instructions selected from an intrinsic. Emits a
// register form (rr) and a form that loads the source from i128mem (rm).
// Every instantiation below is a VPHADD*/VPHSUB* mnemonic, so the horizontal
// add/sub scheduling class is used rather than the generic vector ALU class.
multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
  def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
           !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
           [(set VR128:$dst, (Int VR128:$src))]>, XOP,
           Sched<[SchedWritePHAdd.XMM]>;
  def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
           !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
           [(set VR128:$dst, (Int (load addr:$src)))]>, XOP,
           Sched<[SchedWritePHAdd.XMM.Folded, SchedWritePHAdd.XMM.ReadAfterFold]>;
}

let ExeDomain = SSEPackedInt in {
  defm VPHSUBWD  : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd>;
  defm VPHSUBDQ  : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq>;
  defm VPHSUBBW  : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw>;
  defm VPHADDWQ  : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq>;
  defm VPHADDWD  : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd>;
  defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq>;
  defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd>;
  defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq>;
  defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw>;
  defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq>;
  defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd>;
  defm VPHADDDQ  : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq>;
  defm VPHADDBW  : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw>;
  defm VPHADDBQ  : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq>;
  defm VPHADDBD  : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd>;
}

// Scalar load 2 addr operand instructions
// The memory form takes its operand type (memop) and its load fragment
// (mem_frags) as parameters; both forms share the scheduling class sched.
multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
                     Operand memop, PatFrags mem_frags,
                     X86FoldableSchedWrite sched> {
  def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
           !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
           [(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[sched]>;
  def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins memop:$src),
           !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
           [(set VR128:$dst, (Int (mem_frags addr:$src)))]>, XOP,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// One-source 128-bit intrinsic instructions: VR128 register form (rr) and
// f128mem load form (rm), with a caller-supplied scheduling class.
multiclass xop2op128<bits<8> opc, string OpcodeStr, Intrinsic Int,
                     X86FoldableSchedWrite sched> {
  def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
           !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
           [(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[sched]>;
  def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
           !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
           [(set VR128:$dst, (Int (load addr:$src)))]>, XOP,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// 256-bit counterpart of xop2op128: VR256 register form (Yrr) and f256mem
// load form (Yrm), both additionally tagged VEX_L.
multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
                     X86FoldableSchedWrite sched> {
  def Yrr : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
            !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
            [(set VR256:$dst, (Int VR256:$src))]>, XOP, VEX_L, Sched<[sched]>;
  def Yrm : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
            !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
            [(set VR256:$dst, (Int (load addr:$src)))]>, XOP, VEX_L,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

let ExeDomain = SSEPackedSingle in {
  defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
                           ssmem, sse_load_f32, SchedWriteFRnd.Scl>;
  defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps,
                           SchedWriteFRnd.XMM>;
  defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256,
                           SchedWriteFRnd.YMM>;
}

let ExeDomain = SSEPackedDouble in {
  defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
                           sdmem, sse_load_f64, SchedWriteFRnd.Scl>;
  defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd,
                           SchedWriteFRnd.XMM>;
  defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256,
                           SchedWriteFRnd.YMM>;
}

// Two-source 128-bit instructions selected from an SDNode on type vt128.
// Emits four records:
//   rr     - both sources in registers.
//   rm     - $src2 loaded from memory.
//   mr     - $src1 loaded from memory.
//   rr_REV - pattern-less register form kept only for the disassembler.
multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
                  ValueType vt128, X86FoldableSchedWrite sched> {
  def rr : IXOP<opc, MRMSrcReg4VOp3, (outs VR128:$dst),
           (ins VR128:$src1, VR128:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set VR128:$dst,
              (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2))))]>,
           XOP, Sched<[sched]>;
  def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
           (ins VR128:$src1, i128mem:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set VR128:$dst,
              (vt128 (OpNode (vt128 VR128:$src1),
                             (vt128 (load addr:$src2)))))]>,
           XOP, VVVV, REX_W, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def mr : IXOP<opc, MRMSrcMem4VOp3, (outs VR128:$dst),
           (ins i128mem:$src1, VR128:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set VR128:$dst,
              (vt128 (OpNode (vt128 (load addr:$src1)),
                             (vt128 VR128:$src2))))]>,
           XOP, Sched<[sched.Folded, sched.ReadAfterFold]>;
  // For disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rr_REV : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
               (ins VR128:$src1, VR128:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               []>,
               XOP, VVVV, REX_W, Sched<[sched]>;
}

let ExeDomain = SSEPackedInt in {
  defm VPROTB : xop3op<0x90, "vprotb", rotl, v16i8, SchedWriteVarVecShift.XMM>;
  defm VPROTD : xop3op<0x92, "vprotd", rotl, v4i32, SchedWriteVarVecShift.XMM>;
  defm VPROTQ : xop3op<0x93, "vprotq", rotl, v2i64, SchedWriteVarVecShift.XMM>;
  defm VPROTW : xop3op<0x91, "vprotw", rotl, v8i16, SchedWriteVarVecShift.XMM>;
  defm VPSHAB : xop3op<0x98, "vpshab", X86vpsha, v16i8, SchedWriteVarVecShift.XMM>;
  defm VPSHAD : xop3op<0x9A, "vpshad", X86vpsha, v4i32, SchedWriteVarVecShift.XMM>;
  defm VPSHAQ : xop3op<0x9B, "vpshaq", X86vpsha, v2i64, SchedWriteVarVecShift.XMM>;
  defm VPSHAW : xop3op<0x99, "vpshaw", X86vpsha, v8i16, SchedWriteVarVecShift.XMM>;
  defm VPSHLB : xop3op<0x94, "vpshlb", X86vpshl, v16i8, SchedWriteVarVecShift.XMM>;
  defm VPSHLD : xop3op<0x96, "vpshld", X86vpshl, v4i32, SchedWriteVarVecShift.XMM>;
  defm VPSHLQ : xop3op<0x97, "vpshlq", X86vpshl, v2i64, SchedWriteVarVecShift.XMM>;
  defm VPSHLW : xop3op<0x95, "vpshlw", X86vpshl, v8i16, SchedWriteVarVecShift.XMM>;
}

// Immediate-operand forms: the first source is a register (ri) or a 128-bit
// memory operand (mi); the second source is a u8imm matched as timm.
multiclass xop3opimm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                     ValueType vt128, X86FoldableSchedWrite sched> {
  def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
           (ins VR128:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set VR128:$dst,
              (vt128 (OpNode (vt128 VR128:$src1), timm:$src2)))]>,
           XOP, Sched<[sched]>;
  def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
           (ins i128mem:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set VR128:$dst,
              (vt128 (OpNode (vt128 (load addr:$src1)), timm:$src2)))]>,
           XOP, Sched<[sched.Folded, sched.ReadAfterFold]>;
}

let ExeDomain = SSEPackedInt in {
  defm VPROTB : xop3opimm<0xC0, "vprotb", X86vrotli, v16i8,
                          SchedWriteVecShiftImm.XMM>;
  defm VPROTD : xop3opimm<0xC2, "vprotd", X86vrotli, v4i32,
                          SchedWriteVecShiftImm.XMM>;
  defm VPROTQ : xop3opimm<0xC3, "vprotq", X86vrotli, v2i64,
                          SchedWriteVecShiftImm.XMM>;
  defm VPROTW : xop3opimm<0xC1, "vprotw", X86vrotli, v8i16,
                          SchedWriteVecShiftImm.XMM>;
}

// Instruction where second source can be memory, but third must be register
// Only the all-register form (rr) is marked commutable.
multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int,
                    X86FoldableSchedWrite sched> {
  let isCommutable = 1 in
  def rr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst),
           (ins VR128:$src1, VR128:$src2, VR128:$src3),
           !strconcat(OpcodeStr,
           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
           [(set VR128:$dst,
              (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, XOP, VVVV,
           Sched<[sched]>;
  def rm : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
           (ins VR128:$src1, i128mem:$src2, VR128:$src3),
           !strconcat(OpcodeStr,
           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
           [(set VR128:$dst,
              (Int VR128:$src1, (load addr:$src2),
              VR128:$src3))]>, XOP, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
}

let ExeDomain = SSEPackedInt in {
  defm VPMADCSWD  : xop4opm2<0xB6, "vpmadcswd",
                             int_x86_xop_vpmadcswd, SchedWriteVecIMul.XMM>;
  defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd",
                             int_x86_xop_vpmadcsswd, SchedWriteVecIMul.XMM>;
  defm VPMACSWW   : xop4opm2<0x95, "vpmacsww",
                             int_x86_xop_vpmacsww, SchedWriteVecIMul.XMM>;
  defm VPMACSWD   : xop4opm2<0x96, "vpmacswd",
                             int_x86_xop_vpmacswd, SchedWriteVecIMul.XMM>;
  defm VPMACSSWW  : xop4opm2<0x85, "vpmacssww",
                             int_x86_xop_vpmacssww, SchedWriteVecIMul.XMM>;
  defm VPMACSSWD  : xop4opm2<0x86, "vpmacsswd",
                             int_x86_xop_vpmacsswd, SchedWriteVecIMul.XMM>;
  defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql",
                             int_x86_xop_vpmacssdql, SchedWritePMULLD.XMM>;
  defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh",
                             int_x86_xop_vpmacssdqh, SchedWritePMULLD.XMM>;
  defm VPMACSSDD  : xop4opm2<0x8E, "vpmacssdd",
                             int_x86_xop_vpmacssdd, SchedWritePMULLD.XMM>;
  defm VPMACSDQL  : xop4opm2<0x97, "vpmacsdql",
                             int_x86_xop_vpmacsdql, SchedWritePMULLD.XMM>;
  defm VPMACSDQH  : xop4opm2<0x9F, "vpmacsdqh",
                             int_x86_xop_vpmacsdqh, SchedWritePMULLD.XMM>;
  defm VPMACSDD   : xop4opm2<0x9E, "vpmacsdd",
                             int_x86_xop_vpmacsdd, SchedWritePMULLD.XMM>;
}

// IFMA patterns - for cases where we can safely ignore the overflow bits from
// the multiply or easily match with existing intrinsics.
let Predicates = [HasXOP] in {
  def : Pat<(v8i16 (add (mul (v8i16 VR128:$src1), (v8i16 VR128:$src2)),
                        (v8i16 VR128:$src3))),
            (VPMACSWWrr VR128:$src1, VR128:$src2, VR128:$src3)>;
  def : Pat<(v4i32 (add (mul (v4i32 VR128:$src1), (v4i32 VR128:$src2)),
                        (v4i32 VR128:$src3))),
            (VPMACSDDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
  def : Pat<(v2i64 (add (X86pmuldq (bc_v2i64 (X86PShufd (v4i32 VR128:$src1), (i8 -11))),
                                   (bc_v2i64 (X86PShufd (v4i32 VR128:$src2), (i8 -11)))),
                        (v2i64 VR128:$src3))),
            (VPMACSDQHrr VR128:$src1, VR128:$src2, VR128:$src3)>;
  def : Pat<(v2i64 (add (X86pmuldq (v2i64 VR128:$src1), (v2i64 VR128:$src2)),
                        (v2i64 VR128:$src3))),
            (VPMACSDQLrr VR128:$src1, VR128:$src2, VR128:$src3)>;
  def : Pat<(v4i32 (add (X86vpmaddwd (v8i16 VR128:$src1), (v8i16 VR128:$src2)),
                        (v4i32 VR128:$src3))),
            (VPMADCSWDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
}

// Transforms to swizzle an immediate to help matching memory operand in first
// operand.
// Keeps the low three bits of the immediate, maps them through
// X86::getSwappedVPCOMImm, and returns the result as an i8 immediate.
def CommuteVPCOMCC : SDNodeXForm<imm, [{
  uint8_t Imm = N->getZExtValue() & 0x7;
  Imm = X86::getSwappedVPCOMImm(Imm);
  return getI8Imm(Imm, SDLoc(N));
}]>;

// Instruction where second source can be memory, third must be imm8
// The trailing anonymous pattern handles a load in the first operand: it
// selects the mi form with the operands swapped and the condition immediate
// rewritten by CommuteVPCOMCC.
multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
                    X86FoldableSchedWrite sched> {
  let ExeDomain = SSEPackedInt in { // SSE integer instructions
    let isCommutable = 1 in
    def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2, u8imm:$cc),
             !strconcat("vpcom", Suffix,
             "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
             [(set VR128:$dst,
                (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
                               timm:$cc)))]>,
             XOP, VVVV, Sched<[sched]>;
    def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, i128mem:$src2, u8imm:$cc),
             !strconcat("vpcom", Suffix,
             "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
             [(set VR128:$dst,
                (vt128 (OpNode (vt128 VR128:$src1),
                               (vt128 (load addr:$src2)),
                               timm:$cc)))]>,
             XOP, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  def : Pat<(OpNode (load addr:$src2),
                    (vt128 VR128:$src1), timm:$cc),
            (!cast<Instruction>(NAME#"mi") VR128:$src1, addr:$src2,
             (CommuteVPCOMCC timm:$cc))>;
}

defm VPCOMB  : xopvpcom<0xCC, "b", X86vpcom, v16i8, SchedWriteVecALU.XMM>;
defm VPCOMW  : xopvpcom<0xCD, "w", X86vpcom, v8i16, SchedWriteVecALU.XMM>;
defm VPCOMD  : xopvpcom<0xCE, "d", X86vpcom, v4i32, SchedWriteVecALU.XMM>;
defm VPCOMQ  : xopvpcom<0xCF, "q", X86vpcom, v2i64, SchedWriteVecALU.XMM>;
defm VPCOMUB : xopvpcom<0xEC, "ub", X86vpcomu, v16i8, SchedWriteVecALU.XMM>;
defm VPCOMUW : xopvpcom<0xED, "uw", X86vpcomu, v8i16, SchedWriteVecALU.XMM>;
defm VPCOMUD : xopvpcom<0xEE, "ud", X86vpcomu, v4i32, SchedWriteVecALU.XMM>;
defm VPCOMUQ : xopvpcom<0xEF, "uq", X86vpcomu, v2i64, SchedWriteVecALU.XMM>;

// Three-source 128-bit instructions selected from an SDNode on type vt128.
// Emits four records:
//   rrr     - all sources in registers.
//   rrm     - $src3 loaded from memory.
//   rmr     - $src2 loaded from memory.
//   rrr_REV - pattern-less register form kept only for the disassembler.
// Every pattern uses vt128 for its result type so the multiclass stays
// correct for any element type it is instantiated with.
multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
                  ValueType vt128, X86FoldableSchedWrite sched> {
  def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst),
            (ins VR128:$src1, VR128:$src2, VR128:$src3),
            !strconcat(OpcodeStr,
            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
            [(set VR128:$dst,
              (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
                             (vt128 VR128:$src3))))]>,
            XOP, VVVV, Sched<[sched]>;
  def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR128:$dst),
            (ins VR128:$src1, VR128:$src2, i128mem:$src3),
            !strconcat(OpcodeStr,
            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
            [(set VR128:$dst,
              (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
                             (vt128 (load addr:$src3)))))]>,
            XOP, VVVV, REX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
  def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
            (ins VR128:$src1, i128mem:$src2, VR128:$src3),
            !strconcat(OpcodeStr,
            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
            [(set VR128:$dst,
              (vt128 (OpNode (vt128 VR128:$src1), (vt128 (load addr:$src2)),
                             (vt128 VR128:$src3))))]>,
            XOP, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
                              // 128mem:$src2
                              ReadDefault, ReadDefault, ReadDefault, ReadDefault,
                              ReadDefault,
                              // VR128:$src3
                              sched.ReadAfterFold]>;
  // For disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR128:$dst),
                (ins VR128:$src1, VR128:$src2, VR128:$src3),
                !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                []>, XOP, VVVV, REX_W, Sched<[sched]>;
}

let ExeDomain = SSEPackedInt in {
  defm VPPERM : xop4op<0xA3, "vpperm", X86vpperm, v16i8,
                       SchedWriteVarShuffle.XMM>;
}

// Instruction where either second or third source can be memory
// The register class, memory operand and value type are parameters so the
// same definitions serve both the 128-bit and 256-bit instantiations below.
// The selection pattern is (or (and src3, src1), (andnp src3, src2)).
multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
                      X86MemOperand x86memop, ValueType VT,
                      X86FoldableSchedWrite sched> {
  def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs RC:$dst),
            (ins RC:$src1, RC:$src2, RC:$src3),
            !strconcat(OpcodeStr,
            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
            [(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
                                   (X86andnp RC:$src3, RC:$src2))))]>, XOP, VVVV,
            Sched<[sched]>;
  // FIXME: We can't write a pattern for this in tablegen.
  let hasSideEffects = 0, mayLoad = 1 in
  def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs RC:$dst),
            (ins RC:$src1, RC:$src2, x86memop:$src3),
            !strconcat(OpcodeStr,
            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
            []>,
            XOP, VVVV, REX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
  def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs RC:$dst),
            (ins RC:$src1, x86memop:$src2, RC:$src3),
            !strconcat(OpcodeStr,
            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
            [(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
                                   (X86andnp RC:$src3, (load addr:$src2)))))]>,
            XOP, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
                              // x86memop:$src2
                              ReadDefault, ReadDefault, ReadDefault, ReadDefault,
                              ReadDefault,
                              // RC:$src3
                              sched.ReadAfterFold]>;
  // For disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs RC:$dst),
                (ins RC:$src1, RC:$src2, RC:$src3),
                !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                []>, XOP, VVVV, REX_W, Sched<[sched]>;
}

let ExeDomain = SSEPackedInt in {
  defm VPCMOV : xop4op_int<0xA2, "vpcmov", VR128, i128mem, v2i64,
                           SchedWriteShuffle.XMM>;
  defm VPCMOVY : xop4op_int<0xA2, "vpcmov", VR256, i256mem, v4i64,
                            SchedWriteShuffle.YMM>, VEX_L;
}

// The VPCMOV definitions above carry patterns only for v2i64 / v4i64. These
// extra patterns select the same instructions for the remaining integer
// element types, in both register and load-folded forms.
let Predicates = [HasXOP] in {
  def : Pat<(v16i8 (or (and VR128:$src3, VR128:$src1),
                       (X86andnp VR128:$src3, VR128:$src2))),
            (VPCMOVrrr VR128:$src1, VR128:$src2, VR128:$src3)>;
  def : Pat<(v8i16 (or (and VR128:$src3, VR128:$src1),
                       (X86andnp VR128:$src3, VR128:$src2))),
            (VPCMOVrrr VR128:$src1, VR128:$src2, VR128:$src3)>;
  def : Pat<(v4i32 (or (and VR128:$src3, VR128:$src1),
                       (X86andnp VR128:$src3, VR128:$src2))),
            (VPCMOVrrr VR128:$src1, VR128:$src2, VR128:$src3)>;

  def : Pat<(or (and VR128:$src3, VR128:$src1),
                (X86andnp VR128:$src3, (loadv16i8 addr:$src2))),
            (VPCMOVrmr VR128:$src1, addr:$src2, VR128:$src3)>;
  def : Pat<(or (and VR128:$src3, VR128:$src1),
                (X86andnp VR128:$src3, (loadv8i16 addr:$src2))),
            (VPCMOVrmr VR128:$src1, addr:$src2, VR128:$src3)>;
  def : Pat<(or (and VR128:$src3, VR128:$src1),
                (X86andnp VR128:$src3, (loadv4i32 addr:$src2))),
            (VPCMOVrmr VR128:$src1, addr:$src2, VR128:$src3)>;

  def : Pat<(v32i8 (or (and VR256:$src3, VR256:$src1),
                       (X86andnp VR256:$src3, VR256:$src2))),
            (VPCMOVYrrr VR256:$src1, VR256:$src2, VR256:$src3)>;
  def : Pat<(v16i16 (or (and VR256:$src3, VR256:$src1),
                        (X86andnp VR256:$src3, VR256:$src2))),
            (VPCMOVYrrr VR256:$src1, VR256:$src2, VR256:$src3)>;
  def : Pat<(v8i32 (or (and VR256:$src3, VR256:$src1),
                       (X86andnp VR256:$src3, VR256:$src2))),
            (VPCMOVYrrr VR256:$src1, VR256:$src2, VR256:$src3)>;

  def : Pat<(or (and VR256:$src3, VR256:$src1),
                (X86andnp VR256:$src3, (loadv32i8 addr:$src2))),
            (VPCMOVYrmr VR256:$src1, addr:$src2, VR256:$src3)>;
  def : Pat<(or (and VR256:$src3, VR256:$src1),
                (X86andnp VR256:$src3, (loadv16i16 addr:$src2))),
            (VPCMOVYrmr VR256:$src1, addr:$src2, VR256:$src3)>;
  def : Pat<(or (and VR256:$src3, VR256:$src1),
                (X86andnp VR256:$src3, (loadv8i32 addr:$src2))),
            (VPCMOVYrmr VR256:$src1, addr:$src2, VR256:$src3)>;
}

// Three register-class sources plus a u4imm, selected from X86vpermil2.
// Emits four records:
//   rr     - all sources in registers.
//   rm     - $src3 loaded through IntLdFrag from intmemop.
//   mr     - $src2 loaded through FPLdFrag from fpmemop.
//   rr_REV - pattern-less register form kept only for the disassembler.
multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
                        X86MemOperand intmemop, X86MemOperand fpmemop,
                        ValueType VT, PatFrag FPLdFrag, PatFrag IntLdFrag,
                        X86FoldableSchedWrite sched> {
  def rr : IXOP5<Opc, MRMSrcReg, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, RC:$src3, u4imm:$src4),
        !strconcat(OpcodeStr,
        "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
        [(set RC:$dst,
           (VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 timm:$src4))))]>,
        Sched<[sched]>;
  def rm : IXOP5<Opc, MRMSrcMemOp4, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, intmemop:$src3, u4imm:$src4),
        !strconcat(OpcodeStr,
        "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
        [(set RC:$dst,
          (VT (X86vpermil2 RC:$src1, RC:$src2, (IntLdFrag addr:$src3),
                           (i8 timm:$src4))))]>, REX_W,
        Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
  def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, fpmemop:$src2, RC:$src3, u4imm:$src4),
        !strconcat(OpcodeStr,
        "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
        [(set RC:$dst,
          (VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2),
                           RC:$src3, (i8 timm:$src4))))]>,
        Sched<[sched.Folded, sched.ReadAfterFold,
               // fpmemop:$src2
               ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
               // RC:$src3
               sched.ReadAfterFold]>;
  // For disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rr_REV : IXOP5<Opc, MRMSrcRegOp4, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, RC:$src3, u4imm:$src4),
        !strconcat(OpcodeStr,
        "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
        []>, REX_W, Sched<[sched]>;
}

let ExeDomain = SSEPackedDouble in {
  defm VPERMIL2PD : xop_vpermil2<0x49, "vpermil2pd", VR128, i128mem, f128mem,
                                 v2f64, loadv2f64, loadv2i64,
                                 SchedWriteFVarShuffle.XMM>;
  defm VPERMIL2PDY : xop_vpermil2<0x49, "vpermil2pd", VR256, i256mem, f256mem,
                                  v4f64, loadv4f64, loadv4i64,
                                  SchedWriteFVarShuffle.YMM>, VEX_L;
}

let ExeDomain = SSEPackedSingle in {
  defm VPERMIL2PS : xop_vpermil2<0x48, "vpermil2ps", VR128, i128mem, f128mem,
                                 v4f32, loadv4f32, loadv4i32,
                                 SchedWriteFVarShuffle.XMM>;
  defm VPERMIL2PSY : xop_vpermil2<0x48, "vpermil2ps", VR256, i256mem, f256mem,
                                  v8f32, loadv8f32, loadv8i32,
                                  SchedWriteFVarShuffle.YMM>, VEX_L;
}