//===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 SSE instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//

/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           RegisterClass RC, X86MemOperand x86memop,
                           Domain d, X86FoldableSchedWrite sched,
                           bit Is2Addr = 1> {
let isCodeGenOnly = 1 in {
  let isCommutable = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
                !if(Is2Addr,
                    !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], d>,
                Sched<[sched]>;
  }
  def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
              !if(Is2Addr,
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], d>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr,
                               SDPatternOperator OpNode, RegisterClass RC,
                               ValueType VT, string asm, Operand memopr,
                               PatFrags mem_frags, Domain d,
                               X86FoldableSchedWrite sched, bit Is2Addr = 1> {
let hasSideEffects = 0 in {
  def rr_Int : SI_Int<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
                      !if(Is2Addr,
                          !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                          !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                      [(set RC:$dst, (VT (OpNode RC:$src1, RC:$src2)))], d>,
                      Sched<[sched]>;
  let mayLoad = 1 in
  def rm_Int : SI_Int<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
                      !if(Is2Addr,
                          !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                          !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                      [(set RC:$dst, (VT (OpNode RC:$src1, (mem_frags addr:$src2))))], d>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
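// Illustrative note (not from the original file): a hypothetical instantiation
// such as
//   defm ADDSS : sse12_fp_scalar<0x58, "addss", any_fadd, FR32, f32mem,
//                                SSEPackedSingle, SchedWriteFAdd.Scl>;
// would expand to ADDSSrr (the two-address "addss %xmm1, %xmm0" form) and
// ADDSSrm (the load-folded form), while the _Int multiclass above builds the
// VR128-typed variants used when selecting intrinsic calls.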
/// sse12_fp_packed - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           RegisterClass RC, ValueType vt,
                           X86MemOperand x86memop, PatFrag mem_frag,
                           Domain d, X86FoldableSchedWrite sched,
                           bit Is2Addr = 1> {
  let isCommutable = 1 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
                !if(Is2Addr,
                    !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>,
                Sched<[sched]>;
  let mayLoad = 1 in
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                !if(Is2Addr,
                    !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
                d>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
                                      string OpcodeStr, X86MemOperand x86memop,
                                      X86FoldableSchedWrite sched,
                                      list<dag> pat_rr, list<dag> pat_rm,
                                      bit Is2Addr = 1> {
  let isCommutable = 1, hasSideEffects = 0 in
  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
              !if(Is2Addr,
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              pat_rr, d>,
              Sched<[sched]>;
  let hasSideEffects = 0, mayLoad = 1 in
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
              !if(Is2Addr,
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              pat_rm, d>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
  def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
                   [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoAVX512]>;
  def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
                   [(set FR64:$dst, fp64imm0)]>, Requires<[HasSSE2, NoAVX512]>;
  def FsFLD0F128 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                     [(set VR128:$dst, fp128imm0)]>, Requires<[HasSSE1, NoAVX512]>;
}

//===----------------------------------------------------------------------===//
// AVX & SSE - Zero/One Vectors
//===----------------------------------------------------------------------===//

// Alias instruction that maps zero vector to pxor / xorp* for sse.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
               [(set VR128:$dst, (v4f32 immAllZerosV))]>;
}

let Predicates = [NoAVX512] in {
def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
}
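// For reference: ExpandPostRAPseudos rewrites V_SET0 into a dependency-free
// zeroing idiom such as "xorps %xmm0, %xmm0" (or "vxorps" with AVX), and the
// ExecutionDomainFix pass may later swizzle it to "pxor" when the integer
// domain is preferable, exactly as the comment above describes.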
// The same as done above but for AVX. The 256-bit AVX1 ISA doesn't support
// packed integer (PI) operations, and doesn't need them because on Sandy
// Bridge the register is set to zero at the rename stage without using any
// execution unit, so SET0PSY and SET0PDY can be used for vector int
// instructions without penalty.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                 [(set VR256:$dst, (v8i32 immAllZerosV))]>;
}

let Predicates = [NoAVX512] in {
def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;
}

// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
  def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                       [(set VR128:$dst, (v4i32 immAllOnesV))]>;
  let Predicates = [HasAVX1Only, OptForMinSize] in {
  def AVX1_SETALLONES: I<0, Pseudo, (outs VR256:$dst), (ins), "",
                         [(set VR256:$dst, (v8i32 immAllOnesV))]>;
  }
  let Predicates = [HasAVX2] in
  def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
}
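// For reference (expansion happens during post-RA pseudo expansion):
// V_SETALLONES is believed to become "pcmpeqd %xmmN, %xmmN" and
// AVX2_SETALLONES "vpcmpeqd %ymmN, %ymmN, %ymmN". AVX1_SETALLONES must
// synthesize 256-bit all-ones without AVX2 integer ops, which is why it is
// gated on OptForMinSize above.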
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move FP Scalar Instructions
//
// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
// register copies because it's a partial register update; Register-to-register
// movss/movsd is not modeled as an INSERT_SUBREG because INSERT_SUBREG requires
// that the insert be implementable in terms of a copy, and, as just mentioned,
// we don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//

multiclass sse12_move_rr<SDNode OpNode, ValueType vt,
                         X86MemOperand x86memop, string base_opc,
                         string asm_opr, Domain d, string Name> {
  let isCommutable = 1 in
  def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
              (ins VR128:$src1, VR128:$src2),
              !strconcat(base_opc, asm_opr),
              [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], d>,
              Sched<[SchedWriteFShuffle.XMM]>;

  // For the disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                  (ins VR128:$src1, VR128:$src2),
                  !strconcat(base_opc, asm_opr), []>,
                  Sched<[SchedWriteFShuffle.XMM]>, FoldGenData<Name#"rr">;
}

multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
                      X86MemOperand x86memop, string OpcodeStr,
                      Domain d, string Name, Predicate pred> {
  // AVX
  let Predicates = [UseAVX, OptForSize] in
  defm V#NAME : sse12_move_rr<OpNode, vt, x86memop, OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d,
                              "V"#Name>,
                              VEX_4V, VEX_LIG, VEX_WIG;

  def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(store RC:$src, addr:$dst)], d>,
                     VEX, VEX_LIG, Sched<[WriteFStore]>, VEX_WIG;
  // SSE1 & 2
  let Constraints = "$src1 = $dst" in {
    let Predicates = [pred, NoSSE41_Or_OptForSize] in
    defm NAME : sse12_move_rr<OpNode, vt, x86memop, OpcodeStr,
                              "\t{$src2, $dst|$dst, $src2}", d, Name>;
  }

  def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(store RC:$src, addr:$dst)], d>,
                   Sched<[WriteFStore]>;

  def : InstAlias<"v"#OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>("V"#NAME#"rr_REV")
                   VR128:$dst, VR128:$src1, VR128:$src2), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src2, $dst|$dst, $src2}",
                  (!cast<Instruction>(NAME#"rr_REV")
                   VR128:$dst, VR128:$src2), 0>;
}
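// To make the multiclass concrete: the "defm MOVSS"/"defm MOVSD"
// instantiations below yield, among others, MOVSSrr (two-address SSE form),
// VMOVSSrr (three-address AVX form), the stores MOVSSmr/VMOVSSmr, the
// disassembler-only *rr_REV forms, and the "movss.s"/"vmovss.s" aliases.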
// Loading from memory, automatically zeroing the upper bits.
multiclass sse12_move_rm<RegisterClass RC, ValueType vt, X86MemOperand x86memop,
                         PatFrag mem_pat, PatFrag vzloadfrag, string OpcodeStr,
                         Domain d> {
  def V#NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
                     VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
  def NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
                   Sched<[WriteFLoad]>;

  // _alt version uses FR32/FR64 register class.
  let isCodeGenOnly = 1 in {
    def V#NAME#rm_alt : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                           !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                           [(set RC:$dst, (mem_pat addr:$src))], d>,
                           VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
    def NAME#rm_alt : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                         [(set RC:$dst, (mem_pat addr:$src))], d>,
                         Sched<[WriteFLoad]>;
  }
}

defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
                        SSEPackedSingle, "MOVSS", UseSSE1>, XS;
defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
                        SSEPackedDouble, "MOVSD", UseSSE2>, XD;

let canFoldAsLoad = 1, isReMaterializable = 1 in {
  defm MOVSS : sse12_move_rm<FR32, v4f32, f32mem, loadf32, X86vzload32, "movss",
                             SSEPackedSingle>, XS;
  defm MOVSD : sse12_move_rm<FR64, v2f64, f64mem, loadf64, X86vzload64, "movsd",
                             SSEPackedDouble>, XD;
}

// Patterns
let Predicates = [UseAVX] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDrm addr:$src)>;

  // Represent the same patterns as above, but in the form they appear for
  // 256-bit types.
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
}

let Predicates = [UseAVX, OptForSize] in {
  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
  // MOVSS to the lower bits.
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (VMOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (VMOVSSrr (v4i32 (V_SET0)), VR128:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSrr (v4f32 (V_SET0)),
             (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSrr (v4i32 (V_SET0)),
             (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)))), sub_xmm)>;
}
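// Note on the SUBREG_TO_REG (i32 0) idiom above: VEX-encoded 128-bit
// instructions zero bits [255:128] of the destination, so the xmm-level
// VMOVSSrr result can be asserted to already have zero upper bits when it is
// placed into a ymm register; no extra zeroing instruction is emitted.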
let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVSS to the lower bits.
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
          (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
          (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
}

let Predicates = [UseSSE2] in
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
          (MOVSDrm addr:$src)>;

let Predicates = [UseSSE1] in
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
          (MOVSSrm addr:$src)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
//===----------------------------------------------------------------------===//

multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
                            X86MemOperand x86memop, PatFrag ld_frag,
                            string asm, Domain d,
                            X86SchedWriteMoveLS sched> {
let hasSideEffects = 0, isMoveReg = 1 in
  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>,
           Sched<[sched.RR]>;
let canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set RC:$dst, (ld_frag addr:$src))], d>,
           Sched<[sched.RM]>;
}

let Predicates = [HasAVX, NoVLX] in {
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
                                SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                                PS, VEX, VEX_WIG;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
                                SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                                PD, VEX, VEX_WIG;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
                                SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                                PS, VEX, VEX_WIG;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
                                SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                                PD, VEX, VEX_WIG;

defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, "movaps",
                                 SSEPackedSingle, SchedWriteFMoveLS.YMM>,
                                 PS, VEX, VEX_L, VEX_WIG;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd",
                                 SSEPackedDouble, SchedWriteFMoveLS.YMM>,
                                 PD, VEX, VEX_L, VEX_WIG;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups",
                                 SSEPackedSingle, SchedWriteFMoveLS.YMM>,
                                 PS, VEX, VEX_L, VEX_WIG;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd",
                                 SSEPackedDouble, SchedWriteFMoveLS.YMM>,
                                 PD, VEX, VEX_L, VEX_WIG;
}

let Predicates = [UseSSE1] in {
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
                               SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                               PS;
defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
                               SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                               PS;
}
let Predicates = [UseSSE2] in {
defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
                               SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                               PD;
defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
                               SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                               PD;
}
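// Reminder on the aligned/unaligned split above: movaps/movapd fault (#GP)
// if the memory operand is not 16-byte aligned (32-byte for the ymm forms),
// while movups/movupd accept any alignment. That is why the aligned defs use
// the alignedload PatFrags and the unaligned ones use plain load.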
"movapd\t{$src, $dst|$dst, $src}", 403 [(alignedstore (v2f64 VR128:$src), addr:$dst)]>, 404 VEX, VEX_WIG; 405def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 406 "movups\t{$src, $dst|$dst, $src}", 407 [(store (v4f32 VR128:$src), addr:$dst)]>, 408 VEX, VEX_WIG; 409def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 410 "movupd\t{$src, $dst|$dst, $src}", 411 [(store (v2f64 VR128:$src), addr:$dst)]>, 412 VEX, VEX_WIG; 413} // SchedRW 414 415let SchedRW = [SchedWriteFMoveLS.YMM.MR] in { 416def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 417 "movaps\t{$src, $dst|$dst, $src}", 418 [(alignedstore (v8f32 VR256:$src), addr:$dst)]>, 419 VEX, VEX_L, VEX_WIG; 420def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 421 "movapd\t{$src, $dst|$dst, $src}", 422 [(alignedstore (v4f64 VR256:$src), addr:$dst)]>, 423 VEX, VEX_L, VEX_WIG; 424def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 425 "movups\t{$src, $dst|$dst, $src}", 426 [(store (v8f32 VR256:$src), addr:$dst)]>, 427 VEX, VEX_L, VEX_WIG; 428def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 429 "movupd\t{$src, $dst|$dst, $src}", 430 [(store (v4f64 VR256:$src), addr:$dst)]>, 431 VEX, VEX_L, VEX_WIG; 432} // SchedRW 433} // Predicate 434 435// For disassembler 436let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, 437 isMoveReg = 1 in { 438let SchedRW = [SchedWriteFMoveLS.XMM.RR] in { 439 def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst), 440 (ins VR128:$src), 441 "movaps\t{$src, $dst|$dst, $src}", []>, 442 VEX, VEX_WIG, FoldGenData<"VMOVAPSrr">; 443 def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst), 444 (ins VR128:$src), 445 "movapd\t{$src, $dst|$dst, $src}", []>, 446 VEX, VEX_WIG, FoldGenData<"VMOVAPDrr">; 447 def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst), 448 (ins VR128:$src), 449 "movups\t{$src, $dst|$dst, $src}", []>, 450 VEX, VEX_WIG, FoldGenData<"VMOVUPSrr">; 451 def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst), 452 (ins VR128:$src), 453 "movupd\t{$src, $dst|$dst, $src}", []>, 454 VEX, VEX_WIG, FoldGenData<"VMOVUPDrr">; 455} // SchedRW 456 457let SchedRW = [SchedWriteFMoveLS.YMM.RR] in { 458 def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst), 459 (ins VR256:$src), 460 "movaps\t{$src, $dst|$dst, $src}", []>, 461 VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVAPSYrr">; 462 def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst), 463 (ins VR256:$src), 464 "movapd\t{$src, $dst|$dst, $src}", []>, 465 VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVAPDYrr">; 466 def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst), 467 (ins VR256:$src), 468 "movups\t{$src, $dst|$dst, $src}", []>, 469 VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPSYrr">; 470 def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst), 471 (ins VR256:$src), 472 "movupd\t{$src, $dst|$dst, $src}", []>, 473 VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPDYrr">; 474} // SchedRW 475} // Predicate 476 477// Reversed version with ".s" suffix for GAS compatibility. 
// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
                (VMOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}",
                (VMOVAPDrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}",
                (VMOVUPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}",
                (VMOVUPDrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
                (VMOVAPSYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}",
                (VMOVAPDYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}",
                (VMOVUPSYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}",
                (VMOVUPDYrr_REV VR256:$dst, VR256:$src), 0>;

let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)]>;
def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)]>;
} // SchedRW

// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    isMoveReg = 1, SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
  def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movaps\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVAPSrr">;
  def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movapd\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVAPDrr">;
  def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movups\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVUPSrr">;
  def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movupd\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVUPDrr">;
}

// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"movaps.s\t{$src, $dst|$dst, $src}",
                (MOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movapd.s\t{$src, $dst|$dst, $src}",
                (MOVAPDrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movups.s\t{$src, $dst|$dst, $src}",
                (MOVUPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movupd.s\t{$src, $dst|$dst, $src}",
                (MOVUPDrr_REV VR128:$dst, VR128:$src), 0>;
let Predicates = [HasAVX, NoVLX] in {
  // 256-bit loads/stores need to use floating point load/store in case we
  // don't have AVX2. Execution domain fixing will convert to integer if AVX2
  // is available and changing the domain is beneficial.
  def : Pat<(alignedloadv4i64 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(alignedloadv8i32 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(alignedloadv16i16 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(alignedloadv32i8 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(loadv4i64 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(loadv8i32 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(loadv16i16 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(loadv32i8 addr:$src),
            (VMOVUPSYrm addr:$src)>;

  def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore (v16i16 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore (v32i8 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v4i64 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v8i32 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
}

// Use movaps / movups for SSE integer load / store (one byte shorter).
// The instructions selected below are then converted to MOVDQA/MOVDQU
// during the SSE domain pass.
let Predicates = [UseSSE1] in {
  def : Pat<(alignedloadv2i64 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(alignedloadv4i32 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(alignedloadv8i16 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(alignedloadv16i8 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src),
            (MOVUPSrm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),
            (MOVUPSrm addr:$src)>;
  def : Pat<(loadv8i16 addr:$src),
            (MOVUPSrm addr:$src)>;
  def : Pat<(loadv16i8 addr:$src),
            (MOVUPSrm addr:$src)>;

  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
}
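// The size claim above, spelled out: register-register movaps is encoded as
// "0F 28 /r" (or "0F 29 /r"), while movdqa needs a 66 prefix ("66 0F 6F /r"),
// so the floating-point move really is one byte shorter than the integer one.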
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//

multiclass sse12_mov_hilo_packed_base<bits<8> opc, SDPatternOperator pdnode,
                                      string base_opc, string asm_opr> {
  // No patterns here, as loads need to be special-cased between the high and
  // low forms.
  let hasSideEffects = 0, mayLoad = 1 in
  def PSrm : PI<opc, MRMSrcMem,
                (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                !strconcat(base_opc, "s", asm_opr),
                [], SSEPackedSingle>, PS,
                Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;

  def PDrm : PI<opc, MRMSrcMem,
                (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                !strconcat(base_opc, "d", asm_opr),
                [(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
                                  (scalar_to_vector (loadf64 addr:$src2)))))],
                SSEPackedDouble>, PD,
                Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

multiclass sse12_mov_hilo_packed<bits<8> opc, SDPatternOperator pdnode,
                                 string base_opc> {
  let Predicates = [UseAVX] in
  defm V#NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc,
                                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
                                           VEX_4V, VEX_WIG;

  let Constraints = "$src1 = $dst" in
  defm NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc,
                                         "\t{$src2, $dst|$dst, $src2}">;
}

defm MOVL : sse12_mov_hilo_packed<0x12, X86Movsd, "movlp">;

let SchedRW = [WriteFStore] in {
let Predicates = [UseAVX] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlps\t{$src, $dst|$dst, $src}",
                     []>,
                     VEX, VEX_WIG;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (extractelt (v2f64 VR128:$src),
                                   (iPTR 0))), addr:$dst)]>,
                     VEX, VEX_WIG;
} // UseAVX
let mayStore = 1, hasSideEffects = 0 in
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlps\t{$src, $dst|$dst, $src}",
                   []>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (extractelt (v2f64 VR128:$src),
                                 (iPTR 0))), addr:$dst)]>;
} // SchedRW

let Predicates = [UseSSE1] in {
  // This pattern helps select MOVLPS on SSE1-only targets. With SSE2 we'll
  // end up with a movsd or blend instead of shufp.
  // No need for an aligned load here; we're only loading 64 bits.
  def : Pat<(X86Shufp (v4f32 (simple_load addr:$src2)), VR128:$src1,
                      (i8 -28)),
            (MOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Shufp (v4f32 (X86vzload64 addr:$src2)), VR128:$src1, (i8 -28)),
            (MOVLPSrm VR128:$src1, addr:$src2)>;

  def : Pat<(v4f32 (X86vzload64 addr:$src)),
            (MOVLPSrm (v4f32 (V_SET0)), addr:$src)>;
  def : Pat<(X86vextractstore64 (v4f32 VR128:$src), addr:$dst),
            (MOVLPSmr addr:$dst, VR128:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Hi packed FP Instructions
//===----------------------------------------------------------------------===//

defm MOVH : sse12_mov_hilo_packed<0x16, X86Unpckl, "movhp">;
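// Decoding the MOVLPS selection patterns above: the shuffle immediate
// (i8 -28) is 0xE4 = 0b11'10'01'00, i.e. take elements 0-1 from the loaded
// vector and elements 2-3 from $src1 -- precisely "replace the low 64 bits
// of $src1 with the 64 bits loaded from memory", which is what movlps does.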
let SchedRW = [WriteFStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0, so the non-store version isn't too horrible.
let Predicates = [UseAVX] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movhps\t{$src, $dst|$dst, $src}",
                     []>, VEX, VEX_WIG;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movhpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (extractelt
                                   (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                   (iPTR 0))), addr:$dst)]>, VEX, VEX_WIG;
} // UseAVX
let mayStore = 1, hasSideEffects = 0 in
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhps\t{$src, $dst|$dst, $src}",
                   []>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (extractelt
                                 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                 (iPTR 0))), addr:$dst)]>;
} // SchedRW

let Predicates = [UseAVX] in {
  // MOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
            (VMOVHPDrm VR128:$src1, addr:$src2)>;

  def : Pat<(store (f64 (extractelt
                         (v2f64 (X86VPermilpi VR128:$src, (i8 1))),
                         (iPTR 0))), addr:$dst),
            (VMOVHPDmr addr:$dst, VR128:$src)>;

  // MOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;
}

let Predicates = [UseSSE1] in {
  // This pattern helps select MOVHPS on SSE1-only targets. With SSE2 we'll
  // end up with a movsd or blend instead of shufp.
  // No need for an aligned load here; we're only loading 64 bits.
  def : Pat<(X86Movlhps VR128:$src1, (v4f32 (simple_load addr:$src2))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128:$src1, (v4f32 (X86vzload64 addr:$src2))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;

  def : Pat<(X86vextractstore64 (v4f32 (X86Movhlps VR128:$src, VR128:$src)),
                                addr:$dst),
            (MOVHPSmr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  // MOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
            (MOVHPDrm VR128:$src1, addr:$src2)>;

  def : Pat<(store (f64 (extractelt
                         (v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))),
                         (iPTR 0))), addr:$dst),
            (MOVHPDmr addr:$dst, VR128:$src)>;

  // MOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
}
let Predicates = [UseSSE2, NoSSE41_Or_OptForSize] in {
  // Use MOVLPD to load into the low bits from a full vector unless we can use
  // BLENDPD.
  def : Pat<(X86Movsd VR128:$src1, (v2f64 (simple_load addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

let Predicates = [UseAVX] in {
  def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst,
                          (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
                        VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG;
  let isCommutable = 1 in
  def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst,
                          (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
                        VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG,
                        NotMemoryFoldable;
}
let Constraints = "$src1 = $dst" in {
  def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "movlhps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
                      Sched<[SchedWriteFShuffle.XMM]>;
  let isCommutable = 1 in
  def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "movhlps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
                      Sched<[SchedWriteFShuffle.XMM]>, NotMemoryFoldable;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Conversion Instructions
//===----------------------------------------------------------------------===//

multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                       SDPatternOperator OpNode, X86MemOperand x86memop, PatFrag ld_frag,
                       string asm, string mem, X86FoldableSchedWrite sched,
                       Domain d,
                       SchedRead Int2Fpu = ReadDefault> {
  let ExeDomain = d in {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (OpNode SrcRC:$src))]>,
              Sched<[sched, Int2Fpu]>;
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
              mem#"\t{$src, $dst|$dst, $src}",
              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
              Sched<[sched.Folded]>;
  }
}

multiclass sse12_cvt_p<bits<8> opc, RegisterClass RC, X86MemOperand x86memop,
                       ValueType DstTy, ValueType SrcTy, PatFrag ld_frag,
                       string asm, Domain d, X86FoldableSchedWrite sched> {
let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), asm,
             [(set RC:$dst, (DstTy (any_sint_to_fp (SrcTy RC:$src))))], d>,
             Sched<[sched]>;
  let mayLoad = 1 in
  def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), asm,
             [(set RC:$dst, (DstTy (any_sint_to_fp
                                    (SrcTy (ld_frag addr:$src)))))], d>,
             Sched<[sched.Folded]>;
}
}
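// A semantic note for the instantiations that follow: the 0x2C opcodes
// (cvttss2si/cvttsd2si) always truncate toward zero, matching
// any_fp_to_sint, while the 0x2D opcodes (cvtss2si/cvtsd2si) round according
// to MXCSR.RC (round-to-nearest-even by default), which is why lrint/llrint
// are selected to the CVTS*2SI forms rather than the truncating ones.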
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          X86MemOperand x86memop, string asm, string mem,
                          X86FoldableSchedWrite sched, Domain d> {
let hasSideEffects = 0, Predicates = [UseAVX], ExeDomain = d in {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  let mayLoad = 1 in
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins DstRC:$src1, x86memop:$src),
              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
} // hasSideEffects = 0
}

let isCodeGenOnly = 1, Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
defm VCVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint, f32mem, loadf32,
                                "cvttss2si", "cvttss2si",
                                WriteCvtSS2I, SSEPackedSingle>,
                                XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint, f32mem, loadf32,
                                "cvttss2si", "cvttss2si",
                                WriteCvtSS2I, SSEPackedSingle>,
                                XS, VEX, VEX_W, VEX_LIG;
defm VCVTTSD2SI   : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64,
                                "cvttsd2si", "cvttsd2si",
                                WriteCvtSD2I, SSEPackedDouble>,
                                XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64,
                                "cvttsd2si", "cvttsd2si",
                                WriteCvtSD2I, SSEPackedDouble>,
                                XD, VEX, VEX_W, VEX_LIG;

defm VCVTSS2SI   : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32,
                               "cvtss2si", "cvtss2si",
                               WriteCvtSS2I, SSEPackedSingle>,
                               XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32,
                               "cvtss2si", "cvtss2si",
                               WriteCvtSS2I, SSEPackedSingle>,
                               XS, VEX, VEX_W, VEX_LIG;
defm VCVTSD2SI   : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64,
                               "cvtsd2si", "cvtsd2si",
                               WriteCvtSD2I, SSEPackedDouble>,
                               XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64,
                               "cvtsd2si", "cvtsd2si",
                               WriteCvtSD2I, SSEPackedDouble>,
                               XD, VEX, VEX_W, VEX_LIG;
}
// The assembler can recognize rr 64-bit instructions by seeing a rxx
// register, but the same isn't true when only memory operands are used, so
// provide explicit "l" and "q" assembly forms to disambiguate where
// appropriate.
let isCodeGenOnly = 1 in {
defm VCVTSI2SS   : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss", "l",
                                  WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V,
                                  VEX_LIG, SIMD_EXC;
defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss", "q",
                                  WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V,
                                  VEX_W, VEX_LIG, SIMD_EXC;
defm VCVTSI2SD   : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd", "l",
                                  WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V,
                                  VEX_LIG;
defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q",
                                  WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V,
                                  VEX_W, VEX_LIG, SIMD_EXC;
} // isCodeGenOnly = 1
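// Example of the ambiguity described above (illustrative assembly): with a
// register source, "vcvtsi2ss %rax, %xmm1, %xmm0" is self-describing, but
// "vcvtsi2ss (%rax), %xmm1, %xmm0" could load 32 or 64 bits; the explicit
// mnemonics "vcvtsi2ssl (%rax), ..." and "vcvtsi2ssq (%rax), ..." resolve it.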
let Predicates = [UseAVX] in {
  def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f32 (any_sint_to_fp GR32:$src)),
            (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f32 (any_sint_to_fp GR64:$src)),
            (VCVTSI642SSrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
  def : Pat<(f64 (any_sint_to_fp GR32:$src)),
            (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f64 (any_sint_to_fp GR64:$src)),
            (VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64rr FR32:$src)>;
  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64rm addr:$src)>;

  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64rr FR64:$src)>;
  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64rm addr:$src)>;
}

let isCodeGenOnly = 1 in {
defm CVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint, f32mem, loadf32,
                               "cvttss2si", "cvttss2si",
                               WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC;
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint, f32mem, loadf32,
                               "cvttss2si", "cvttss2si",
                               WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC;
defm CVTTSD2SI   : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64,
                               "cvttsd2si", "cvttsd2si",
                               WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC;
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64,
                               "cvttsd2si", "cvttsd2si",
                               WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;

defm CVTSS2SI   : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32,
                              "cvtss2si", "cvtss2si",
                              WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC;
defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32,
                              "cvtss2si", "cvtss2si",
                              WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC;
defm CVTSD2SI   : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64,
                              "cvtsd2si", "cvtsd2si",
                              WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC;
defm CVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64,
                              "cvtsd2si", "cvtsd2si",
                              WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;

defm CVTSI2SS   : sse12_cvt_s<0x2A, GR32, FR32, any_sint_to_fp, i32mem, loadi32,
                              "cvtsi2ss", "cvtsi2ss{l}",
                              WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, SIMD_EXC;
defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, any_sint_to_fp, i64mem, loadi64,
                              "cvtsi2ss", "cvtsi2ss{q}",
                              WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, REX_W, SIMD_EXC;
defm CVTSI2SD   : sse12_cvt_s<0x2A, GR32, FR64, any_sint_to_fp, i32mem, loadi32,
                              "cvtsi2sd", "cvtsi2sd{l}",
                              WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD;
defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, any_sint_to_fp, i64mem, loadi64,
                              "cvtsi2sd", "cvtsi2sd{q}",
                              WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD, REX_W, SIMD_EXC;
} // isCodeGenOnly = 1

let Predicates = [UseSSE1] in {
  def : Pat<(i64 (lrint FR32:$src)), (CVTSS2SI64rr FR32:$src)>;
  def : Pat<(i64 (lrint (loadf32 addr:$src))), (CVTSS2SI64rm addr:$src)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(i64 (lrint FR64:$src)), (CVTSD2SI64rr FR64:$src)>;
  def : Pat<(i64 (lrint (loadf64 addr:$src))), (CVTSD2SI64rm addr:$src)>;
}

// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).

multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          ValueType DstVT, ValueType SrcVT, SDNode OpNode,
                          Operand memop, PatFrags mem_frags, string asm,
                          X86FoldableSchedWrite sched, Domain d> {
let ExeDomain = d in {
  def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
                  !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                  [(set DstRC:$dst, (DstVT (OpNode (SrcVT SrcRC:$src))))]>,
                  Sched<[sched]>;
  def rm_Int : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
                  !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                  [(set DstRC:$dst, (DstVT (OpNode (SrcVT (mem_frags addr:$src)))))]>,
                  Sched<[sched.Folded]>;
}
}

multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
                                RegisterClass DstRC, X86MemOperand x86memop,
                                string asm, string mem, X86FoldableSchedWrite sched,
                                Domain d, bit Is2Addr = 1> {
let hasSideEffects = 0, ExeDomain = d in {
  def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
                  !if(Is2Addr,
                      !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                      !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                  []>, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  let mayLoad = 1 in
  def rm_Int : SI<opc, MRMSrcMem, (outs DstRC:$dst),
                  (ins DstRC:$src1, x86memop:$src2),
                  !if(Is2Addr,
                      asm#"{"#mem#"}\t{$src2, $dst|$dst, $src2}",
                      asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

let Uses = [MXCSR], mayRaiseFPException = 1 in {
let Predicates = [UseAVX] in {
defm VCVTSD2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64,
                                  X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
                                  WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64,
                                  X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
                                  WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_W, VEX_LIG;
}
defm CVTSD2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si,
                                 sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I,
                                 SSEPackedDouble>, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si,
                                 sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I,
                                 SSEPackedDouble>, XD, REX_W;
}
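// Note on the _Int naming used by sse12_cvt_sint above: these forms take a
// whole VR128 and read only its low element, which matches how the C
// intrinsics consume an __m128/__m128d; e.g. clang's _mm_cvtsd_si32 is
// expected to select to (V)CVTSD2SIrr_Int via the X86cvts2si node.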
let Predicates = [UseAVX] in {
defm VCVTSI2SS   : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                          i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle, 0>,
                          XS, VEX_4V, VEX_LIG, SIMD_EXC;
defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                          i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle, 0>,
                          XS, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC;
defm VCVTSI2SD   : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                          i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble, 0>,
                          XD, VEX_4V, VEX_LIG;
defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                          i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble, 0>,
                          XD, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC;
}
let Constraints = "$src1 = $dst" in {
  defm CVTSI2SS   : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                          i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle>,
                          XS, SIMD_EXC;
  defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                          i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle>,
                          XS, REX_W, SIMD_EXC;
  defm CVTSI2SD   : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                          i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble>,
                          XD;
  defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                          i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble>,
                          XD, REX_W, SIMD_EXC;
}

def : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI2SSrr_Int VR128:$dst, VR128:$src1, GR32:$src2), 0, "att">;
def : InstAlias<"vcvtsi2ss{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI642SSrr_Int VR128:$dst, VR128:$src1, GR64:$src2), 0, "att">;
def : InstAlias<"vcvtsi2sd{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI2SDrr_Int VR128:$dst, VR128:$src1, GR32:$src2), 0, "att">;
def : InstAlias<"vcvtsi2sd{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI642SDrr_Int VR128:$dst, VR128:$src1, GR64:$src2), 0, "att">;

def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SSrm_Int VR128:$dst, VR128:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SDrm_Int VR128:$dst, VR128:$src1, i32mem:$src), 0, "att">;

def : InstAlias<"cvtsi2ss{l}\t{$src, $dst|$dst, $src}",
                (CVTSI2SSrr_Int VR128:$dst, GR32:$src), 0, "att">;
def : InstAlias<"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
                (CVTSI642SSrr_Int VR128:$dst, GR64:$src), 0, "att">;
def : InstAlias<"cvtsi2sd{l}\t{$src, $dst|$dst, $src}",
                (CVTSI2SDrr_Int VR128:$dst, GR32:$src), 0, "att">;
def : InstAlias<"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
                (CVTSI642SDrr_Int VR128:$dst, GR64:$src), 0, "att">;

def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
                (CVTSI2SSrm_Int VR128:$dst, i32mem:$src), 0, "att">;
def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
                (CVTSI2SDrm_Int VR128:$dst, i32mem:$src), 0, "att">;

/// SSE 1 Only

// Aliases for intrinsics
let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
defm VCVTTSS2SI   : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
                                   ssmem, sse_load_f32, "cvttss2si",
                                   WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
                                   X86cvtts2Int, ssmem, sse_load_f32,
                                   "cvttss2si", WriteCvtSS2I, SSEPackedSingle>,
                                   XS, VEX, VEX_LIG, VEX_W;
defm VCVTTSD2SI   : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
                                   sdmem, sse_load_f64, "cvttsd2si",
                                   WriteCvtSS2I, SSEPackedDouble>, XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
                                   X86cvtts2Int, sdmem, sse_load_f64,
                                   "cvttsd2si", WriteCvtSS2I, SSEPackedDouble>,
                                   XD, VEX, VEX_LIG, VEX_W;
}
let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm CVTTSS2SI   : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
                                  ssmem, sse_load_f32, "cvttss2si",
                                  WriteCvtSS2I, SSEPackedSingle>, XS;
defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
                                  X86cvtts2Int, ssmem, sse_load_f32,
                                  "cvttss2si", WriteCvtSS2I, SSEPackedSingle>,
                                  XS, REX_W;
defm CVTTSD2SI   : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
                                  sdmem, sse_load_f64, "cvttsd2si",
                                  WriteCvtSD2I, SSEPackedDouble>, XD;
defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
                                  X86cvtts2Int, sdmem, sse_load_f64,
                                  "cvttsd2si", WriteCvtSD2I, SSEPackedDouble>,
                                  XD, REX_W;
}

def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">;
def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">;
def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">;

def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">;
def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">;
def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">;
def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">;

let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
defm VCVTSS2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
                                  ssmem, sse_load_f32, "cvtss2si",
                                  WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
                                  ssmem, sse_load_f32, "cvtss2si",
                                  WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_W, VEX_LIG;
}
let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm CVTSS2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
                                 ssmem, sse_load_f32, "cvtss2si",
                                 WriteCvtSS2I, SSEPackedSingle>, XS;
defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
                                 ssmem, sse_load_f32, "cvtss2si",
                                 WriteCvtSS2I, SSEPackedSingle>, XS, REX_W;
defm VCVTDQ2PS  : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load,
                              "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                              SSEPackedSingle, WriteCvtI2PS>,
                              PS, VEX, Requires<[HasAVX, NoVLX]>, VEX_WIG;
defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, load,
                              "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                              SSEPackedSingle, WriteCvtI2PSY>,
                              PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, VEX_WIG;

defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memop,
                            "cvtdq2ps\t{$src, $dst|$dst, $src}",
                            SSEPackedSingle, WriteCvtI2PS>,
                            PS, Requires<[UseSSE2]>;
}

// AVX aliases
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SIrm_Int GR32:$dst, ssmem:$src), 0, "att">;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SIrm_Int GR32:$dst, sdmem:$src), 0, "att">;
def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SI64rm_Int GR64:$dst, ssmem:$src), 0, "att">;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0, "att">;

// SSE aliases
def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSS2SIrm_Int GR32:$dst, ssmem:$src), 0, "att">;
def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSD2SIrm_Int GR32:$dst, sdmem:$src), 0, "att">;
def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSS2SI64rm_Int GR64:$dst, ssmem:$src), 0, "att">;
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0, "att">;

/// SSE 2 Only

// Convert scalar double to scalar single
let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX],
    ExeDomain = SSEPackedSingle in {
def VCVTSD2SSrr  : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
                        (ins FR32:$src1, FR64:$src2),
                        "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                        VEX_4V, VEX_LIG, VEX_WIG,
                        Sched<[WriteCvtSD2SS]>, SIMD_EXC;
let mayLoad = 1 in
def VCVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst),
                     (ins FR32:$src1, f64mem:$src2),
                     "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                     XD, VEX_4V, VEX_LIG, VEX_WIG,
                     Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC;
}

def : Pat<(f32 (any_fpround FR64:$src)),
          (VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>,
          Requires<[UseAVX]>;
let isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in {
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
                     "cvtsd2ss\t{$src, $dst|$dst, $src}",
                     [(set FR32:$dst, (any_fpround FR64:$src))]>,
                     Sched<[WriteCvtSD2SS]>, SIMD_EXC;
def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
                   "cvtsd2ss\t{$src, $dst|$dst, $src}",
                   [(set FR32:$dst, (any_fpround (loadf64 addr:$src)))]>,
                   XD, Requires<[UseSSE2, OptForSize]>,
                   Sched<[WriteCvtSD2SS.Folded]>, SIMD_EXC;
}

let Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = SSEPackedSingle in {
def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR128:$dst,
                         (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
                       XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>,
                       Sched<[WriteCvtSD2SS]>;
def VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR128:$dst,
                         (v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>,
                       XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>,
                       Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in {
def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
                      XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>;
def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                      "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>,
                      XD, Requires<[UseSSE2]>,
                      Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
}
}

// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
let isCodeGenOnly = 1, hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
                    (ins FR64:$src1, FR32:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                    XS, VEX_4V, VEX_LIG, VEX_WIG,
                    Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>, SIMD_EXC;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
                    (ins FR64:$src1, f32mem:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                    XS, VEX_4V, VEX_LIG, VEX_WIG,
                    Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>,
                    Requires<[UseAVX, OptForSize]>, SIMD_EXC;
} // isCodeGenOnly = 1, hasSideEffects = 0

def : Pat<(f64 (any_fpextend FR32:$src)),
          (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>, Requires<[UseAVX]>;
def : Pat<(any_fpextend (loadf32 addr:$src)),
          (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX, OptForSize]>;

let isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in {
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (any_fpextend FR32:$src))]>,
                   XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>, SIMD_EXC;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (any_fpextend (loadf32 addr:$src)))]>,
                   XS, Requires<[UseSSE2, OptForSize]>,
                   Sched<[WriteCvtSS2SD.Folded]>, SIMD_EXC;
} // isCodeGenOnly = 1
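// A possible rationale (not stated in this file) for the OptForSize
// requirement on the CVTSD2SSrm/CVTSS2SDrm forms above: folding the scalar
// load saves bytes, but keeping the load as a separate instruction lets the
// conversion read a register and is generally preferred for speed, so the
// folded form is limited to size-optimized code.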
def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                       "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       []>, XS, VEX_4V, VEX_LIG, VEX_WIG,
                       Requires<[HasAVX]>, Sched<[WriteCvtSS2SD]>;
let mayLoad = 1 in
def VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
                       "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       []>, XS, VEX_4V, VEX_LIG, VEX_WIG, Requires<[HasAVX]>,
                       Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def CVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "cvtss2sd\t{$src2, $dst|$dst, $src2}",
                      []>, XS, Requires<[UseSSE2]>,
                      Sched<[WriteCvtSS2SD]>;
let mayLoad = 1 in
def CVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
                      "cvtss2sd\t{$src2, $dst|$dst, $src2}",
                      []>, XS, Requires<[UseSSE2]>,
                      Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
}
} // hasSideEffects = 0

// Patterns used for matching (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss and
// (v)cvtss2sd intrinsic sequences from clang which produce unnecessary
// vmovs{s,d} instructions
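//
// For example (an illustrative sketch, not taken from this file; the
// function name and register assignments are assumptions):
//   __m128 foo(__m128 a, __m128d b) {
//     return _mm_cvtsd_ss(a, b);
//   }
// should select a single "vcvtsd2ss %xmm1, %xmm0, %xmm0" with no trailing
// vmovss, because the patterns below match the conversion together with the
// X86Movss/X86Movsd blend that clang's lowering produces.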
let Predicates = [UseAVX] in {
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128:$dst),
                   (v4f32 (scalar_to_vector
                     (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
          (VCVTSD2SSrr_Int VR128:$dst, VR128:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128:$dst),
                   (v2f64 (scalar_to_vector
                     (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
          (VCVTSS2SDrr_Int VR128:$dst, VR128:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SSrr_Int VR128:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SSrm_Int VR128:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SSrr_Int VR128:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SSrm_Int VR128:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SDrr_Int VR128:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SDrm_Int VR128:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SDrr_Int VR128:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SDrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseAVX]

let Predicates = [UseSSE2] in {
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128:$dst),
                   (v4f32 (scalar_to_vector
                     (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
          (CVTSD2SSrr_Int VR128:$dst, VR128:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128:$dst),
                   (v2f64 (scalar_to_vector
                     (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
          (CVTSS2SDrr_Int VR128:$dst, VR128:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
          (CVTSI642SDrr_Int VR128:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
          (CVTSI642SDrm_Int VR128:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
          (CVTSI2SDrr_Int VR128:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
          (CVTSI2SDrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseSSE2]

let Predicates = [UseSSE1] in {
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
          (CVTSI642SSrr_Int VR128:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
          (CVTSI642SSrm_Int VR128:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
          (CVTSI2SSrr_Int VR128:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
          (CVTSI2SSrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseSSE1]

let Predicates = [HasAVX, NoVLX] in {
// Convert packed single/double fp to doubleword
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtps2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>,
                       VEX, Sched<[WriteCvtPS2I]>, VEX_WIG, SIMD_EXC;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "cvtps2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v4i32 (X86cvtp2Int (loadv4f32 addr:$src))))]>,
                       VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG, SIMD_EXC;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                        "cvtps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR256:$dst,
                          (v8i32 (X86cvtp2Int (v8f32 VR256:$src))))]>,
                        VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG, SIMD_EXC;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                        "cvtps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR256:$dst,
                          (v8i32 (X86cvtp2Int (loadv8f32 addr:$src))))]>,
                        VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG, SIMD_EXC;
}
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtps2dq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>,
                     Sched<[WriteCvtPS2I]>, SIMD_EXC;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     "cvtps2dq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v4i32 (X86cvtp2Int (memopv4f32 addr:$src))))]>,
                     Sched<[WriteCvtPS2ILd]>, SIMD_EXC;


// Convert Packed Double FP to Packed DW Integers
let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
1520// Provide other assembly rr and rm forms to address this explicitly. 1521def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1522 "vcvtpd2dq\t{$src, $dst|$dst, $src}", 1523 [(set VR128:$dst, 1524 (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>, 1525 VEX, Sched<[WriteCvtPD2I]>, VEX_WIG; 1526 1527// XMM only 1528def VCVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1529 "vcvtpd2dq{x}\t{$src, $dst|$dst, $src}", 1530 [(set VR128:$dst, 1531 (v4i32 (X86cvtp2Int (loadv2f64 addr:$src))))]>, VEX, 1532 Sched<[WriteCvtPD2ILd]>, VEX_WIG; 1533 1534// YMM only 1535def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), 1536 "vcvtpd2dq\t{$src, $dst|$dst, $src}", 1537 [(set VR128:$dst, 1538 (v4i32 (X86cvtp2Int (v4f64 VR256:$src))))]>, 1539 VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG; 1540def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), 1541 "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", 1542 [(set VR128:$dst, 1543 (v4i32 (X86cvtp2Int (loadv4f64 addr:$src))))]>, 1544 VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG; 1545} 1546 1547def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", 1548 (VCVTPD2DQrr VR128:$dst, VR128:$src), 0, "att">; 1549def : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}", 1550 (VCVTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">; 1551 1552def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1553 "cvtpd2dq\t{$src, $dst|$dst, $src}", 1554 [(set VR128:$dst, 1555 (v4i32 (X86cvtp2Int (memopv2f64 addr:$src))))]>, 1556 Sched<[WriteCvtPD2ILd]>, SIMD_EXC; 1557def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1558 "cvtpd2dq\t{$src, $dst|$dst, $src}", 1559 [(set VR128:$dst, 1560 (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>, 1561 Sched<[WriteCvtPD2I]>, SIMD_EXC; 1562 1563// Convert with truncation packed single/double fp to doubleword 1564// SSE2 packed instructions with XS prefix 1565let Uses = [MXCSR], mayRaiseFPException = 1 in { 1566let Predicates = [HasAVX, NoVLX] in { 1567def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1568 "cvttps2dq\t{$src, $dst|$dst, $src}", 1569 [(set VR128:$dst, 1570 (v4i32 (X86any_cvttp2si (v4f32 VR128:$src))))]>, 1571 VEX, Sched<[WriteCvtPS2I]>, VEX_WIG; 1572def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1573 "cvttps2dq\t{$src, $dst|$dst, $src}", 1574 [(set VR128:$dst, 1575 (v4i32 (X86any_cvttp2si (loadv4f32 addr:$src))))]>, 1576 VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG; 1577def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 1578 "cvttps2dq\t{$src, $dst|$dst, $src}", 1579 [(set VR256:$dst, 1580 (v8i32 (X86any_cvttp2si (v8f32 VR256:$src))))]>, 1581 VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG; 1582def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), 1583 "cvttps2dq\t{$src, $dst|$dst, $src}", 1584 [(set VR256:$dst, 1585 (v8i32 (X86any_cvttp2si (loadv8f32 addr:$src))))]>, 1586 VEX, VEX_L, 1587 Sched<[WriteCvtPS2IYLd]>, VEX_WIG; 1588} 1589 1590def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1591 "cvttps2dq\t{$src, $dst|$dst, $src}", 1592 [(set VR128:$dst, 1593 (v4i32 (X86any_cvttp2si (v4f32 VR128:$src))))]>, 1594 Sched<[WriteCvtPS2I]>; 1595def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1596 "cvttps2dq\t{$src, $dst|$dst, $src}", 1597 [(set VR128:$dst, 1598 (v4i32 (X86any_cvttp2si (memopv4f32 addr:$src))))]>, 1599 Sched<[WriteCvtPS2ILd]>; 1600} 1601 1602// The 
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
// XMM only
def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvttpd2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v4i32 (X86any_cvttp2si (v2f64 VR128:$src))))]>,
                        VEX, Sched<[WriteCvtPD2I]>, VEX_WIG;
def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "cvttpd2dq{x}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))))]>,
                        VEX, Sched<[WriteCvtPD2ILd]>, VEX_WIG;

// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                         "cvttpd2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (v4i32 (X86any_cvttp2si (v4f64 VR256:$src))))]>,
                         VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                         "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (v4i32 (X86any_cvttp2si (loadv4f64 addr:$src))))]>,
                         VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG;
} // Predicates = [HasAVX, NoVLX]

def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
                (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}",
                (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">;

let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v4i32 (any_fp_to_sint (v4f64 VR256:$src))),
            (VCVTTPD2DQYrr VR256:$src)>;
  def : Pat<(v4i32 (any_fp_to_sint (loadv4f64 addr:$src))),
            (VCVTTPD2DQYrm addr:$src)>;
}

def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4i32 (X86any_cvttp2si (v2f64 VR128:$src))))]>,
                      Sched<[WriteCvtPD2I]>, SIMD_EXC;
def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4i32 (X86any_cvttp2si (memopv2f64 addr:$src))))]>,
                      Sched<[WriteCvtPD2ILd]>, SIMD_EXC;

// Convert packed single to packed double
let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
// SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    "vcvtps2pd\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>,
                    PS, VEX, Sched<[WriteCvtPS2PD]>, VEX_WIG;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                    "vcvtps2pd\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
                    PS, VEX, Sched<[WriteCvtPS2PD.Folded]>, VEX_WIG;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
                     [(set VR256:$dst, (v4f64 (any_fpextend (v4f32 VR128:$src))))]>,
                     PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, VEX_WIG;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
                     [(set VR256:$dst, (v4f64 (extloadv4f32 addr:$src)))]>,
                     PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, VEX_WIG;
}
let Predicates = [UseSSE2], Uses = [MXCSR], mayRaiseFPException = 1 in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "cvtps2pd\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>,
                   PS, Sched<[WriteCvtPS2PD]>;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                   "cvtps2pd\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
                   PS, Sched<[WriteCvtPS2PD.Folded]>;
}

// Convert Packed DW Integers to Packed Double FP
let Predicates = [HasAVX, NoVLX] in {
let hasSideEffects = 0, mayLoad = 1 in
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                       "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v2f64 (X86any_VSintToFP
                                 (bc_v4i32
                                  (v2i64 (scalar_to_vector
                                          (loadi64 addr:$src)))))))]>,
                       VEX, Sched<[WriteCvtI2PDLd]>, VEX_WIG;
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v2f64 (X86any_VSintToFP (v4i32 VR128:$src))))]>,
                       VEX, Sched<[WriteCvtI2PD]>, VEX_WIG;
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR256:$dst,
                          (v4f64 (any_sint_to_fp (loadv4i32 addr:$src))))]>,
                        VEX, VEX_L, Sched<[WriteCvtI2PDYLd]>,
                        VEX_WIG;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR256:$dst,
                          (v4f64 (any_sint_to_fp (v4i32 VR128:$src))))]>,
                        VEX, VEX_L, Sched<[WriteCvtI2PDY]>, VEX_WIG;
}

let hasSideEffects = 0, mayLoad = 1 in
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                      "cvtdq2pd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v2f64 (X86any_VSintToFP
                                (bc_v4i32
                                 (v2i64 (scalar_to_vector
                                         (loadi64 addr:$src)))))))]>,
                      Sched<[WriteCvtI2PDLd]>;
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvtdq2pd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v2f64 (X86any_VSintToFP (v4i32 VR128:$src))))]>,
                      Sched<[WriteCvtI2PD]>;

// AVX register conversion intrinsics
let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTDQ2PDrm addr:$src)>;
} // Predicates = [HasAVX, NoVLX]

// SSE2 register conversion intrinsics
let Predicates = [UseSSE2] in {
  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (CVTDQ2PDrm addr:$src)>;
} // Predicates = [UseSSE2]

// Convert packed double to packed single
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
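//
// For example (an illustrative sketch in AT&T syntax; the operands are
// assumptions, not taken from this file):
//   vcvtpd2ps  %ymm0, %xmm0    ; source width is implied by the ymm register
//   vcvtpd2psx (%rax), %xmm0   ; 'x' suffix marks a 128-bit memory source
//   vcvtpd2psy (%rax), %xmm0   ; 'y' suffix marks a 256-bit memory source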
let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
// XMM only
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtpd2ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (X86any_vfpround (v2f64 VR128:$src)))]>,
                       VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG;
def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (X86any_vfpround (loadv2f64 addr:$src)))]>,
                       VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG;

def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                        "cvtpd2ps\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (X86any_vfpround VR256:$src))]>,
                        VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                        "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (X86any_vfpround (loadv4f64 addr:$src)))]>,
                        VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG;
} // Predicates = [HasAVX, NoVLX]

def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
                (VCVTPD2PSrr VR128:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
                (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0, "att">;

def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (X86any_vfpround (v2f64 VR128:$src)))]>,
                     Sched<[WriteCvtPD2PS]>, SIMD_EXC;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (X86any_vfpround (memopv2f64 addr:$src)))]>,
                     Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXC;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
//===----------------------------------------------------------------------===//

// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
                            Operand memop, SDNode OpNode, ValueType VT,
                            PatFrag ld_frag, string asm,
                            X86FoldableSchedWrite sched,
                            PatFrags mem_frags> {
  def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2, u8imm:$cc), asm,
                    [(set VR128:$dst, (OpNode (VT VR128:$src1),
                                              VR128:$src2, timm:$cc))]>,
               Sched<[sched]>, SIMD_EXC;
  let mayLoad = 1 in
  def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
                    (ins VR128:$src1, memop:$src2, u8imm:$cc), asm,
                    [(set VR128:$dst, (OpNode (VT VR128:$src1),
                                              (mem_frags addr:$src2), timm:$cc))]>,
               Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rr : SIi8<0xC2, MRMSrcReg,
                  (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
                  [(set RC:$dst, (OpNode RC:$src1, RC:$src2, timm:$cc))]>,
             Sched<[sched]>, SIMD_EXC;
    def rm : SIi8<0xC2, MRMSrcMem,
                  (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
                  [(set RC:$dst, (OpNode RC:$src1,
                                         (ld_frag addr:$src2), timm:$cc))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}
let ExeDomain = SSEPackedSingle in
defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32,
                  "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                  SchedWriteFCmpSizes.PS.Scl, sse_load_f32>,
              XS, VEX_4V, VEX_LIG, VEX_WIG;
let ExeDomain = SSEPackedDouble in
defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64,
                  "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                  SchedWriteFCmpSizes.PD.Scl, sse_load_f64>,
              XD, VEX_4V, VEX_LIG, VEX_WIG;

let Constraints = "$src1 = $dst" in {
  let ExeDomain = SSEPackedSingle in
  defm CMPSS : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32,
                   "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                   SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS;
  let ExeDomain = SSEPackedDouble in
  defm CMPSD : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64,
                   "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                   SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, XD;
}

// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDPatternOperator OpNode,
                         ValueType vt, X86MemOperand x86memop,
                         PatFrag ld_frag, string OpcodeStr, Domain d,
                         X86FoldableSchedWrite sched = WriteFComX> {
  let ExeDomain = d in {
  def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
          Sched<[sched]>, SIMD_EXC;
  let mayLoad = 1 in
  def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode (vt RC:$src1),
                                   (ld_frag addr:$src2)))]>,
          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

// sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp
multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
                             ValueType vt, Operand memop,
                             PatFrags mem_frags, string OpcodeStr,
                             Domain d,
                             X86FoldableSchedWrite sched = WriteFComX> {
let ExeDomain = d in {
  def rr_Int: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
                 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                 [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
              Sched<[sched]>, SIMD_EXC;
let mayLoad = 1 in
  def rm_Int: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2),
                 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                 [(set EFLAGS, (OpNode (vt RC:$src1),
                                       (mem_frags addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
}

let Defs = [EFLAGS] in {
  defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32,
                                "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG;
  defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64,
                                "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG;
  defm VCOMISS : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32,
                               "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG;
  defm VCOMISD : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64,
                               "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG;

  let isCodeGenOnly = 1 in {
    defm VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
                      sse_load_f32, "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG;
    defm VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
                      sse_load_f64, "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG;
"comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG; 1896 defm VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, 1897 sse_load_f64, "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG; 1898 } 1899 defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32, 1900 "ucomiss", SSEPackedSingle>, PS; 1901 defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64, 1902 "ucomisd", SSEPackedDouble>, PD; 1903 defm COMISS : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32, 1904 "comiss", SSEPackedSingle>, PS; 1905 defm COMISD : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64, 1906 "comisd", SSEPackedDouble>, PD; 1907 1908 let isCodeGenOnly = 1 in { 1909 defm UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, 1910 sse_load_f32, "ucomiss", SSEPackedSingle>, PS; 1911 defm UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, 1912 sse_load_f64, "ucomisd", SSEPackedDouble>, PD; 1913 1914 defm COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, 1915 sse_load_f32, "comiss", SSEPackedSingle>, PS; 1916 defm COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, 1917 sse_load_f64, "comisd", SSEPackedDouble>, PD; 1918 } 1919} // Defs = [EFLAGS] 1920 1921// sse12_cmp_packed - sse 1 & 2 compare packed instructions 1922multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, 1923 ValueType VT, string asm, 1924 X86FoldableSchedWrite sched, 1925 Domain d, PatFrag ld_frag> { 1926 let isCommutable = 1 in 1927 def rri : PIi8<0xC2, MRMSrcReg, 1928 (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm, 1929 [(set RC:$dst, (VT (X86any_cmpp RC:$src1, RC:$src2, timm:$cc)))], d>, 1930 Sched<[sched]>, SIMD_EXC; 1931 def rmi : PIi8<0xC2, MRMSrcMem, 1932 (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm, 1933 [(set RC:$dst, 1934 (VT (X86any_cmpp RC:$src1, (ld_frag addr:$src2), timm:$cc)))], d>, 1935 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 1936} 1937 1938defm VCMPPS : sse12_cmp_packed<VR128, f128mem, v4f32, 1939 "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 1940 SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG; 1941defm VCMPPD : sse12_cmp_packed<VR128, f128mem, v2f64, 1942 "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 1943 SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG; 1944defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, v8f32, 1945 "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 1946 SchedWriteFCmpSizes.PS.YMM, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L, VEX_WIG; 1947defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, v4f64, 1948 "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 1949 SchedWriteFCmpSizes.PD.YMM, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L, VEX_WIG; 1950let Constraints = "$src1 = $dst" in { 1951 defm CMPPS : sse12_cmp_packed<VR128, f128mem, v4f32, 1952 "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}", 1953 SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, memopv4f32>, PS; 1954 defm CMPPD : sse12_cmp_packed<VR128, f128mem, v2f64, 1955 "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}", 1956 SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD; 1957} 1958 1959def CommutableCMPCC : PatLeaf<(timm), [{ 1960 uint64_t Imm = N->getZExtValue() & 0x7; 1961 return (Imm == 0x00 || Imm == 0x03 || Imm == 0x04 || Imm == 0x07); 1962}]>; 1963 1964// Patterns to select compares with loads in first operand. 
let Predicates = [HasAVX] in {
  def : Pat<(v4f64 (X86any_cmpp (loadv4f64 addr:$src2), VR256:$src1,
                                CommutableCMPCC:$cc)),
            (VCMPPDYrmi VR256:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(v8f32 (X86any_cmpp (loadv8f32 addr:$src2), VR256:$src1,
                                CommutableCMPCC:$cc)),
            (VCMPPSYrmi VR256:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(v2f64 (X86any_cmpp (loadv2f64 addr:$src2), VR128:$src1,
                                CommutableCMPCC:$cc)),
            (VCMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(v4f32 (X86any_cmpp (loadv4f32 addr:$src2), VR128:$src1,
                                CommutableCMPCC:$cc)),
            (VCMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
                          CommutableCMPCC:$cc)),
            (VCMPSDrm FR64:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
                          CommutableCMPCC:$cc)),
            (VCMPSSrm FR32:$src1, addr:$src2, timm:$cc)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(v2f64 (X86any_cmpp (memopv2f64 addr:$src2), VR128:$src1,
                                CommutableCMPCC:$cc)),
            (CMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
                          CommutableCMPCC:$cc)),
            (CMPSDrm FR64:$src1, addr:$src2, timm:$cc)>;
}

let Predicates = [UseSSE1] in {
  def : Pat<(v4f32 (X86any_cmpp (memopv4f32 addr:$src2), VR128:$src1,
                                CommutableCMPCC:$cc)),
            (CMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
                          CommutableCMPCC:$cc)),
            (CMPSSrm FR32:$src1, addr:$src2, timm:$cc)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Shuffle Instructions
//===----------------------------------------------------------------------===//

/// sse12_shuffle - sse 1 & 2 fp shuffle instructions
multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
                         ValueType vt, string asm, PatFrag mem_frag,
                         X86FoldableSchedWrite sched, Domain d,
                         bit IsCommutable = 0> {
  def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
                 (ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
                 [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
                                              (i8 timm:$src3))))], d>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = IsCommutable in
  def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
                 (ins RC:$src1, RC:$src2, u8imm:$src3), asm,
                 [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
                                              (i8 timm:$src3))))], d>,
            Sched<[sched]>;
}

let Predicates = [HasAVX, NoVLX] in {
  defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
                   "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                   loadv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>,
                 PS, VEX_4V, VEX_WIG;
  defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
                    "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                    loadv8f32, SchedWriteFShuffle.YMM, SSEPackedSingle>,
                  PS, VEX_4V, VEX_L, VEX_WIG;
  defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
                   "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                   loadv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>,
                 PD, VEX_4V, VEX_WIG;
  defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
                    "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                    loadv4f64, SchedWriteFShuffle.YMM, SSEPackedDouble>,
                  PD, VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
  defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
                  "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                  memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
  defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
                  "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                  memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Unpack FP Instructions
//===----------------------------------------------------------------------===//

/// sse12_unpack_interleave - sse 1 & 2 fp unpack and interleave
multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
                                   PatFrag mem_frag, RegisterClass RC,
                                   X86MemOperand x86memop, string asm,
                                   X86FoldableSchedWrite sched, Domain d,
                                   bit IsCommutable = 0> {
  let isCommutable = IsCommutable in
  def rr : PI<opc, MRMSrcReg,
              (outs RC:$dst), (ins RC:$src1, RC:$src2),
              asm, [(set RC:$dst,
                         (vt (OpNode RC:$src1, RC:$src2)))], d>,
           Sched<[sched]>;
  def rm : PI<opc, MRMSrcMem,
              (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
              asm, [(set RC:$dst,
                         (vt (OpNode RC:$src1,
                                     (mem_frag addr:$src2))))], d>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

let Predicates = [HasAVX, NoVLX] in {
defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, load,
      VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, load,
      VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD, VEX_4V, VEX_WIG;
defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, load,
      VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, load,
      VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SchedWriteFShuffle.XMM, SSEPackedDouble>, PD, VEX_4V, VEX_WIG;

defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, load,
      VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, load,
      VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, load,
      VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, load,
      VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
} // Predicates = [HasAVX, NoVLX]

let Constraints = "$src1 = $dst" in {
  defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memop,
        VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
        SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
"unpckhpd\t{$src2, $dst|$dst, $src2}", 2118 SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD; 2119 defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memop, 2120 VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}", 2121 SchedWriteFShuffle.XMM, SSEPackedSingle>, PS; 2122 defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memop, 2123 VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}", 2124 SchedWriteFShuffle.XMM, SSEPackedDouble>, PD; 2125} // Constraints = "$src1 = $dst" 2126 2127let Predicates = [HasAVX1Only] in { 2128 def : Pat<(v8i32 (X86Unpckl VR256:$src1, (loadv8i32 addr:$src2))), 2129 (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; 2130 def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)), 2131 (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; 2132 def : Pat<(v8i32 (X86Unpckh VR256:$src1, (loadv8i32 addr:$src2))), 2133 (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; 2134 def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)), 2135 (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; 2136 2137 def : Pat<(v4i64 (X86Unpckl VR256:$src1, (loadv4i64 addr:$src2))), 2138 (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; 2139 def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)), 2140 (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; 2141 def : Pat<(v4i64 (X86Unpckh VR256:$src1, (loadv4i64 addr:$src2))), 2142 (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; 2143 def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)), 2144 (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; 2145} 2146 2147let Predicates = [UseSSE2] in { 2148 // Use MOVHPD if the load isn't aligned enough for UNPCKLPD. 2149 def : Pat<(v2f64 (X86Unpckl VR128:$src1, 2150 (v2f64 (simple_load addr:$src2)))), 2151 (MOVHPDrm VR128:$src1, addr:$src2)>; 2152} 2153 2154//===----------------------------------------------------------------------===// 2155// SSE 1 & 2 - Extract Floating-Point Sign mask 2156//===----------------------------------------------------------------------===// 2157 2158/// sse12_extr_sign_mask - sse 1 & 2 unpack and interleave 2159multiclass sse12_extr_sign_mask<RegisterClass RC, ValueType vt, 2160 string asm, Domain d> { 2161 def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src), 2162 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 2163 [(set GR32orGR64:$dst, (X86movmsk (vt RC:$src)))], d>, 2164 Sched<[WriteFMOVMSK]>; 2165} 2166 2167let Predicates = [HasAVX] in { 2168 defm VMOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps", 2169 SSEPackedSingle>, PS, VEX, VEX_WIG; 2170 defm VMOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd", 2171 SSEPackedDouble>, PD, VEX, VEX_WIG; 2172 defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, v8f32, "movmskps", 2173 SSEPackedSingle>, PS, VEX, VEX_L, VEX_WIG; 2174 defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, v4f64, "movmskpd", 2175 SSEPackedDouble>, PD, VEX, VEX_L, VEX_WIG; 2176 2177 // Also support integer VTs to avoid a int->fp bitcast in the DAG. 2178 def : Pat<(X86movmsk (v4i32 VR128:$src)), 2179 (VMOVMSKPSrr VR128:$src)>; 2180 def : Pat<(X86movmsk (v2i64 VR128:$src)), 2181 (VMOVMSKPDrr VR128:$src)>; 2182 def : Pat<(X86movmsk (v8i32 VR256:$src)), 2183 (VMOVMSKPSYrr VR256:$src)>; 2184 def : Pat<(X86movmsk (v4i64 VR256:$src)), 2185 (VMOVMSKPDYrr VR256:$src)>; 2186} 2187 2188defm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps", 2189 SSEPackedSingle>, PS; 2190defm MOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd", 2191 SSEPackedDouble>, PD; 2192 2193let Predicates = [UseSSE2] in { 2194 // Also support integer VTs to avoid a int->fp bitcast in the DAG. 
  def : Pat<(X86movmsk (v4i32 VR128:$src)),
            (MOVMSKPSrr VR128:$src)>;
  def : Pat<(X86movmsk (v2i64 VR128:$src)),
            (MOVMSKPDrr VR128:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in { // SSE integer instructions

/// PDI_binop_rm - Simple SSE2 binary operator.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                        X86MemOperand x86memop, X86FoldableSchedWrite sched,
                        bit IsCommutable, bit Is2Addr> {
  let isCommutable = IsCommutable in
  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
               (ins RC:$src1, RC:$src2),
               !if(Is2Addr,
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
               [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
           Sched<[sched]>;
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86memop:$src2),
               !if(Is2Addr,
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
               [(set RC:$dst, (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // ExeDomain = SSEPackedInt

multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
                         ValueType OpVT128, ValueType OpVT256,
                         X86SchedWriteWidths sched, bit IsCommutable,
                         Predicate prd> {
let Predicates = [HasAVX, prd] in
  defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
                             VR128, load, i128mem, sched.XMM,
                             IsCommutable, 0>, VEX_4V, VEX_WIG;

let Constraints = "$src1 = $dst" in
  defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
                           memop, i128mem, sched.XMM, IsCommutable, 1>;

let Predicates = [HasAVX2, prd] in
  defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
                               OpVT256, VR256, load, i256mem, sched.YMM,
                               IsCommutable, 0>, VEX_4V, VEX_L, VEX_WIG;
}

// These are ordered here for pattern ordering requirements with the fp versions.

defm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64,
                          SchedWriteVecLogic, 1, NoVLX>;
defm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64,
                         SchedWriteVecLogic, 1, NoVLX>;
defm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64,
                          SchedWriteVecLogic, 1, NoVLX>;
defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
                           SchedWriteVecLogic, 0, NoVLX>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Logical Instructions
//===----------------------------------------------------------------------===//

/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
///
/// There are no patterns here because isel prefers integer versions for SSE2
/// and later. There are SSE1 v4f32 patterns later.
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX, NoVLX] in {
  defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
        !strconcat(OpcodeStr, "ps"), f256mem, sched.YMM,
        [], [], 0>, PS, VEX_4V, VEX_L, VEX_WIG;

  defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
        !strconcat(OpcodeStr, "pd"), f256mem, sched.YMM,
        [], [], 0>, PD, VEX_4V, VEX_L, VEX_WIG;

  defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
        !strconcat(OpcodeStr, "ps"), f128mem, sched.XMM,
        [], [], 0>, PS, VEX_4V, VEX_WIG;

  defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
        !strconcat(OpcodeStr, "pd"), f128mem, sched.XMM,
        [], [], 0>, PD, VEX_4V, VEX_WIG;
  }

  let Constraints = "$src1 = $dst" in {
  defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
        !strconcat(OpcodeStr, "ps"), f128mem, sched.XMM,
        [], []>, PS;

  defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
        !strconcat(OpcodeStr, "pd"), f128mem, sched.XMM,
        [], []>, PD;
  }
}

defm AND : sse12_fp_packed_logical<0x54, "and", and, SchedWriteFLogic>;
defm OR : sse12_fp_packed_logical<0x56, "or", or, SchedWriteFLogic>;
defm XOR : sse12_fp_packed_logical<0x57, "xor", xor, SchedWriteFLogic>;
let isCommutable = 0 in
  defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp, SchedWriteFLogic>;

let Predicates = [HasAVX2, NoVLX] in {
  def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
            (VPANDYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
            (VPANDYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
            (VPANDYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
            (VPORYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
            (VPORYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
            (VPORYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
            (VPXORYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
            (VPXORYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
            (VPXORYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
            (VPANDNYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
            (VPANDNYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
            (VPANDNYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)),
            (VPANDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)),
            (VPANDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)),
            (VPANDYrm VR256:$src1, addr:$src2)>;

  def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)),
            (VPORYrm VR256:$src1, addr:$src2)>;
  def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)),
            (VPORYrm VR256:$src1, addr:$src2)>;
  def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)),
            (VPORYrm VR256:$src1, addr:$src2)>;

  def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)),
            (VPXORYrm VR256:$src1, addr:$src2)>;
  def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)),
            (VPXORYrm VR256:$src1, addr:$src2)>;
  def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)),
            (VPXORYrm VR256:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)),
            (VPANDNYrm VR256:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)),
            (VPANDNYrm VR256:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)),
            (VPANDNYrm VR256:$src1, addr:$src2)>;
}

// If only AVX1 is supported, we need to handle integer operations with
// floating point instructions since the integer versions aren't available.
let Predicates = [HasAVX1Only] in {
  def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
            (VANDPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
            (VANDPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
            (VANDPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v4i64 (and VR256:$src1, VR256:$src2)),
            (VANDPSYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
            (VORPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
            (VORPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
            (VORPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v4i64 (or VR256:$src1, VR256:$src2)),
            (VORPSYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
            (VXORPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
            (VXORPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
            (VXORPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v4i64 (xor VR256:$src1, VR256:$src2)),
            (VXORPSYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v4i64 (X86andnp VR256:$src1, VR256:$src2)),
            (VANDNPSYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)),
            (VANDPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)),
            (VANDPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)),
            (VANDPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(and VR256:$src1, (loadv4i64 addr:$src2)),
            (VANDPSYrm VR256:$src1, addr:$src2)>;

  def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)),
            (VORPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)),
            (VORPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)),
            (VORPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(or VR256:$src1, (loadv4i64 addr:$src2)),
            (VORPSYrm VR256:$src1, addr:$src2)>;

  def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)),
            (VXORPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)),
            (VXORPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)),
            (VXORPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(xor VR256:$src1, (loadv4i64 addr:$src2)),
            (VXORPSYrm VR256:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)),
            (VANDNPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)),
            (VANDNPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)),
            (VANDNPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256:$src1, (loadv4i64 addr:$src2)),
            (VANDNPSYrm VR256:$src1, addr:$src2)>;
}

let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
            (VPANDrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
            (VPANDrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
            (VPANDrr VR128:$src1, VR128:$src2)>;

  def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
            (VPORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
            (VPORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
            (VPORrr VR128:$src1, VR128:$src2)>;

  def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
            (VPXORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
            (VPXORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
            (VPXORrr VR128:$src1, VR128:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
            (VPANDNrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
            (VPANDNrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
            (VPANDNrr VR128:$src1, VR128:$src2)>;

  def : Pat<(and VR128:$src1, (loadv16i8 addr:$src2)),
            (VPANDrm VR128:$src1, addr:$src2)>;
  def : Pat<(and VR128:$src1, (loadv8i16 addr:$src2)),
            (VPANDrm VR128:$src1, addr:$src2)>;
  def : Pat<(and VR128:$src1, (loadv4i32 addr:$src2)),
            (VPANDrm VR128:$src1, addr:$src2)>;

  def : Pat<(or VR128:$src1, (loadv16i8 addr:$src2)),
            (VPORrm VR128:$src1, addr:$src2)>;
  def : Pat<(or VR128:$src1, (loadv8i16 addr:$src2)),
            (VPORrm VR128:$src1, addr:$src2)>;
  def : Pat<(or VR128:$src1, (loadv4i32 addr:$src2)),
            (VPORrm VR128:$src1, addr:$src2)>;

  def : Pat<(xor VR128:$src1, (loadv16i8 addr:$src2)),
            (VPXORrm VR128:$src1, addr:$src2)>;
  def : Pat<(xor VR128:$src1, (loadv8i16 addr:$src2)),
            (VPXORrm VR128:$src1, addr:$src2)>;
  def : Pat<(xor VR128:$src1, (loadv4i32 addr:$src2)),
            (VPXORrm VR128:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128:$src1, (loadv16i8 addr:$src2)),
            (VPANDNrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128:$src1, (loadv8i16 addr:$src2)),
            (VPANDNrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128:$src1, (loadv4i32 addr:$src2)),
            (VPANDNrm VR128:$src1, addr:$src2)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
            (PANDrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
            (PANDrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
            (PANDrr VR128:$src1, VR128:$src2)>;

  def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
            (PORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
            (PORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
            (PORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
            (PXORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
            (PXORrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
            (PXORrr VR128:$src1, VR128:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
            (PANDNrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
            (PANDNrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
            (PANDNrr VR128:$src1, VR128:$src2)>;

  def : Pat<(and VR128:$src1, (memopv16i8 addr:$src2)),
            (PANDrm VR128:$src1, addr:$src2)>;
  def : Pat<(and VR128:$src1, (memopv8i16 addr:$src2)),
            (PANDrm VR128:$src1, addr:$src2)>;
  def : Pat<(and VR128:$src1, (memopv4i32 addr:$src2)),
            (PANDrm VR128:$src1, addr:$src2)>;

  def : Pat<(or VR128:$src1, (memopv16i8 addr:$src2)),
            (PORrm VR128:$src1, addr:$src2)>;
  def : Pat<(or VR128:$src1, (memopv8i16 addr:$src2)),
            (PORrm VR128:$src1, addr:$src2)>;
  def : Pat<(or VR128:$src1, (memopv4i32 addr:$src2)),
            (PORrm VR128:$src1, addr:$src2)>;

  def : Pat<(xor VR128:$src1, (memopv16i8 addr:$src2)),
            (PXORrm VR128:$src1, addr:$src2)>;
  def : Pat<(xor VR128:$src1, (memopv8i16 addr:$src2)),
            (PXORrm VR128:$src1, addr:$src2)>;
  def : Pat<(xor VR128:$src1, (memopv4i32 addr:$src2)),
            (PXORrm VR128:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128:$src1, (memopv16i8 addr:$src2)),
            (PANDNrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128:$src1, (memopv8i16 addr:$src2)),
            (PANDNrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128:$src1, (memopv4i32 addr:$src2)),
            (PANDNrm VR128:$src1, addr:$src2)>;
}

// Patterns for packed operations when we don't have an integer type available.
def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
          (ANDPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)),
          (ORPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)),
          (XORPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)),
          (ANDNPSrr VR128:$src1, VR128:$src2)>;

def : Pat<(X86fand VR128:$src1, (memopv4f32 addr:$src2)),
          (ANDPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86for VR128:$src1, (memopv4f32 addr:$src2)),
          (ORPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86fxor VR128:$src1, (memopv4f32 addr:$src2)),
          (XORPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)),
          (ANDNPSrm VR128:$src1, addr:$src2)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Arithmetic Instructions
//===----------------------------------------------------------------------===//

/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and
/// vector forms.
///
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a scalar)
/// and leaves the top elements unmodified (therefore these cannot be commuted).
///
/// These three forms can each be reg+reg or reg+mem.
///

/// FIXME: once all 256-bit intrinsics are matched, clean up and refactor those
/// classes below
multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNode, X86SchedWriteSizes sched> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  let Predicates = [HasAVX, NoVLX] in {
  defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
                                   VR128, v4f32, f128mem, loadv4f32,
                                   SSEPackedSingle, sched.PS.XMM, 0>, PS, VEX_4V, VEX_WIG;
  defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
                                   VR128, v2f64, f128mem, loadv2f64,
                                   SSEPackedDouble, sched.PD.XMM, 0>, PD, VEX_4V, VEX_WIG;

  defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
                                    OpNode, VR256, v8f32, f256mem, loadv8f32,
                                    SSEPackedSingle, sched.PS.YMM, 0>, PS, VEX_4V, VEX_L, VEX_WIG;
  defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
                                    OpNode, VR256, v4f64, f256mem, loadv4f64,
                                    SSEPackedDouble, sched.PD.YMM, 0>, PD, VEX_4V, VEX_L, VEX_WIG;
  }

  let Constraints = "$src1 = $dst" in {
    defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
                              v4f32, f128mem, memopv4f32, SSEPackedSingle,
                              sched.PS.XMM>, PS;
    defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
                              v2f64, f128mem, memopv2f64, SSEPackedDouble,
                              sched.PD.XMM>, PD;
  }
}
}

multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                  X86SchedWriteSizes sched> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
                                   OpNode, FR32, f32mem, SSEPackedSingle, sched.PS.Scl, 0>,
                   XS, VEX_4V, VEX_LIG, VEX_WIG;
  defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
                                   OpNode, FR64, f64mem, SSEPackedDouble, sched.PD.Scl, 0>,
                   XD, VEX_4V, VEX_LIG, VEX_WIG;

  let Constraints = "$src1 = $dst" in {
    defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
                              OpNode, FR32, f32mem, SSEPackedSingle,
                              sched.PS.Scl>, XS;
    defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
                              OpNode, FR64, f64mem, SSEPackedDouble,
                              sched.PD.Scl>, XD;
  }
}
}

multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      X86SchedWriteSizes sched> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
                     !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
                     SSEPackedSingle, sched.PS.Scl, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG;
  defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
                     !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
                     SSEPackedDouble, sched.PD.Scl, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG;

  let Constraints = "$src1 = $dst" in {
    defm SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
                !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
                SSEPackedSingle, sched.PS.Scl>, XS;
    defm SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
                !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
                SSEPackedDouble, sched.PD.Scl>, XD;
  }
}
}

// Binary Arithmetic instructions
defm ADD : basic_sse12_fp_binop_p<0x58, "add", any_fadd, SchedWriteFAddSizes>,
           basic_sse12_fp_binop_s<0x58, "add", any_fadd, SchedWriteFAddSizes>,
           basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAddSizes>;
defm MUL : basic_sse12_fp_binop_p<0x59, "mul", any_fmul, SchedWriteFMulSizes>,
           basic_sse12_fp_binop_s<0x59, "mul", any_fmul, SchedWriteFMulSizes>,
           basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMulSizes>;
let isCommutable = 0 in {
  defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", any_fsub, SchedWriteFAddSizes>,
             basic_sse12_fp_binop_s<0x5C, "sub", any_fsub, SchedWriteFAddSizes>,
             basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAddSizes>;
  defm DIV : basic_sse12_fp_binop_p<0x5E, "div", any_fdiv, SchedWriteFDivSizes>,
             basic_sse12_fp_binop_s<0x5E, "div", any_fdiv, SchedWriteFDivSizes>,
             basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDivSizes>;
  defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SchedWriteFCmpSizes>;
  defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SchedWriteFCmpSizes>;
}

let isCodeGenOnly = 1 in {
  defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>;
  defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SchedWriteFCmpSizes>;
}

// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// Previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
//   }
//
// Previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0

// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
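// For reference, case (1) above reaches instruction selection as a DAG of
// roughly this shape (shown for addss; sub/mul/div are analogous):
//   (X86Movss (v4f32 VR128:$dst),
//             (scalar_to_vector
//               (any_fadd (extractelt (v4f32 VR128:$dst), 0), FR32:$src)))
// The scalar_math_patterns instantiations below rewrite such DAGs to the
// _Int form of the matching scalar instruction (e.g. ADDSSrr_Int).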
multiclass scalar_math_patterns<SDPatternOperator Op, string OpcPrefix, SDNode Move,
                                ValueType VT, ValueType EltTy,
                                RegisterClass RC, PatFrag ld_frag,
                                Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    // extracted scalar math op with insert via movss/movsd
    def : Pat<(VT (Move (VT VR128:$dst),
                        (VT (scalar_to_vector
                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                                 RC:$src))))),
              (!cast<Instruction>(OpcPrefix#rr_Int) VT:$dst,
               (VT (COPY_TO_REGCLASS RC:$src, VR128)))>;
    def : Pat<(VT (Move (VT VR128:$dst),
                        (VT (scalar_to_vector
                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                                 (ld_frag addr:$src)))))),
              (!cast<Instruction>(OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
  }

  // Repeat for AVX versions of the instructions.
  let Predicates = [UseAVX] in {
    // extracted scalar math op with insert via movss/movsd
    def : Pat<(VT (Move (VT VR128:$dst),
                        (VT (scalar_to_vector
                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                                 RC:$src))))),
              (!cast<Instruction>("V"#OpcPrefix#rr_Int) VT:$dst,
               (VT (COPY_TO_REGCLASS RC:$src, VR128)))>;
    def : Pat<(VT (Move (VT VR128:$dst),
                        (VT (scalar_to_vector
                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                                 (ld_frag addr:$src)))))),
              (!cast<Instruction>("V"#OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
  }
}

defm : scalar_math_patterns<any_fadd, "ADDSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
defm : scalar_math_patterns<any_fsub, "SUBSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
defm : scalar_math_patterns<any_fmul, "MULSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
defm : scalar_math_patterns<any_fdiv, "DIVSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;

defm : scalar_math_patterns<any_fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
defm : scalar_math_patterns<any_fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
defm : scalar_math_patterns<any_fmul, "MULSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
defm : scalar_math_patterns<any_fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;

/// Unop Arithmetic
/// We also have a special variant of the scalar form here to represent the
/// associated intrinsic operation. Unlike the plain scalar form, it takes an
/// entire vector (instead of a scalar) and leaves the top elements undefined.
///
/// There is also a variant for the full-vector intrinsic form.

/// sse_fp_unop_s - SSE1 unops in scalar form
/// For the non-AVX defs, we need $src1 to be tied to $dst because
/// the HW instructions are 2 operand / destructive.
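/// e.g. the non-VEX "rcpss %xmm1, %xmm0" writes only the low 32 bits of
/// %xmm0 and leaves the remaining bits unchanged, so the _Int forms below
/// tie $src1 to $dst to model that pass-through.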
2792multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC, 2793 ValueType ScalarVT, X86MemOperand x86memop, 2794 Operand intmemop, SDPatternOperator OpNode, Domain d, 2795 X86FoldableSchedWrite sched, Predicate target> { 2796 let isCodeGenOnly = 1, hasSideEffects = 0 in { 2797 def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1), 2798 !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"), 2799 [(set RC:$dst, (OpNode RC:$src1))], d>, Sched<[sched]>, 2800 Requires<[target]>; 2801 let mayLoad = 1 in 2802 def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1), 2803 !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"), 2804 [(set RC:$dst, (OpNode (load addr:$src1)))], d>, 2805 Sched<[sched.Folded]>, 2806 Requires<[target, OptForSize]>; 2807 } 2808 2809 let hasSideEffects = 0, Constraints = "$src1 = $dst", ExeDomain = d in { 2810 def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 2811 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>, 2812 Sched<[sched]>; 2813 let mayLoad = 1 in 2814 def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, intmemop:$src2), 2815 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>, 2816 Sched<[sched.Folded, sched.ReadAfterFold]>; 2817 } 2818 2819} 2820 2821multiclass sse_fp_unop_s_intr<RegisterClass RC, ValueType vt, 2822 PatFrags mem_frags, Intrinsic Intr, 2823 Predicate target, string Suffix> { 2824 let Predicates = [target] in { 2825 // These are unary operations, but they are modeled as having 2 source operands 2826 // because the high elements of the destination are unchanged in SSE. 2827 def : Pat<(Intr VR128:$src), 2828 (!cast<Instruction>(NAME#r_Int) VR128:$src, VR128:$src)>; 2829 } 2830 // We don't want to fold scalar loads into these instructions unless 2831 // optimizing for size. This is because the folded instruction will have a 2832 // partial register update, while the unfolded sequence will not, e.g. 2833 // movss mem, %xmm0 2834 // rcpss %xmm0, %xmm0 2835 // which has a clobber before the rcp, vs. 
2836 // rcpss mem, %xmm0 2837 let Predicates = [target, OptForSize] in { 2838 def : Pat<(Intr (mem_frags addr:$src2)), 2839 (!cast<Instruction>(NAME#m_Int) 2840 (vt (IMPLICIT_DEF)), addr:$src2)>; 2841 } 2842} 2843 2844multiclass avx_fp_unop_s_intr<RegisterClass RC, ValueType vt, PatFrags mem_frags, 2845 Intrinsic Intr, Predicate target> { 2846 let Predicates = [target] in { 2847 def : Pat<(Intr VR128:$src), 2848 (!cast<Instruction>(NAME#r_Int) VR128:$src, 2849 VR128:$src)>; 2850 } 2851 let Predicates = [target, OptForSize] in { 2852 def : Pat<(Intr (mem_frags addr:$src2)), 2853 (!cast<Instruction>(NAME#m_Int) 2854 (vt (IMPLICIT_DEF)), addr:$src2)>; 2855 } 2856} 2857 2858multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC, 2859 ValueType ScalarVT, X86MemOperand x86memop, 2860 Operand intmemop, SDPatternOperator OpNode, Domain d, 2861 X86FoldableSchedWrite sched, Predicate target> { 2862 let isCodeGenOnly = 1, hasSideEffects = 0 in { 2863 def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), 2864 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2865 [], d>, Sched<[sched]>; 2866 let mayLoad = 1 in 2867 def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 2868 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2869 [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>; 2870 } 2871 let hasSideEffects = 0, ExeDomain = d in { 2872 def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), 2873 (ins VR128:$src1, VR128:$src2), 2874 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2875 []>, Sched<[sched]>; 2876 let mayLoad = 1 in 2877 def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), 2878 (ins VR128:$src1, intmemop:$src2), 2879 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2880 []>, Sched<[sched.Folded, sched.ReadAfterFold]>; 2881 } 2882 2883 // We don't want to fold scalar loads into these instructions unless 2884 // optimizing for size. This is because the folded instruction will have a 2885 // partial register update, while the unfolded sequence will not, e.g. 2886 // vmovss mem, %xmm0 2887 // vrcpss %xmm0, %xmm0, %xmm0 2888 // which has a clobber before the rcp, vs. 2889 // vrcpss mem, %xmm0, %xmm0 2890 // TODO: In theory, we could fold the load, and avoid the stall caused by 2891 // the partial register store, either in BreakFalseDeps or with smarter RA. 2892 let Predicates = [target] in { 2893 def : Pat<(OpNode RC:$src), (!cast<Instruction>(NAME#r) 2894 (ScalarVT (IMPLICIT_DEF)), RC:$src)>; 2895 } 2896 let Predicates = [target, OptForSize] in { 2897 def : Pat<(ScalarVT (OpNode (load addr:$src))), 2898 (!cast<Instruction>(NAME#m) (ScalarVT (IMPLICIT_DEF)), 2899 addr:$src)>; 2900 } 2901} 2902 2903/// sse1_fp_unop_p - SSE1 unops in packed form. 
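/// Instantiated below for sqrtps, rsqrtps and rcpps, in both the legacy SSE
/// encoding and the VEX-encoded 128-bit and 256-bit forms.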
2904multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 2905 X86SchedWriteWidths sched, list<Predicate> prds> { 2906let Predicates = prds in { 2907 def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 2908 !strconcat("v", OpcodeStr, 2909 "ps\t{$src, $dst|$dst, $src}"), 2910 [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>, 2911 VEX, Sched<[sched.XMM]>, VEX_WIG; 2912 def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 2913 !strconcat("v", OpcodeStr, 2914 "ps\t{$src, $dst|$dst, $src}"), 2915 [(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))]>, 2916 VEX, Sched<[sched.XMM.Folded]>, VEX_WIG; 2917 def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 2918 !strconcat("v", OpcodeStr, 2919 "ps\t{$src, $dst|$dst, $src}"), 2920 [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>, 2921 VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG; 2922 def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), 2923 !strconcat("v", OpcodeStr, 2924 "ps\t{$src, $dst|$dst, $src}"), 2925 [(set VR256:$dst, (OpNode (loadv8f32 addr:$src)))]>, 2926 VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG; 2927} 2928 2929 def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 2930 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), 2931 [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>, 2932 Sched<[sched.XMM]>; 2933 def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 2934 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), 2935 [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>, 2936 Sched<[sched.XMM.Folded]>; 2937} 2938 2939/// sse2_fp_unop_p - SSE2 unops in vector forms. 2940multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr, 2941 SDPatternOperator OpNode, X86SchedWriteWidths sched> { 2942let Predicates = [HasAVX, NoVLX] in { 2943 def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 2944 !strconcat("v", OpcodeStr, 2945 "pd\t{$src, $dst|$dst, $src}"), 2946 [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>, 2947 VEX, Sched<[sched.XMM]>, VEX_WIG; 2948 def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 2949 !strconcat("v", OpcodeStr, 2950 "pd\t{$src, $dst|$dst, $src}"), 2951 [(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))]>, 2952 VEX, Sched<[sched.XMM.Folded]>, VEX_WIG; 2953 def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 2954 !strconcat("v", OpcodeStr, 2955 "pd\t{$src, $dst|$dst, $src}"), 2956 [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>, 2957 VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG; 2958 def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), 2959 !strconcat("v", OpcodeStr, 2960 "pd\t{$src, $dst|$dst, $src}"), 2961 [(set VR256:$dst, (OpNode (loadv4f64 addr:$src)))]>, 2962 VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG; 2963} 2964 2965 def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 2966 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), 2967 [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>, 2968 Sched<[sched.XMM]>; 2969 def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 2970 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), 2971 [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>, 2972 Sched<[sched.XMM.Folded]>; 2973} 2974 2975multiclass sse1_fp_unop_s_intr<bits<8> opc, string OpcodeStr, SDNode OpNode, 2976 X86SchedWriteWidths sched, Predicate AVXTarget> { 2977 defm SS : sse_fp_unop_s_intr<FR32, v4f32, sse_load_f32, 2978 
!cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss), 2979 UseSSE1, "SS">, XS; 2980 defm V#NAME#SS : avx_fp_unop_s_intr<FR32, v4f32, sse_load_f32, 2981 !cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss), 2982 AVXTarget>, 2983 XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable; 2984} 2985 2986multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 2987 X86SchedWriteWidths sched, Predicate AVXTarget> { 2988 defm SS : sse_fp_unop_s<opc, OpcodeStr#ss, FR32, f32, f32mem, 2989 ssmem, OpNode, SSEPackedSingle, sched.Scl, UseSSE1>, XS; 2990 defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr#ss, FR32, f32, 2991 f32mem, ssmem, OpNode, SSEPackedSingle, sched.Scl, AVXTarget>, 2992 XS, VEX_4V, VEX_LIG, VEX_WIG; 2993} 2994 2995multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 2996 X86SchedWriteWidths sched, Predicate AVXTarget> { 2997 defm SD : sse_fp_unop_s<opc, OpcodeStr#sd, FR64, f64, f64mem, 2998 sdmem, OpNode, SSEPackedDouble, sched.Scl, UseSSE2>, XD; 2999 defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr#sd, FR64, f64, 3000 f64mem, sdmem, OpNode, SSEPackedDouble, sched.Scl, AVXTarget>, 3001 XD, VEX_4V, VEX_LIG, VEX_WIG; 3002} 3003 3004// Square root. 3005defm SQRT : sse1_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, UseAVX>, 3006 sse1_fp_unop_p<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>, 3007 sse2_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt64, UseAVX>, 3008 sse2_fp_unop_p<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt64>, SIMD_EXC; 3009 3010// Reciprocal approximations. Note that these typically require refinement 3011// in order to obtain suitable precision. 3012defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>, 3013 sse1_fp_unop_s_intr<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>, 3014 sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, [HasAVX]>; 3015defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>, 3016 sse1_fp_unop_s_intr<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>, 3017 sse1_fp_unop_p<0x53, "rcp", X86frcp, SchedWriteFRcp, [HasAVX]>; 3018 3019// There is no f64 version of the reciprocal approximation instructions. 3020 3021multiclass scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix, SDNode Move, 3022 ValueType VT, Predicate BasePredicate> { 3023 let Predicates = [BasePredicate] in { 3024 def : Pat<(VT (Move VT:$dst, (scalar_to_vector 3025 (OpNode (extractelt VT:$src, 0))))), 3026 (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src)>; 3027 } 3028 3029 // Repeat for AVX versions of the instructions. 3030 let Predicates = [UseAVX] in { 3031 def : Pat<(VT (Move VT:$dst, (scalar_to_vector 3032 (OpNode (extractelt VT:$src, 0))))), 3033 (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>; 3034 } 3035} 3036 3037defm : scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32, UseSSE1>; 3038defm : scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64, UseSSE2>; 3039 3040multiclass scalar_unary_math_intr_patterns<Intrinsic Intr, string OpcPrefix, 3041 SDNode Move, ValueType VT, 3042 Predicate BasePredicate> { 3043 let Predicates = [BasePredicate] in { 3044 def : Pat<(VT (Move VT:$dst, (Intr VT:$src))), 3045 (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src)>; 3046 } 3047 3048 // Repeat for AVX versions of the instructions. 
3049 let Predicates = [HasAVX] in { 3050 def : Pat<(VT (Move VT:$dst, (Intr VT:$src))), 3051 (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>; 3052 } 3053} 3054 3055defm : scalar_unary_math_intr_patterns<int_x86_sse_rcp_ss, "RCPSS", X86Movss, 3056 v4f32, UseSSE1>; 3057defm : scalar_unary_math_intr_patterns<int_x86_sse_rsqrt_ss, "RSQRTSS", X86Movss, 3058 v4f32, UseSSE1>; 3059 3060 3061//===----------------------------------------------------------------------===// 3062// SSE 1 & 2 - Non-temporal stores 3063//===----------------------------------------------------------------------===// 3064 3065let AddedComplexity = 400 in { // Prefer non-temporal versions 3066let Predicates = [HasAVX, NoVLX] in { 3067let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in { 3068def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), 3069 (ins f128mem:$dst, VR128:$src), 3070 "movntps\t{$src, $dst|$dst, $src}", 3071 [(alignednontemporalstore (v4f32 VR128:$src), 3072 addr:$dst)]>, VEX, VEX_WIG; 3073def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), 3074 (ins f128mem:$dst, VR128:$src), 3075 "movntpd\t{$src, $dst|$dst, $src}", 3076 [(alignednontemporalstore (v2f64 VR128:$src), 3077 addr:$dst)]>, VEX, VEX_WIG; 3078} // SchedRW 3079 3080let SchedRW = [SchedWriteFMoveLSNT.YMM.MR] in { 3081def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), 3082 (ins f256mem:$dst, VR256:$src), 3083 "movntps\t{$src, $dst|$dst, $src}", 3084 [(alignednontemporalstore (v8f32 VR256:$src), 3085 addr:$dst)]>, VEX, VEX_L, VEX_WIG; 3086def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), 3087 (ins f256mem:$dst, VR256:$src), 3088 "movntpd\t{$src, $dst|$dst, $src}", 3089 [(alignednontemporalstore (v4f64 VR256:$src), 3090 addr:$dst)]>, VEX, VEX_L, VEX_WIG; 3091} // SchedRW 3092 3093let ExeDomain = SSEPackedInt in { 3094def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), 3095 (ins i128mem:$dst, VR128:$src), 3096 "movntdq\t{$src, $dst|$dst, $src}", 3097 [(alignednontemporalstore (v2i64 VR128:$src), 3098 addr:$dst)]>, VEX, VEX_WIG, 3099 Sched<[SchedWriteVecMoveLSNT.XMM.MR]>; 3100def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), 3101 (ins i256mem:$dst, VR256:$src), 3102 "movntdq\t{$src, $dst|$dst, $src}", 3103 [(alignednontemporalstore (v4i64 VR256:$src), 3104 addr:$dst)]>, VEX, VEX_L, VEX_WIG, 3105 Sched<[SchedWriteVecMoveLSNT.YMM.MR]>; 3106} // ExeDomain 3107} // Predicates 3108 3109let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in { 3110def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 3111 "movntps\t{$src, $dst|$dst, $src}", 3112 [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; 3113def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 3114 "movntpd\t{$src, $dst|$dst, $src}", 3115 [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>; 3116} // SchedRW 3117 3118let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLSNT.XMM.MR] in 3119def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 3120 "movntdq\t{$src, $dst|$dst, $src}", 3121 [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>; 3122 3123let SchedRW = [WriteStoreNT] in { 3124// There is no AVX form for instructions below this point 3125def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), 3126 "movnti{l}\t{$src, $dst|$dst, $src}", 3127 [(nontemporalstore (i32 GR32:$src), addr:$dst)]>, 3128 PS, Requires<[HasSSE2]>; 3129def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), 3130 "movnti{q}\t{$src, $dst|$dst, $src}", 3131 [(nontemporalstore (i64 GR64:$src), addr:$dst)]>, 3132 
                    PS, Requires<[HasSSE2]>;
} // SchedRW = [WriteStoreNT]

let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(alignednontemporalstore (v8i32 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
}

} // AddedComplexity

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Prefetch and memory fence
//===----------------------------------------------------------------------===//

// Prefetch intrinsic.
let Predicates = [HasSSEPrefetch], SchedRW = [WriteLoad] in {
def PREFETCHT0  : I<0x18, MRM1m, (outs), (ins i8mem:$src),
    "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>, TB;
def PREFETCHT1  : I<0x18, MRM2m, (outs), (ins i8mem:$src),
    "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>, TB;
def PREFETCHT2  : I<0x18, MRM3m, (outs), (ins i8mem:$src),
    "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>, TB;
def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src),
    "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>, TB;
}

// FIXME: How should the flush instruction be modeled?
let SchedRW = [WriteLoad] in {
// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
               "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
               PS, Requires<[HasSSE2]>;
}

let SchedRW = [WriteNop] in {
// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
def PAUSE : I<0x90, RawFrm, (outs), (ins),
              "pause", [(int_x86_sse2_pause)]>, OBXS;
}

let SchedRW = [WriteFence] in {
// Load, store, and memory fence
// TODO: As with mfence, we may want to ease the availability of sfence/lfence
// to include any 64-bit target.
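// Note: sfence and lfence are only selected from their intrinsics
// (int_x86_sse_sfence / int_x86_sse2_lfence, i.e. _mm_sfence/_mm_lfence);
// mfence is additionally reachable through the X86MFence node matched just
// below, which is what seq_cst IR fences typically lower to.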
def SFENCE : I<0xAE, MRM7X, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
             PS, Requires<[HasSSE1]>;
def LFENCE : I<0xAE, MRM5X, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>,
             PS, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM6X, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>,
             PS, Requires<[HasMFence]>;
} // SchedRW

def : Pat<(X86MFence), (MFENCE)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Load/Store MXCSR register
//===----------------------------------------------------------------------===//

let mayLoad=1, hasSideEffects=1 in
def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
                    "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
                    VEX, Sched<[WriteLDMXCSR]>, VEX_WIG;
let mayStore=1, hasSideEffects=1 in
def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                    "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
                    VEX, Sched<[WriteSTMXCSR]>, VEX_WIG;

let mayLoad=1, hasSideEffects=1 in
def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src),
                "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
                PS, Sched<[WriteLDMXCSR]>;
let mayStore=1, hasSideEffects=1 in
def STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
                PS, Sched<[WriteSTMXCSR]>;

//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in { // SSE integer instructions

let hasSideEffects = 0 in {
def VMOVDQArr  : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, VEX_WIG;
def VMOVDQUrr  : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "movdqu\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, VEX_WIG;
def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, VEX_WIG;
def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      "movdqu\t{$src, $dst|$dst, $src}", []>,
                      Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, VEX_WIG;
}

// For Disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def VMOVDQArr_REV  : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                          "movdqa\t{$src, $dst|$dst, $src}", []>,
                          Sched<[SchedWriteVecMoveLS.XMM.RR]>,
                          VEX, VEX_WIG, FoldGenData<"VMOVDQArr">;
def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                          "movdqa\t{$src, $dst|$dst, $src}", []>,
                          Sched<[SchedWriteVecMoveLS.YMM.RR]>,
                          VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQAYrr">;
def VMOVDQUrr_REV  : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                          "movdqu\t{$src, $dst|$dst, $src}", []>,
                          Sched<[SchedWriteVecMoveLS.XMM.RR]>,
                          VEX, VEX_WIG, FoldGenData<"VMOVDQUrr">;
def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                          "movdqu\t{$src, $dst|$dst, $src}", []>,
                          Sched<[SchedWriteVecMoveLS.YMM.RR]>,
                          VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQUYrr">;
}

let canFoldAsLoad = 1, mayLoad = 1,
isReMaterializable = 1, 3271 hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in { 3272def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 3273 "movdqa\t{$src, $dst|$dst, $src}", 3274 [(set VR128:$dst, (alignedloadv2i64 addr:$src))]>, 3275 Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG; 3276def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), 3277 "movdqa\t{$src, $dst|$dst, $src}", []>, 3278 Sched<[SchedWriteVecMoveLS.YMM.RM]>, 3279 VEX, VEX_L, VEX_WIG; 3280def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 3281 "vmovdqu\t{$src, $dst|$dst, $src}", 3282 [(set VR128:$dst, (loadv2i64 addr:$src))]>, 3283 Sched<[SchedWriteVecMoveLS.XMM.RM]>, 3284 XS, VEX, VEX_WIG; 3285def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), 3286 "vmovdqu\t{$src, $dst|$dst, $src}", []>, 3287 Sched<[SchedWriteVecMoveLS.YMM.RM]>, 3288 XS, VEX, VEX_L, VEX_WIG; 3289} 3290 3291let mayStore = 1, hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in { 3292def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), 3293 (ins i128mem:$dst, VR128:$src), 3294 "movdqa\t{$src, $dst|$dst, $src}", 3295 [(alignedstore (v2i64 VR128:$src), addr:$dst)]>, 3296 Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_WIG; 3297def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs), 3298 (ins i256mem:$dst, VR256:$src), 3299 "movdqa\t{$src, $dst|$dst, $src}", []>, 3300 Sched<[SchedWriteVecMoveLS.YMM.MR]>, VEX, VEX_L, VEX_WIG; 3301def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), 3302 "vmovdqu\t{$src, $dst|$dst, $src}", 3303 [(store (v2i64 VR128:$src), addr:$dst)]>, 3304 Sched<[SchedWriteVecMoveLS.XMM.MR]>, XS, VEX, VEX_WIG; 3305def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), 3306 "vmovdqu\t{$src, $dst|$dst, $src}",[]>, 3307 Sched<[SchedWriteVecMoveLS.YMM.MR]>, XS, VEX, VEX_L, VEX_WIG; 3308} 3309 3310let SchedRW = [SchedWriteVecMoveLS.XMM.RR] in { 3311let hasSideEffects = 0 in { 3312def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 3313 "movdqa\t{$src, $dst|$dst, $src}", []>; 3314 3315def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 3316 "movdqu\t{$src, $dst|$dst, $src}", []>, 3317 XS, Requires<[UseSSE2]>; 3318} 3319 3320// For Disassembler 3321let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { 3322def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), 3323 "movdqa\t{$src, $dst|$dst, $src}", []>, 3324 FoldGenData<"MOVDQArr">; 3325 3326def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), 3327 "movdqu\t{$src, $dst|$dst, $src}", []>, 3328 XS, Requires<[UseSSE2]>, FoldGenData<"MOVDQUrr">; 3329} 3330} // SchedRW 3331 3332let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, 3333 hasSideEffects = 0, SchedRW = [SchedWriteVecMoveLS.XMM.RM] in { 3334def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 3335 "movdqa\t{$src, $dst|$dst, $src}", 3336 [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>; 3337def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 3338 "movdqu\t{$src, $dst|$dst, $src}", 3339 [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>, 3340 XS, Requires<[UseSSE2]>; 3341} 3342 3343let mayStore = 1, hasSideEffects = 0, 3344 SchedRW = [SchedWriteVecMoveLS.XMM.MR] in { 3345def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), 3346 "movdqa\t{$src, $dst|$dst, $src}", 3347 [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>; 3348def MOVDQUmr : I<0x7F, 
MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), 3349 "movdqu\t{$src, $dst|$dst, $src}", 3350 [/*(store (v2i64 VR128:$src), addr:$dst)*/]>, 3351 XS, Requires<[UseSSE2]>; 3352} 3353 3354} // ExeDomain = SSEPackedInt 3355 3356// Reversed version with ".s" suffix for GAS compatibility. 3357def : InstAlias<"vmovdqa.s\t{$src, $dst|$dst, $src}", 3358 (VMOVDQArr_REV VR128:$dst, VR128:$src), 0>; 3359def : InstAlias<"vmovdqa.s\t{$src, $dst|$dst, $src}", 3360 (VMOVDQAYrr_REV VR256:$dst, VR256:$src), 0>; 3361def : InstAlias<"vmovdqu.s\t{$src, $dst|$dst, $src}", 3362 (VMOVDQUrr_REV VR128:$dst, VR128:$src), 0>; 3363def : InstAlias<"vmovdqu.s\t{$src, $dst|$dst, $src}", 3364 (VMOVDQUYrr_REV VR256:$dst, VR256:$src), 0>; 3365 3366// Reversed version with ".s" suffix for GAS compatibility. 3367def : InstAlias<"movdqa.s\t{$src, $dst|$dst, $src}", 3368 (MOVDQArr_REV VR128:$dst, VR128:$src), 0>; 3369def : InstAlias<"movdqu.s\t{$src, $dst|$dst, $src}", 3370 (MOVDQUrr_REV VR128:$dst, VR128:$src), 0>; 3371 3372let Predicates = [HasAVX, NoVLX] in { 3373 // Additional patterns for other integer sizes. 3374 def : Pat<(alignedloadv4i32 addr:$src), 3375 (VMOVDQArm addr:$src)>; 3376 def : Pat<(alignedloadv8i16 addr:$src), 3377 (VMOVDQArm addr:$src)>; 3378 def : Pat<(alignedloadv16i8 addr:$src), 3379 (VMOVDQArm addr:$src)>; 3380 def : Pat<(loadv4i32 addr:$src), 3381 (VMOVDQUrm addr:$src)>; 3382 def : Pat<(loadv8i16 addr:$src), 3383 (VMOVDQUrm addr:$src)>; 3384 def : Pat<(loadv16i8 addr:$src), 3385 (VMOVDQUrm addr:$src)>; 3386 3387 def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), 3388 (VMOVDQAmr addr:$dst, VR128:$src)>; 3389 def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), 3390 (VMOVDQAmr addr:$dst, VR128:$src)>; 3391 def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), 3392 (VMOVDQAmr addr:$dst, VR128:$src)>; 3393 def : Pat<(store (v4i32 VR128:$src), addr:$dst), 3394 (VMOVDQUmr addr:$dst, VR128:$src)>; 3395 def : Pat<(store (v8i16 VR128:$src), addr:$dst), 3396 (VMOVDQUmr addr:$dst, VR128:$src)>; 3397 def : Pat<(store (v16i8 VR128:$src), addr:$dst), 3398 (VMOVDQUmr addr:$dst, VR128:$src)>; 3399} 3400 3401//===---------------------------------------------------------------------===// 3402// SSE2 - Packed Integer Arithmetic Instructions 3403//===---------------------------------------------------------------------===// 3404 3405let ExeDomain = SSEPackedInt in { // SSE integer instructions 3406 3407/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types 3408multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, 3409 ValueType DstVT, ValueType SrcVT, RegisterClass RC, 3410 PatFrag memop_frag, X86MemOperand x86memop, 3411 X86FoldableSchedWrite sched, bit Is2Addr = 1> { 3412 let isCommutable = 1 in 3413 def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), 3414 (ins RC:$src1, RC:$src2), 3415 !if(Is2Addr, 3416 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3417 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3418 [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>, 3419 Sched<[sched]>; 3420 def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), 3421 (ins RC:$src1, x86memop:$src2), 3422 !if(Is2Addr, 3423 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3424 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3425 [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), 3426 (memop_frag addr:$src2))))]>, 3427 Sched<[sched.Folded, sched.ReadAfterFold]>; 3428} 3429} // ExeDomain = SSEPackedInt 3430 3431defm PADDB : PDI_binop_all<0xFC, 
"paddb", add, v16i8, v32i8, 3432 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3433defm PADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16, 3434 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3435defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32, 3436 SchedWriteVecALU, 1, NoVLX>; 3437defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64, 3438 SchedWriteVecALU, 1, NoVLX>; 3439defm PADDSB : PDI_binop_all<0xEC, "paddsb", saddsat, v16i8, v32i8, 3440 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3441defm PADDSW : PDI_binop_all<0xED, "paddsw", saddsat, v8i16, v16i16, 3442 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3443defm PADDUSB : PDI_binop_all<0xDC, "paddusb", uaddsat, v16i8, v32i8, 3444 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3445defm PADDUSW : PDI_binop_all<0xDD, "paddusw", uaddsat, v8i16, v16i16, 3446 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3447defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16, 3448 SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>; 3449defm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16, 3450 SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>; 3451defm PMULHW : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16, 3452 SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>; 3453defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8, 3454 SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; 3455defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16, 3456 SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; 3457defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32, 3458 SchedWriteVecALU, 0, NoVLX>; 3459defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64, 3460 SchedWriteVecALU, 0, NoVLX>; 3461defm PSUBSB : PDI_binop_all<0xE8, "psubsb", ssubsat, v16i8, v32i8, 3462 SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; 3463defm PSUBSW : PDI_binop_all<0xE9, "psubsw", ssubsat, v8i16, v16i16, 3464 SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; 3465defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", usubsat, v16i8, v32i8, 3466 SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; 3467defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", usubsat, v8i16, v16i16, 3468 SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; 3469defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8, 3470 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3471defm PMINSW : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16, 3472 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3473defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8, 3474 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3475defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16, 3476 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3477defm PAVGB : PDI_binop_all<0xE0, "pavgb", X86avg, v16i8, v32i8, 3478 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3479defm PAVGW : PDI_binop_all<0xE3, "pavgw", X86avg, v8i16, v16i16, 3480 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3481defm PMULUDQ : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64, 3482 SchedWriteVecIMul, 1, NoVLX>; 3483 3484let Predicates = [HasAVX, NoVLX_Or_NoBWI] in 3485defm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16, VR128, 3486 load, i128mem, SchedWriteVecIMul.XMM, 0>, 3487 VEX_4V, VEX_WIG; 3488 3489let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in 3490defm VPMADDWDY : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v8i32, v16i16, 3491 VR256, load, i256mem, SchedWriteVecIMul.YMM, 3492 0>, VEX_4V, VEX_L, VEX_WIG; 3493let Constraints = "$src1 = $dst" in 3494defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128, 3495 memop, i128mem, SchedWriteVecIMul.XMM>; 3496 3497let Predicates = [HasAVX, NoVLX_Or_NoBWI] in 3498defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, 
v2i64, v16i8, VR128, 3499 load, i128mem, SchedWritePSADBW.XMM, 0>, 3500 VEX_4V, VEX_WIG; 3501let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in 3502defm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8, VR256, 3503 load, i256mem, SchedWritePSADBW.YMM, 0>, 3504 VEX_4V, VEX_L, VEX_WIG; 3505let Constraints = "$src1 = $dst" in 3506defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128, 3507 memop, i128mem, SchedWritePSADBW.XMM>; 3508 3509//===---------------------------------------------------------------------===// 3510// SSE2 - Packed Integer Logical Instructions 3511//===---------------------------------------------------------------------===// 3512 3513multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm, 3514 string OpcodeStr, SDNode OpNode, 3515 SDNode OpNode2, RegisterClass RC, 3516 X86FoldableSchedWrite sched, 3517 X86FoldableSchedWrite schedImm, 3518 ValueType DstVT, ValueType SrcVT, 3519 PatFrag ld_frag, bit Is2Addr = 1> { 3520 // src2 is always 128-bit 3521 def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), 3522 (ins RC:$src1, VR128:$src2), 3523 !if(Is2Addr, 3524 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3525 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3526 [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))]>, 3527 Sched<[sched]>; 3528 def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), 3529 (ins RC:$src1, i128mem:$src2), 3530 !if(Is2Addr, 3531 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3532 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3533 [(set RC:$dst, (DstVT (OpNode RC:$src1, 3534 (SrcVT (ld_frag addr:$src2)))))]>, 3535 Sched<[sched.Folded, sched.ReadAfterFold]>; 3536 def ri : PDIi8<opc2, ImmForm, (outs RC:$dst), 3537 (ins RC:$src1, u8imm:$src2), 3538 !if(Is2Addr, 3539 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3540 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3541 [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 timm:$src2))))]>, 3542 Sched<[schedImm]>; 3543} 3544 3545multiclass PDI_binop_rmi_all<bits<8> opc, bits<8> opc2, Format ImmForm, 3546 string OpcodeStr, SDNode OpNode, 3547 SDNode OpNode2, ValueType DstVT128, 3548 ValueType DstVT256, ValueType SrcVT, 3549 X86SchedWriteWidths sched, 3550 X86SchedWriteWidths schedImm, Predicate prd> { 3551let Predicates = [HasAVX, prd] in 3552 defm V#NAME : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr), 3553 OpNode, OpNode2, VR128, sched.XMM, schedImm.XMM, 3554 DstVT128, SrcVT, load, 0>, VEX_4V, VEX_WIG; 3555let Predicates = [HasAVX2, prd] in 3556 defm V#NAME#Y : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr), 3557 OpNode, OpNode2, VR256, sched.YMM, schedImm.YMM, 3558 DstVT256, SrcVT, load, 0>, VEX_4V, VEX_L, 3559 VEX_WIG; 3560let Constraints = "$src1 = $dst" in 3561 defm NAME : PDI_binop_rmi<opc, opc2, ImmForm, OpcodeStr, OpNode, OpNode2, 3562 VR128, sched.XMM, schedImm.XMM, DstVT128, SrcVT, 3563 memop>; 3564} 3565 3566multiclass PDI_binop_ri<bits<8> opc, Format ImmForm, string OpcodeStr, 3567 SDNode OpNode, RegisterClass RC, ValueType VT, 3568 X86FoldableSchedWrite sched, bit Is2Addr = 1> { 3569 def ri : PDIi8<opc, ImmForm, (outs RC:$dst), (ins RC:$src1, u8imm:$src2), 3570 !if(Is2Addr, 3571 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3572 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3573 [(set RC:$dst, (VT (OpNode RC:$src1, (i8 timm:$src2))))]>, 3574 Sched<[sched]>; 3575} 3576 3577multiclass PDI_binop_ri_all<bits<8> opc, Format 
ImmForm, string OpcodeStr, 3578 SDNode OpNode, X86SchedWriteWidths sched> { 3579let Predicates = [HasAVX, NoVLX_Or_NoBWI] in 3580 defm V#NAME : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode, 3581 VR128, v16i8, sched.XMM, 0>, VEX_4V, VEX_WIG; 3582let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in 3583 defm V#NAME#Y : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode, 3584 VR256, v32i8, sched.YMM, 0>, 3585 VEX_4V, VEX_L, VEX_WIG; 3586let Constraints = "$src1 = $dst" in 3587 defm NAME : PDI_binop_ri<opc, ImmForm, OpcodeStr, OpNode, VR128, v16i8, 3588 sched.XMM>; 3589} 3590 3591let ExeDomain = SSEPackedInt in { 3592 defm PSLLW : PDI_binop_rmi_all<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli, 3593 v8i16, v16i16, v8i16, SchedWriteVecShift, 3594 SchedWriteVecShiftImm, NoVLX_Or_NoBWI>; 3595 defm PSLLD : PDI_binop_rmi_all<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli, 3596 v4i32, v8i32, v4i32, SchedWriteVecShift, 3597 SchedWriteVecShiftImm, NoVLX>; 3598 defm PSLLQ : PDI_binop_rmi_all<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli, 3599 v2i64, v4i64, v2i64, SchedWriteVecShift, 3600 SchedWriteVecShiftImm, NoVLX>; 3601 3602 defm PSRLW : PDI_binop_rmi_all<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli, 3603 v8i16, v16i16, v8i16, SchedWriteVecShift, 3604 SchedWriteVecShiftImm, NoVLX_Or_NoBWI>; 3605 defm PSRLD : PDI_binop_rmi_all<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli, 3606 v4i32, v8i32, v4i32, SchedWriteVecShift, 3607 SchedWriteVecShiftImm, NoVLX>; 3608 defm PSRLQ : PDI_binop_rmi_all<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli, 3609 v2i64, v4i64, v2i64, SchedWriteVecShift, 3610 SchedWriteVecShiftImm, NoVLX>; 3611 3612 defm PSRAW : PDI_binop_rmi_all<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai, 3613 v8i16, v16i16, v8i16, SchedWriteVecShift, 3614 SchedWriteVecShiftImm, NoVLX_Or_NoBWI>; 3615 defm PSRAD : PDI_binop_rmi_all<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai, 3616 v4i32, v8i32, v4i32, SchedWriteVecShift, 3617 SchedWriteVecShiftImm, NoVLX>; 3618 3619 defm PSLLDQ : PDI_binop_ri_all<0x73, MRM7r, "pslldq", X86vshldq, 3620 SchedWriteShuffle>; 3621 defm PSRLDQ : PDI_binop_ri_all<0x73, MRM3r, "psrldq", X86vshrdq, 3622 SchedWriteShuffle>; 3623} // ExeDomain = SSEPackedInt 3624 3625//===---------------------------------------------------------------------===// 3626// SSE2 - Packed Integer Comparison Instructions 3627//===---------------------------------------------------------------------===// 3628 3629defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8, 3630 SchedWriteVecALU, 1, TruePredicate>; 3631defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16, 3632 SchedWriteVecALU, 1, TruePredicate>; 3633defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32, 3634 SchedWriteVecALU, 1, TruePredicate>; 3635defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8, 3636 SchedWriteVecALU, 0, TruePredicate>; 3637defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16, 3638 SchedWriteVecALU, 0, TruePredicate>; 3639defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32, 3640 SchedWriteVecALU, 0, TruePredicate>; 3641 3642//===---------------------------------------------------------------------===// 3643// SSE2 - Packed Integer Shuffle Instructions 3644//===---------------------------------------------------------------------===// 3645 3646let ExeDomain = SSEPackedInt in { 3647multiclass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256, 3648 SDNode OpNode, X86SchedWriteWidths 
sched, 3649 Predicate prd> { 3650let Predicates = [HasAVX, prd] in { 3651 def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst), 3652 (ins VR128:$src1, u8imm:$src2), 3653 !strconcat("v", OpcodeStr, 3654 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3655 [(set VR128:$dst, 3656 (vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>, 3657 VEX, Sched<[sched.XMM]>, VEX_WIG; 3658 def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), 3659 (ins i128mem:$src1, u8imm:$src2), 3660 !strconcat("v", OpcodeStr, 3661 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3662 [(set VR128:$dst, 3663 (vt128 (OpNode (load addr:$src1), 3664 (i8 timm:$src2))))]>, VEX, 3665 Sched<[sched.XMM.Folded]>, VEX_WIG; 3666} 3667 3668let Predicates = [HasAVX2, prd] in { 3669 def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst), 3670 (ins VR256:$src1, u8imm:$src2), 3671 !strconcat("v", OpcodeStr, 3672 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3673 [(set VR256:$dst, 3674 (vt256 (OpNode VR256:$src1, (i8 timm:$src2))))]>, 3675 VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG; 3676 def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), 3677 (ins i256mem:$src1, u8imm:$src2), 3678 !strconcat("v", OpcodeStr, 3679 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3680 [(set VR256:$dst, 3681 (vt256 (OpNode (load addr:$src1), 3682 (i8 timm:$src2))))]>, VEX, VEX_L, 3683 Sched<[sched.YMM.Folded]>, VEX_WIG; 3684} 3685 3686let Predicates = [UseSSE2] in { 3687 def ri : Ii8<0x70, MRMSrcReg, 3688 (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2), 3689 !strconcat(OpcodeStr, 3690 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3691 [(set VR128:$dst, 3692 (vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>, 3693 Sched<[sched.XMM]>; 3694 def mi : Ii8<0x70, MRMSrcMem, 3695 (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), 3696 !strconcat(OpcodeStr, 3697 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3698 [(set VR128:$dst, 3699 (vt128 (OpNode (memop addr:$src1), 3700 (i8 timm:$src2))))]>, 3701 Sched<[sched.XMM.Folded]>; 3702} 3703} 3704} // ExeDomain = SSEPackedInt 3705 3706defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd, 3707 SchedWriteShuffle, NoVLX>, PD; 3708defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw, 3709 SchedWriteShuffle, NoVLX_Or_NoBWI>, XS; 3710defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw, 3711 SchedWriteShuffle, NoVLX_Or_NoBWI>, XD; 3712 3713//===---------------------------------------------------------------------===// 3714// Packed Integer Pack Instructions (SSE & AVX) 3715//===---------------------------------------------------------------------===// 3716 3717let ExeDomain = SSEPackedInt in { 3718multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT, 3719 ValueType ArgVT, SDNode OpNode, RegisterClass RC, 3720 X86MemOperand x86memop, X86FoldableSchedWrite sched, 3721 PatFrag ld_frag, bit Is2Addr = 1> { 3722 def rr : PDI<opc, MRMSrcReg, 3723 (outs RC:$dst), (ins RC:$src1, RC:$src2), 3724 !if(Is2Addr, 3725 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3726 !strconcat(OpcodeStr, 3727 "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3728 [(set RC:$dst, 3729 (OutVT (OpNode (ArgVT RC:$src1), RC:$src2)))]>, 3730 Sched<[sched]>; 3731 def rm : PDI<opc, MRMSrcMem, 3732 (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 3733 !if(Is2Addr, 3734 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3735 !strconcat(OpcodeStr, 3736 "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3737 [(set RC:$dst, 3738 (OutVT (OpNode (ArgVT RC:$src1), 3739 (ld_frag addr:$src2))))]>, 3740 Sched<[sched.Folded, 
sched.ReadAfterFold]>; 3741} 3742 3743multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT, 3744 ValueType ArgVT, SDNode OpNode, RegisterClass RC, 3745 X86MemOperand x86memop, X86FoldableSchedWrite sched, 3746 PatFrag ld_frag, bit Is2Addr = 1> { 3747 def rr : SS48I<opc, MRMSrcReg, 3748 (outs RC:$dst), (ins RC:$src1, RC:$src2), 3749 !if(Is2Addr, 3750 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3751 !strconcat(OpcodeStr, 3752 "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3753 [(set RC:$dst, 3754 (OutVT (OpNode (ArgVT RC:$src1), RC:$src2)))]>, 3755 Sched<[sched]>; 3756 def rm : SS48I<opc, MRMSrcMem, 3757 (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 3758 !if(Is2Addr, 3759 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3760 !strconcat(OpcodeStr, 3761 "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3762 [(set RC:$dst, 3763 (OutVT (OpNode (ArgVT RC:$src1), 3764 (ld_frag addr:$src2))))]>, 3765 Sched<[sched.Folded, sched.ReadAfterFold]>; 3766} 3767 3768let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { 3769 defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, VR128, 3770 i128mem, SchedWriteShuffle.XMM, load, 0>, 3771 VEX_4V, VEX_WIG; 3772 defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, VR128, 3773 i128mem, SchedWriteShuffle.XMM, load, 0>, 3774 VEX_4V, VEX_WIG; 3775 3776 defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus, VR128, 3777 i128mem, SchedWriteShuffle.XMM, load, 0>, 3778 VEX_4V, VEX_WIG; 3779 defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128, 3780 i128mem, SchedWriteShuffle.XMM, load, 0>, 3781 VEX_4V, VEX_WIG; 3782} 3783 3784let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { 3785 defm VPACKSSWBY : sse2_pack<0x63, "vpacksswb", v32i8, v16i16, X86Packss, VR256, 3786 i256mem, SchedWriteShuffle.YMM, load, 0>, 3787 VEX_4V, VEX_L, VEX_WIG; 3788 defm VPACKSSDWY : sse2_pack<0x6B, "vpackssdw", v16i16, v8i32, X86Packss, VR256, 3789 i256mem, SchedWriteShuffle.YMM, load, 0>, 3790 VEX_4V, VEX_L, VEX_WIG; 3791 3792 defm VPACKUSWBY : sse2_pack<0x67, "vpackuswb", v32i8, v16i16, X86Packus, VR256, 3793 i256mem, SchedWriteShuffle.YMM, load, 0>, 3794 VEX_4V, VEX_L, VEX_WIG; 3795 defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, VR256, 3796 i256mem, SchedWriteShuffle.YMM, load, 0>, 3797 VEX_4V, VEX_L, VEX_WIG; 3798} 3799 3800let Constraints = "$src1 = $dst" in { 3801 defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss, VR128, 3802 i128mem, SchedWriteShuffle.XMM, memop>; 3803 defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss, VR128, 3804 i128mem, SchedWriteShuffle.XMM, memop>; 3805 3806 defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus, VR128, 3807 i128mem, SchedWriteShuffle.XMM, memop>; 3808 3809 defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus, VR128, 3810 i128mem, SchedWriteShuffle.XMM, memop>; 3811} 3812} // ExeDomain = SSEPackedInt 3813 3814//===---------------------------------------------------------------------===// 3815// SSE2 - Packed Integer Unpack Instructions 3816//===---------------------------------------------------------------------===// 3817 3818let ExeDomain = SSEPackedInt in { 3819multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, 3820 SDNode OpNode, RegisterClass RC, X86MemOperand x86memop, 3821 X86FoldableSchedWrite sched, PatFrag ld_frag, 3822 bit Is2Addr = 1> { 3823 def rr : PDI<opc, MRMSrcReg, 3824 (outs RC:$dst), (ins RC:$src1, RC:$src2), 3825 !if(Is2Addr, 3826 
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), 3827 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3828 [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>, 3829 Sched<[sched]>; 3830 def rm : PDI<opc, MRMSrcMem, 3831 (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 3832 !if(Is2Addr, 3833 !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), 3834 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3835 [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>, 3836 Sched<[sched.Folded, sched.ReadAfterFold]>; 3837} 3838 3839let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { 3840 defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, VR128, 3841 i128mem, SchedWriteShuffle.XMM, load, 0>, 3842 VEX_4V, VEX_WIG; 3843 defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, VR128, 3844 i128mem, SchedWriteShuffle.XMM, load, 0>, 3845 VEX_4V, VEX_WIG; 3846 defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, VR128, 3847 i128mem, SchedWriteShuffle.XMM, load, 0>, 3848 VEX_4V, VEX_WIG; 3849 defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, VR128, 3850 i128mem, SchedWriteShuffle.XMM, load, 0>, 3851 VEX_4V, VEX_WIG; 3852} 3853 3854let Predicates = [HasAVX, NoVLX] in { 3855 defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, VR128, 3856 i128mem, SchedWriteShuffle.XMM, load, 0>, 3857 VEX_4V, VEX_WIG; 3858 defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, VR128, 3859 i128mem, SchedWriteShuffle.XMM, load, 0>, 3860 VEX_4V, VEX_WIG; 3861 defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, VR128, 3862 i128mem, SchedWriteShuffle.XMM, load, 0>, 3863 VEX_4V, VEX_WIG; 3864 defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, VR128, 3865 i128mem, SchedWriteShuffle.XMM, load, 0>, 3866 VEX_4V, VEX_WIG; 3867} 3868 3869let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { 3870 defm VPUNPCKLBWY : sse2_unpack<0x60, "vpunpcklbw", v32i8, X86Unpckl, VR256, 3871 i256mem, SchedWriteShuffle.YMM, load, 0>, 3872 VEX_4V, VEX_L, VEX_WIG; 3873 defm VPUNPCKLWDY : sse2_unpack<0x61, "vpunpcklwd", v16i16, X86Unpckl, VR256, 3874 i256mem, SchedWriteShuffle.YMM, load, 0>, 3875 VEX_4V, VEX_L, VEX_WIG; 3876 defm VPUNPCKHBWY : sse2_unpack<0x68, "vpunpckhbw", v32i8, X86Unpckh, VR256, 3877 i256mem, SchedWriteShuffle.YMM, load, 0>, 3878 VEX_4V, VEX_L, VEX_WIG; 3879 defm VPUNPCKHWDY : sse2_unpack<0x69, "vpunpckhwd", v16i16, X86Unpckh, VR256, 3880 i256mem, SchedWriteShuffle.YMM, load, 0>, 3881 VEX_4V, VEX_L, VEX_WIG; 3882} 3883 3884let Predicates = [HasAVX2, NoVLX] in { 3885 defm VPUNPCKLDQY : sse2_unpack<0x62, "vpunpckldq", v8i32, X86Unpckl, VR256, 3886 i256mem, SchedWriteShuffle.YMM, load, 0>, 3887 VEX_4V, VEX_L, VEX_WIG; 3888 defm VPUNPCKLQDQY : sse2_unpack<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, VR256, 3889 i256mem, SchedWriteShuffle.YMM, load, 0>, 3890 VEX_4V, VEX_L, VEX_WIG; 3891 defm VPUNPCKHDQY : sse2_unpack<0x6A, "vpunpckhdq", v8i32, X86Unpckh, VR256, 3892 i256mem, SchedWriteShuffle.YMM, load, 0>, 3893 VEX_4V, VEX_L, VEX_WIG; 3894 defm VPUNPCKHQDQY : sse2_unpack<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, VR256, 3895 i256mem, SchedWriteShuffle.YMM, load, 0>, 3896 VEX_4V, VEX_L, VEX_WIG; 3897} 3898 3899let Constraints = "$src1 = $dst" in { 3900 defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, VR128, 3901 i128mem, SchedWriteShuffle.XMM, memop>; 3902 defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, VR128, 3903 i128mem, SchedWriteShuffle.XMM, memop>; 3904 
defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, VR128, 3905 i128mem, SchedWriteShuffle.XMM, memop>; 3906 defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, VR128, 3907 i128mem, SchedWriteShuffle.XMM, memop>; 3908 3909 defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, VR128, 3910 i128mem, SchedWriteShuffle.XMM, memop>; 3911 defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, VR128, 3912 i128mem, SchedWriteShuffle.XMM, memop>; 3913 defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, VR128, 3914 i128mem, SchedWriteShuffle.XMM, memop>; 3915 defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, VR128, 3916 i128mem, SchedWriteShuffle.XMM, memop>; 3917} 3918} // ExeDomain = SSEPackedInt 3919 3920//===---------------------------------------------------------------------===// 3921// SSE2 - Packed Integer Extract and Insert 3922//===---------------------------------------------------------------------===// 3923 3924let ExeDomain = SSEPackedInt in { 3925multiclass sse2_pinsrw<bit Is2Addr = 1> { 3926 def rr : Ii8<0xC4, MRMSrcReg, 3927 (outs VR128:$dst), (ins VR128:$src1, 3928 GR32orGR64:$src2, u8imm:$src3), 3929 !if(Is2Addr, 3930 "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", 3931 "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 3932 [(set VR128:$dst, 3933 (X86pinsrw VR128:$src1, GR32orGR64:$src2, timm:$src3))]>, 3934 Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>; 3935 def rm : Ii8<0xC4, MRMSrcMem, 3936 (outs VR128:$dst), (ins VR128:$src1, 3937 i16mem:$src2, u8imm:$src3), 3938 !if(Is2Addr, 3939 "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", 3940 "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 3941 [(set VR128:$dst, 3942 (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), 3943 timm:$src3))]>, 3944 Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; 3945} 3946 3947// Extract 3948let Predicates = [HasAVX, NoBWI] in 3949def VPEXTRWrr : Ii8<0xC5, MRMSrcReg, 3950 (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2), 3951 "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", 3952 [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), 3953 timm:$src2))]>, 3954 PD, VEX, VEX_WIG, Sched<[WriteVecExtract]>; 3955def PEXTRWrr : PDIi8<0xC5, MRMSrcReg, 3956 (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2), 3957 "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", 3958 [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), 3959 timm:$src2))]>, 3960 Sched<[WriteVecExtract]>; 3961 3962// Insert 3963let Predicates = [HasAVX, NoBWI] in 3964defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V, VEX_WIG; 3965 3966let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in 3967defm PINSRW : sse2_pinsrw, PD; 3968 3969} // ExeDomain = SSEPackedInt 3970 3971//===---------------------------------------------------------------------===// 3972// SSE2 - Packed Mask Creation 3973//===---------------------------------------------------------------------===// 3974 3975let ExeDomain = SSEPackedInt in { 3976 3977def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), 3978 (ins VR128:$src), 3979 "pmovmskb\t{$src, $dst|$dst, $src}", 3980 [(set GR32orGR64:$dst, (X86movmsk (v16i8 VR128:$src)))]>, 3981 Sched<[WriteVecMOVMSK]>, VEX, VEX_WIG; 3982 3983let Predicates = [HasAVX2] in { 3984def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), 3985 (ins VR256:$src), 3986 "pmovmskb\t{$src, $dst|$dst, $src}", 3987 [(set GR32orGR64:$dst, (X86movmsk (v32i8 VR256:$src)))]>, 3988 

//===---------------------------------------------------------------------===//
// SSE2 - Packed Mask Creation
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in {

def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
                       (ins VR128:$src),
                       "pmovmskb\t{$src, $dst|$dst, $src}",
                       [(set GR32orGR64:$dst, (X86movmsk (v16i8 VR128:$src)))]>,
                  Sched<[WriteVecMOVMSK]>, VEX, VEX_WIG;

let Predicates = [HasAVX2] in {
def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
                        (ins VR256:$src),
                        "pmovmskb\t{$src, $dst|$dst, $src}",
                        [(set GR32orGR64:$dst, (X86movmsk (v32i8 VR256:$src)))]>,
                   Sched<[WriteVecMOVMSKY]>, VEX, VEX_L, VEX_WIG;
}

def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src),
                     "pmovmskb\t{$src, $dst|$dst, $src}",
                     [(set GR32orGR64:$dst, (X86movmsk (v16i8 VR128:$src)))]>,
                 Sched<[WriteVecMOVMSK]>;

} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// SSE2 - Conditional Store
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in {
let Uses = [EDI], Predicates = [HasAVX,Not64BitMode] in
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
                       (ins VR128:$src, VR128:$mask),
                       "maskmovdqu\t{$mask, $src|$src, $mask}",
                       [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
                  VEX, VEX_WIG;
let Uses = [RDI], Predicates = [HasAVX,In64BitMode] in
def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
                         (ins VR128:$src, VR128:$mask),
                         "maskmovdqu\t{$mask, $src|$src, $mask}",
                         [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>,
                    VEX, VEX_WIG, AdSize64;
let Uses = [EDI], Predicates = [HasAVX,In64BitMode] in
def VMASKMOVDQUX32 : VPDI<0xF7, MRMSrcReg, (outs),
                          (ins VR128:$src, VR128:$mask), "",
                          [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
                     VEX, VEX_WIG, AdSize32 {
  let AsmString = "addr32 vmaskmovdqu\t{$mask, $src|$src, $mask}";
  let AsmVariantName = "NonParsable";
}

let Uses = [EDI], Predicates = [UseSSE2,Not64BitMode] in
def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
                     "maskmovdqu\t{$mask, $src|$src, $mask}",
                     [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
let Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in
def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
                       "maskmovdqu\t{$mask, $src|$src, $mask}",
                       [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>,
                   AdSize64;
let Uses = [EDI], Predicates = [UseSSE2,In64BitMode] in
def MASKMOVDQUX32 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
                        "addr32 maskmovdqu\t{$mask, $src|$src, $mask}",
                        [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
                    AdSize32 {
  let AsmVariantName = "NonParsable";
}

} // ExeDomain = SSEPackedInt
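
// Illustrative only: the conditional store takes its destination implicitly
// in EDI/RDI, which is why the definitions above list Uses = [EDI]/[RDI]
// rather than a memory operand. A minimal C sketch via the SSE2 intrinsic
// (byte lanes whose mask byte has the top bit set are written, the rest are
// left untouched); the function name is hypothetical:
//
//   #include <emmintrin.h>
//
//   void store_selected(char *dst, __m128i data, __m128i mask) {
//     _mm_maskmoveu_si128(data, mask, dst); // maskmovdqu, address in (r)di
//   }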

//===---------------------------------------------------------------------===//
// SSE2 - Move Doubleword/Quadword
//===---------------------------------------------------------------------===//

//===---------------------------------------------------------------------===//
// Move Int Doubleword to Packed Double Int
//
let ExeDomain = SSEPackedInt in {
def VMOVDI2PDIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v4i32 (scalar_to_vector GR32:$src)))]>,
                   VEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVDI2PDIrm : VS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                   VEX, Sched<[WriteVecLoad]>;
def VMOV64toPQIrr : VRS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                          "movq\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst,
                            (v2i64 (scalar_to_vector GR64:$src)))]>,
                    VEX, Sched<[WriteVecMoveFromGpr]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIrm : VRS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                          "movq\t{$src, $dst|$dst, $src}", []>,
                    VEX, Sched<[WriteVecLoad]>;
let isCodeGenOnly = 1 in
def VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                         [(set FR64:$dst, (bitconvert GR64:$src))]>,
                   VEX, Sched<[WriteVecMoveFromGpr]>;

def MOVDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))]>,
                  Sched<[WriteVecMoveFromGpr]>;
def MOVDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                  Sched<[WriteVecLoad]>;
def MOV64toPQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v2i64 (scalar_to_vector GR64:$src)))]>,
                   Sched<[WriteVecMoveFromGpr]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def MOV64toPQIrm : RS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                        "movq\t{$src, $dst|$dst, $src}", []>,
                   Sched<[WriteVecLoad]>;
let isCodeGenOnly = 1 in
def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
                       "movq\t{$src, $dst|$dst, $src}",
                       [(set FR64:$dst, (bitconvert GR64:$src))]>,
                  Sched<[WriteVecMoveFromGpr]>;
} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// Move Int Doubleword to Single Scalar
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
  def VMOVDI2SSrr : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set FR32:$dst, (bitconvert GR32:$src))]>,
                    VEX, Sched<[WriteVecMoveFromGpr]>;

  def MOVDI2SSrr : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (bitconvert GR32:$src))]>,
                   Sched<[WriteVecMoveFromGpr]>;

} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int to Packed Double Int
//
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set GR32:$dst, (extractelt (v4i32 VR128:$src),
                                          (iPTR 0)))]>, VEX,
                   Sched<[WriteVecMoveToGpr]>;
def VMOVPDI2DImr : VS2I<0x7E, MRMDestMem, (outs),
                        (ins i32mem:$dst, VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(store (i32 (extractelt (v4i32 VR128:$src),
                                     (iPTR 0))), addr:$dst)]>,
                   VEX, Sched<[WriteVecStore]>;
def MOVPDI2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (extractelt (v4i32 VR128:$src),
                                        (iPTR 0)))]>,
                  Sched<[WriteVecMoveToGpr]>;
def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(store (i32 (extractelt (v4i32 VR128:$src),
                                   (iPTR 0))), addr:$dst)]>,
                  Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt
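
// Illustrative only: the GPR<->XMM movd forms above are what the SSE2
// scalar-conversion intrinsics compile to. A minimal C sketch; the function
// name is hypothetical:
//
//   #include <emmintrin.h>
//
//   int movd_round_trip(int x) {
//     __m128i v = _mm_cvtsi32_si128(x); // movd %r32, %xmm (upper lanes zeroed)
//     return _mm_cvtsi128_si32(v);      // movd %xmm, %r32
//   }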

//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int first element to Doubleword Int
//
let ExeDomain = SSEPackedInt in {
let SchedRW = [WriteVecMoveToGpr] in {
def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
                          "movq\t{$src, $dst|$dst, $src}",
                          [(set GR64:$dst, (extractelt (v2i64 VR128:$src),
                                            (iPTR 0)))]>,
                    VEX;

def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set GR64:$dst, (extractelt (v2i64 VR128:$src),
                                          (iPTR 0)))]>;
} // SchedRW

let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64mr : VRS2I<0x7E, MRMDestMem, (outs),
                          (ins i64mem:$dst, VR128:$src),
                          "movq\t{$src, $dst|$dst, $src}", []>,
                    VEX, Sched<[WriteVecStore]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def MOVPQIto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}", []>,
                   Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// Bitcast FR64 <-> GR64
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
  def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
                           "movq\t{$src, $dst|$dst, $src}",
                           [(set GR64:$dst, (bitconvert FR64:$src))]>,
                     VEX, Sched<[WriteVecMoveToGpr]>;

  def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (bitconvert FR64:$src))]>,
                    Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

//===---------------------------------------------------------------------===//
// Move Scalar Single to Double Int
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
  def VMOVSS2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst, (bitconvert FR32:$src))]>,
                    VEX, Sched<[WriteVecMoveToGpr]>;
  def MOVSS2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (bitconvert FR32:$src))]>,
                   Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

let Predicates = [UseAVX] in {
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIrr GR64:$src)>;

  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  // These instructions also write zeros in the high part of a 256-bit register.
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v4i32 (VMOVDI2PDIrm addr:$src)), sub_xmm)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (MOVDI2PDIrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (MOV64toPQIrr GR64:$src)>;
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (MOVDI2PDIrm addr:$src)>;
}

// Before the MC layer of LLVM existed, clang emitted "movd" assembly instead
// of "movq" due to a macOS assembly-parsing limitation. To keep parsing that
// old assembly, we add these aliases.
def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
                (MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
                (MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
// Allow "vmovd" but print "vmovq" since we don't need compatibility for AVX.
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIrr VR128:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64rr GR64:$dst, VR128:$src), 0>;

//===---------------------------------------------------------------------===//
// SSE2 - Move Quadword
//===---------------------------------------------------------------------===//

//===---------------------------------------------------------------------===//
// Move Quadword Int to Packed Quadword Int
//

let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLoad] in {
def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                     "vmovq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
                   VEX, Requires<[UseAVX]>, VEX_WIG;
def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                    "movq\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst,
                      (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                  XS, Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
} // ExeDomain, SchedRW

//===---------------------------------------------------------------------===//
// Move Packed Quadword Int to Quadword Int
//
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecStore] in {
def VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(store (i64 (extractelt (v2i64 VR128:$src),
                                     (iPTR 0))), addr:$dst)]>,
                   VEX, VEX_WIG;
def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                      "movq\t{$src, $dst|$dst, $src}",
                      [(store (i64 (extractelt (v2i64 VR128:$src),
                                   (iPTR 0))), addr:$dst)]>;
} // ExeDomain, SchedRW

// For disassembler only
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVPQI2QIrr : VS2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_WIG;
def MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                      "movq\t{$src, $dst|$dst, $src}", []>;
}

def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIrr VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
                (MOVPQI2QIrr VR128:$dst, VR128:$src), 0>;

let Predicates = [UseAVX] in {
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIrm addr:$src)), sub_xmm)>;

  def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
            (VMOVPQI2QImr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(v2i64 (X86vzload64 addr:$src)), (MOVQI2PQIrm addr:$src)>;

  def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
            (MOVPQI2QImr addr:$dst, VR128:$src)>;
}
Note, there is a bug in 4309// IA32 document. movq xmm1, xmm2 does clear the high bits. 4310// 4311let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in { 4312def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 4313 "vmovq\t{$src, $dst|$dst, $src}", 4314 [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>, 4315 XS, VEX, Requires<[UseAVX]>, VEX_WIG; 4316def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 4317 "movq\t{$src, $dst|$dst, $src}", 4318 [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>, 4319 XS, Requires<[UseSSE2]>; 4320} // ExeDomain, SchedRW 4321 4322let Predicates = [UseAVX] in { 4323 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), 4324 (VMOVZPQILo2PQIrr VR128:$src)>; 4325} 4326let Predicates = [UseSSE2] in { 4327 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), 4328 (MOVZPQILo2PQIrr VR128:$src)>; 4329} 4330 4331let Predicates = [UseAVX] in { 4332 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))), 4333 (SUBREG_TO_REG (i32 0), 4334 (v2f64 (VMOVZPQILo2PQIrr 4335 (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)))), 4336 sub_xmm)>; 4337 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))), 4338 (SUBREG_TO_REG (i32 0), 4339 (v2i64 (VMOVZPQILo2PQIrr 4340 (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)))), 4341 sub_xmm)>; 4342} 4343 4344//===---------------------------------------------------------------------===// 4345// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP 4346//===---------------------------------------------------------------------===// 4347 4348multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr, 4349 ValueType vt, RegisterClass RC, PatFrag mem_frag, 4350 X86MemOperand x86memop, X86FoldableSchedWrite sched> { 4351def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src), 4352 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4353 [(set RC:$dst, (vt (OpNode RC:$src)))]>, 4354 Sched<[sched]>; 4355def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), 4356 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4357 [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>, 4358 Sched<[sched.Folded]>; 4359} 4360 4361let Predicates = [HasAVX, NoVLX] in { 4362 defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", 4363 v4f32, VR128, loadv4f32, f128mem, 4364 SchedWriteFShuffle.XMM>, VEX, VEX_WIG; 4365 defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", 4366 v4f32, VR128, loadv4f32, f128mem, 4367 SchedWriteFShuffle.XMM>, VEX, VEX_WIG; 4368 defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", 4369 v8f32, VR256, loadv8f32, f256mem, 4370 SchedWriteFShuffle.YMM>, VEX, VEX_L, VEX_WIG; 4371 defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", 4372 v8f32, VR256, loadv8f32, f256mem, 4373 SchedWriteFShuffle.YMM>, VEX, VEX_L, VEX_WIG; 4374} 4375defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128, 4376 memopv4f32, f128mem, SchedWriteFShuffle.XMM>; 4377defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128, 4378 memopv4f32, f128mem, SchedWriteFShuffle.XMM>; 4379 4380let Predicates = [HasAVX, NoVLX] in { 4381 def : Pat<(v4i32 (X86Movshdup VR128:$src)), 4382 (VMOVSHDUPrr VR128:$src)>; 4383 def : Pat<(v4i32 (X86Movshdup (load addr:$src))), 4384 (VMOVSHDUPrm addr:$src)>; 4385 def : Pat<(v4i32 (X86Movsldup VR128:$src)), 4386 (VMOVSLDUPrr VR128:$src)>; 4387 def : Pat<(v4i32 (X86Movsldup (load addr:$src))), 4388 (VMOVSLDUPrm addr:$src)>; 4389 def : Pat<(v8i32 (X86Movshdup 

//===---------------------------------------------------------------------===//
// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
                              ValueType vt, RegisterClass RC, PatFrag mem_frag,
                              X86MemOperand x86memop, X86FoldableSchedWrite sched> {
def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set RC:$dst, (vt (OpNode RC:$src)))]>,
         Sched<[sched]>;
def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>,
         Sched<[sched.Folded]>;
}

let Predicates = [HasAVX, NoVLX] in {
  defm VMOVSHDUP  : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
                                       v4f32, VR128, loadv4f32, f128mem,
                                       SchedWriteFShuffle.XMM>, VEX, VEX_WIG;
  defm VMOVSLDUP  : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
                                       v4f32, VR128, loadv4f32, f128mem,
                                       SchedWriteFShuffle.XMM>, VEX, VEX_WIG;
  defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
                                       v8f32, VR256, loadv8f32, f256mem,
                                       SchedWriteFShuffle.YMM>, VEX, VEX_L, VEX_WIG;
  defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
                                       v8f32, VR256, loadv8f32, f256mem,
                                       SchedWriteFShuffle.YMM>, VEX, VEX_L, VEX_WIG;
}
defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
                                   memopv4f32, f128mem, SchedWriteFShuffle.XMM>;
defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
                                   memopv4f32, f128mem, SchedWriteFShuffle.XMM>;

let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v4i32 (X86Movshdup VR128:$src)),
            (VMOVSHDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movshdup (load addr:$src))),
            (VMOVSHDUPrm addr:$src)>;
  def : Pat<(v4i32 (X86Movsldup VR128:$src)),
            (VMOVSLDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movsldup (load addr:$src))),
            (VMOVSLDUPrm addr:$src)>;
  def : Pat<(v8i32 (X86Movshdup VR256:$src)),
            (VMOVSHDUPYrr VR256:$src)>;
  def : Pat<(v8i32 (X86Movshdup (load addr:$src))),
            (VMOVSHDUPYrm addr:$src)>;
  def : Pat<(v8i32 (X86Movsldup VR256:$src)),
            (VMOVSLDUPYrr VR256:$src)>;
  def : Pat<(v8i32 (X86Movsldup (load addr:$src))),
            (VMOVSLDUPYrm addr:$src)>;
}

let Predicates = [UseSSE3] in {
  def : Pat<(v4i32 (X86Movshdup VR128:$src)),
            (MOVSHDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movshdup (memop addr:$src))),
            (MOVSHDUPrm addr:$src)>;
  def : Pat<(v4i32 (X86Movsldup VR128:$src)),
            (MOVSLDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movsldup (memop addr:$src))),
            (MOVSLDUPrm addr:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Replicate Double FP - MOVDDUP
//===---------------------------------------------------------------------===//

multiclass sse3_replicate_dfp<string OpcodeStr, X86SchedWriteWidths sched> {
def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set VR128:$dst, (v2f64 (X86Movddup VR128:$src)))]>,
         Sched<[sched.XMM]>;
def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set VR128:$dst,
                (v2f64 (X86Movddup
                        (scalar_to_vector (loadf64 addr:$src)))))]>,
         Sched<[sched.XMM.Folded]>;
}

// FIXME: Merge with above classes when there are patterns for the ymm version
multiclass sse3_replicate_dfp_y<string OpcodeStr, X86SchedWriteWidths sched> {
def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>,
         Sched<[sched.YMM]>;
def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
              [(set VR256:$dst,
                (v4f64 (X86Movddup (loadv4f64 addr:$src))))]>,
         Sched<[sched.YMM.Folded]>;
}

let Predicates = [HasAVX, NoVLX] in {
  defm VMOVDDUP  : sse3_replicate_dfp<"vmovddup", SchedWriteFShuffle>,
                   VEX, VEX_WIG;
  defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup", SchedWriteFShuffle>,
                   VEX, VEX_L, VEX_WIG;
}

defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>;

let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
}

let Predicates = [UseSSE3] in {
  def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
            (MOVDDUPrm addr:$src)>;
}
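
// Illustrative only: the SSE3 replicate instructions defined above map to the
// lane-duplication intrinsics. A minimal C sketch of both forms; the function
// names are hypothetical:
//
//   #include <pmmintrin.h>
//
//   __m128  dup_odd_lanes(__m128 v)    { return _mm_movehdup_ps(v); } // movshdup
//   __m128d dup_low_double(__m128d v)  { return _mm_movedup_pd(v); }  // movddup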

//===---------------------------------------------------------------------===//
// SSE3 - Move Unaligned Integer
//===---------------------------------------------------------------------===//

let Predicates = [HasAVX] in {
  def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                      "vlddqu\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
                 Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
  def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                       "vlddqu\t{$src, $dst|$dst, $src}",
                       [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>,
                  Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, VEX_L, VEX_WIG;
} // Predicates

def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "lddqu\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
              Sched<[SchedWriteVecMoveLS.XMM.RM]>;

//===---------------------------------------------------------------------===//
// SSE3 - Arithmetic
//===---------------------------------------------------------------------===//

multiclass sse3_addsub<string OpcodeStr, ValueType vt, RegisterClass RC,
                       X86MemOperand x86memop, X86FoldableSchedWrite sched,
                       PatFrag ld_frag, bit Is2Addr = 1> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : I<0xD0, MRMSrcReg,
             (outs RC:$dst), (ins RC:$src1, RC:$src2),
             !if(Is2Addr,
                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
             [(set RC:$dst, (vt (X86Addsub RC:$src1, RC:$src2)))]>,
           Sched<[sched]>;
  def rm : I<0xD0, MRMSrcMem,
             (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
             !if(Is2Addr,
                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
             [(set RC:$dst, (vt (X86Addsub RC:$src1, (ld_frag addr:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

let Predicates = [HasAVX] in {
  let ExeDomain = SSEPackedSingle in {
    defm VADDSUBPS  : sse3_addsub<"vaddsubps", v4f32, VR128, f128mem,
                                  SchedWriteFAddSizes.PS.XMM, loadv4f32, 0>,
                      XD, VEX_4V, VEX_WIG;
    defm VADDSUBPSY : sse3_addsub<"vaddsubps", v8f32, VR256, f256mem,
                                  SchedWriteFAddSizes.PS.YMM, loadv8f32, 0>,
                      XD, VEX_4V, VEX_L, VEX_WIG;
  }
  let ExeDomain = SSEPackedDouble in {
    defm VADDSUBPD  : sse3_addsub<"vaddsubpd", v2f64, VR128, f128mem,
                                  SchedWriteFAddSizes.PD.XMM, loadv2f64, 0>,
                      PD, VEX_4V, VEX_WIG;
    defm VADDSUBPDY : sse3_addsub<"vaddsubpd", v4f64, VR256, f256mem,
                                  SchedWriteFAddSizes.PD.YMM, loadv4f64, 0>,
                      PD, VEX_4V, VEX_L, VEX_WIG;
  }
}
let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
  let ExeDomain = SSEPackedSingle in
  defm ADDSUBPS : sse3_addsub<"addsubps", v4f32, VR128, f128mem,
                              SchedWriteFAddSizes.PS.XMM, memopv4f32>, XD;
  let ExeDomain = SSEPackedDouble in
  defm ADDSUBPD : sse3_addsub<"addsubpd", v2f64, VR128, f128mem,
                              SchedWriteFAddSizes.PD.XMM, memopv2f64>, PD;
}
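
// Illustrative only: X86Addsub subtracts in even lanes and adds in odd lanes,
// which is exactly what the SSE3 intrinsic exposes. A minimal C sketch:
//
//   #include <pmmintrin.h>
//
//   __m128 addsub(__m128 a, __m128 b) {
//     // result = { a0-b0, a1+b1, a2-b2, a3+b3 }
//     return _mm_addsub_ps(a, b); // addsubps
//   }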

//===---------------------------------------------------------------------===//
// SSE3 Instructions
//===---------------------------------------------------------------------===//

// Horizontal ops
multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
                   X86MemOperand x86memop, SDNode OpNode,
                   X86FoldableSchedWrite sched, PatFrag ld_frag,
                   bit Is2Addr = 1> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
       Sched<[sched]>;

  def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
                  X86MemOperand x86memop, SDNode OpNode,
                  X86FoldableSchedWrite sched, PatFrag ld_frag,
                  bit Is2Addr = 1> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
       Sched<[sched]>;

  def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

let Predicates = [HasAVX] in {
  let ExeDomain = SSEPackedSingle in {
    defm VHADDPS  : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
                            X86fhadd, WriteFHAdd, loadv4f32, 0>, VEX_4V, VEX_WIG;
    defm VHSUBPS  : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
                            X86fhsub, WriteFHAdd, loadv4f32, 0>, VEX_4V, VEX_WIG;
    defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
                            X86fhadd, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
    defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
                            X86fhsub, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
  }
  let ExeDomain = SSEPackedDouble in {
    defm VHADDPD  : S3_Int<0x7C, "vhaddpd", v2f64, VR128, f128mem,
                           X86fhadd, WriteFHAdd, loadv2f64, 0>, VEX_4V, VEX_WIG;
    defm VHSUBPD  : S3_Int<0x7D, "vhsubpd", v2f64, VR128, f128mem,
                           X86fhsub, WriteFHAdd, loadv2f64, 0>, VEX_4V, VEX_WIG;
    defm VHADDPDY : S3_Int<0x7C, "vhaddpd", v4f64, VR256, f256mem,
                           X86fhadd, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
    defm VHSUBPDY : S3_Int<0x7D, "vhsubpd", v4f64, VR256, f256mem,
                           X86fhsub, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
  }
}

let Constraints = "$src1 = $dst" in {
  let ExeDomain = SSEPackedSingle in {
    defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd,
                          WriteFHAdd, memopv4f32>;
    defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub,
                          WriteFHAdd, memopv4f32>;
  }
  let ExeDomain = SSEPackedDouble in {
    defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd,
                         WriteFHAdd, memopv2f64>;
    defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub,
                         WriteFHAdd, memopv2f64>;
  }
}
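
// Illustrative only: the horizontal ops pair adjacent lanes within each
// source, with the first operand supplying the low half of the result. A
// minimal C sketch:
//
//   #include <pmmintrin.h>
//
//   __m128 hadd(__m128 a, __m128 b) {
//     // result = { a0+a1, a2+a3, b0+b1, b2+b3 }
//     return _mm_hadd_ps(a, b); // haddps
//   }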

//===---------------------------------------------------------------------===//
// SSSE3 - Packed Absolute Instructions
//===---------------------------------------------------------------------===//

/// SS3I_unop_rm - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm<bits<8> opc, string OpcodeStr, ValueType vt,
                        SDNode OpNode, X86SchedWriteWidths sched, PatFrag ld_frag> {
  def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst, (vt (OpNode VR128:$src)))]>,
           Sched<[sched.XMM]>;

  def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
                 (ins i128mem:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst,
                   (vt (OpNode (ld_frag addr:$src))))]>,
           Sched<[sched.XMM.Folded]>;
}

/// SS3I_unop_rm_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt,
                          SDNode OpNode, X86SchedWriteWidths sched> {
  def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
                  (ins VR256:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR256:$dst, (vt (OpNode VR256:$src)))]>,
            Sched<[sched.YMM]>;

  def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
                  (ins i256mem:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR256:$dst,
                    (vt (OpNode (load addr:$src))))]>,
            Sched<[sched.YMM.Folded]>;
}

let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, SchedWriteVecALU,
                             load>, VEX, VEX_WIG;
  defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, SchedWriteVecALU,
                             load>, VEX, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX] in {
  defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, SchedWriteVecALU,
                             load>, VEX, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs, SchedWriteVecALU>,
                VEX, VEX_L, VEX_WIG;
  defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs, SchedWriteVecALU>,
                VEX, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
  defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs, SchedWriteVecALU>,
                VEX, VEX_L, VEX_WIG;
}

defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, SchedWriteVecALU,
                          memop>;
defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, SchedWriteVecALU,
                          memop>;
defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, SchedWriteVecALU,
                          memop>;
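
// Illustrative only: the packed-absolute forms above come from the SSSE3
// intrinsics. Note that pabs* does not saturate, so the minimum signed value
// keeps its bit pattern (0x8000 stays 0x8000 for pabsw). A minimal C sketch:
//
//   #include <tmmintrin.h>
//
//   __m128i abs16(__m128i v) {
//     return _mm_abs_epi16(v); // pabsw
//   }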

//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
//===---------------------------------------------------------------------===//

/// SS3I_binop_rm - Simple SSSE3 bin op
multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         ValueType DstVT, ValueType OpVT, RegisterClass RC,
                         PatFrag memop_frag, X86MemOperand x86memop,
                         X86FoldableSchedWrite sched, bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (DstVT (OpNode (OpVT RC:$src1), RC:$src2)))]>,
       Sched<[sched]>;
  def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst,
         (DstVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))]>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}

/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
                             Intrinsic IntId128, X86FoldableSchedWrite sched,
                             PatFrag ld_frag, bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1, VR128:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
       Sched<[sched]>;
  def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
       (ins VR128:$src1, i128mem:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst,
         (IntId128 VR128:$src1, (ld_frag addr:$src2)))]>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
                               Intrinsic IntId256,
                               X86FoldableSchedWrite sched> {
  let isCommutable = 1 in
  def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
       (ins VR256:$src1, VR256:$src2),
       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
       Sched<[sched]>;
  def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
       (ins VR256:$src1, i256mem:$src2),
       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set VR256:$dst,
         (IntId256 VR256:$src1, (load addr:$src2)))]>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}

let ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
  defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8,
                               VR128, load, i128mem,
                               SchedWriteVarShuffle.XMM, 0>, VEX_4V, VEX_WIG;
  defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16,
                                  v16i8, VR128, load, i128mem,
                                  SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG;
}
defm VPMULHRSW : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, v8i16,
                               VR128, load, i128mem,
                               SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG;
}

let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
  defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128,
                               load, i128mem,
                               SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
  defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128,
                               load, i128mem,
                               SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
  defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128,
                               load, i128mem,
                               SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
  defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
                               load, i128mem,
                               SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
  defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
                                   int_x86_ssse3_psign_b_128,
                                   SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
  defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw",
                                   int_x86_ssse3_psign_w_128,
                                   SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
  defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd",
                                   int_x86_ssse3_psign_d_128,
                                   SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
  defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
                                    int_x86_ssse3_phadd_sw_128,
                                    SchedWritePHAdd.XMM, load, 0>, VEX_4V, VEX_WIG;
  defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
                                    int_x86_ssse3_phsub_sw_128,
                                    SchedWritePHAdd.XMM, load, 0>, VEX_4V, VEX_WIG;
}
}

let ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
  defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8,
                                VR256, load, i256mem,
                                SchedWriteVarShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
  defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16,
                                   v32i8, VR256, load, i256mem,
                                   SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
}
defm VPMULHRSWY : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16,
                                VR256, load, i256mem,
                                SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
}

let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
  defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16,
                                VR256, load, i256mem,
                                SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
  defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256,
                                load, i256mem,
                                SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
  defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16,
                                VR256, load, i256mem,
                                SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
  defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
                                load, i256mem,
                                SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
  defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
                                     SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
  defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w,
                                     SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
  defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", int_x86_avx2_psign_d,
                                     SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
  defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
                                      int_x86_avx2_phadd_sw,
                                      SchedWritePHAdd.YMM>, VEX_4V, VEX_L, VEX_WIG;
  defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
                                      int_x86_avx2_phsub_sw,
                                      SchedWritePHAdd.YMM>, VEX_4V, VEX_L, VEX_WIG;
}
}
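
// Illustrative only: pshufb is a byte-wise table lookup; each control byte
// selects a source byte, and a set top bit zeroes the lane. A minimal C
// sketch (hypothetical function name) that reverses the 16 bytes of a vector:
//
//   #include <tmmintrin.h>
//
//   __m128i reverse_bytes(__m128i v) {
//     const __m128i ctl = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8,
//                                       7, 6, 5, 4, 3, 2, 1, 0);
//     return _mm_shuffle_epi8(v, ctl); // pshufb
//   }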

// None of these have i8 immediate fields.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
  defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, v8i16, VR128,
                              memop, i128mem, SchedWritePHAdd.XMM>;
  defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, v4i32, VR128,
                              memop, i128mem, SchedWritePHAdd.XMM>;
  defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, v8i16, VR128,
                              memop, i128mem, SchedWritePHAdd.XMM>;
  defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, v4i32, VR128,
                              memop, i128mem, SchedWritePHAdd.XMM>;
  defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", int_x86_ssse3_psign_b_128,
                                  SchedWriteVecALU.XMM, memop>;
  defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", int_x86_ssse3_psign_w_128,
                                  SchedWriteVecALU.XMM, memop>;
  defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", int_x86_ssse3_psign_d_128,
                                  SchedWriteVecALU.XMM, memop>;
  defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, v16i8, VR128,
                              memop, i128mem, SchedWriteVarShuffle.XMM>;
  defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
                                   int_x86_ssse3_phadd_sw_128,
                                   SchedWritePHAdd.XMM, memop>;
  defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
                                   int_x86_ssse3_phsub_sw_128,
                                   SchedWritePHAdd.XMM, memop>;
  defm PMADDUBSW : SS3I_binop_rm<0x04, "pmaddubsw", X86vpmaddubsw, v8i16,
                                 v16i8, VR128, memop, i128mem,
                                 SchedWriteVecIMul.XMM>;
}
defm PMULHRSW : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, v8i16,
                              VR128, memop, i128mem, SchedWriteVecIMul.XMM>;
}

//===---------------------------------------------------------------------===//
// SSSE3 - Packed Align Instruction Patterns
//===---------------------------------------------------------------------===//

multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
                         PatFrag memop_frag, X86MemOperand x86memop,
                         X86FoldableSchedWrite sched, bit Is2Addr = 1> {
  let hasSideEffects = 0 in {
  def rri : SS3AI<0x0F, MRMSrcReg, (outs RC:$dst),
      (ins RC:$src1, RC:$src2, u8imm:$src3),
      !if(Is2Addr,
          !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
          !strconcat(asm,
                     "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set RC:$dst, (VT (X86PAlignr RC:$src1, RC:$src2, (i8 timm:$src3))))]>,
      Sched<[sched]>;
  let mayLoad = 1 in
  def rmi : SS3AI<0x0F, MRMSrcMem, (outs RC:$dst),
      (ins RC:$src1, x86memop:$src2, u8imm:$src3),
      !if(Is2Addr,
          !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
          !strconcat(asm,
                     "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set RC:$dst, (VT (X86PAlignr RC:$src1,
                                     (memop_frag addr:$src2),
                                     (i8 timm:$src3))))]>,
      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
  defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, load, i128mem,
                                SchedWriteShuffle.XMM, 0>, VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
  defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, load, i256mem,
                                 SchedWriteShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
  defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memop, i128mem,
                               SchedWriteShuffle.XMM>;
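
// Illustrative only: palignr concatenates its two sources (first operand
// high) and extracts a byte-aligned 128-bit window. A minimal C sketch with
// a hypothetical function name:
//
//   #include <tmmintrin.h>
//
//   __m128i shift_window(__m128i hi, __m128i lo) {
//     // bytes 4..19 of the 32-byte concatenation hi:lo
//     return _mm_alignr_epi8(hi, lo, 4); // palignr $4
//   }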

//===---------------------------------------------------------------------===//
// SSE3 - Thread synchronization
//===---------------------------------------------------------------------===//

let SchedRW = [WriteSystem] in {
let Uses = [EAX, ECX, EDX] in
def MONITOR32rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
                   TB, Requires<[HasSSE3, Not64BitMode]>;
let Uses = [RAX, ECX, EDX] in
def MONITOR64rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
                   TB, Requires<[HasSSE3, In64BitMode]>;

let Uses = [ECX, EAX] in
def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait",
                [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
} // SchedRW

def : InstAlias<"mwait\t{%eax, %ecx|ecx, eax}", (MWAITrr)>, Requires<[Not64BitMode]>;
def : InstAlias<"mwait\t{%rax, %rcx|rcx, rax}", (MWAITrr)>, Requires<[In64BitMode]>;

def : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITOR32rrr)>,
      Requires<[Not64BitMode]>;
def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITOR64rrr)>,
      Requires<[In64BitMode]>;

//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Move with Sign/Zero Extend
// NOTE: Any Extend is promoted to Zero Extend in X86ISelDAGToDAG.cpp
//===----------------------------------------------------------------------===//

multiclass SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
                            RegisterClass OutRC, RegisterClass InRC,
                            X86FoldableSchedWrite sched> {
  def rr : SS48I<opc, MRMSrcReg, (outs OutRC:$dst), (ins InRC:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
           Sched<[sched]>;

  def rm : SS48I<opc, MRMSrcMem, (outs OutRC:$dst), (ins MemOp:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
           Sched<[sched.Folded]>;
}

multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,
                              X86MemOperand MemOp, X86MemOperand MemYOp,
                              Predicate prd> {
  defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128,
                               SchedWriteShuffle.XMM>;
  let Predicates = [HasAVX, prd] in
    defm V#NAME : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
                                   VR128, VR128, SchedWriteShuffle.XMM>,
                  VEX, VEX_WIG;
  let Predicates = [HasAVX2, prd] in
    defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
                                     VR256, VR128, WriteVPMOV256>,
                    VEX, VEX_L, VEX_WIG;
}

multiclass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
                          X86MemOperand MemYOp, Predicate prd> {
  defm PMOVSX#NAME : SS41I_pmovx_rm_all<opc, !strconcat("pmovsx", OpcodeStr),
                                        MemOp, MemYOp, prd>;
  defm PMOVZX#NAME : SS41I_pmovx_rm_all<!add(opc, 0x10),
                                        !strconcat("pmovzx", OpcodeStr),
                                        MemOp, MemYOp, prd>;
}

defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem, NoVLX_Or_NoBWI>;
defm WD : SS41I_pmovx_rm<0x23, "wd", i64mem, i128mem, NoVLX>;
defm DQ : SS41I_pmovx_rm<0x25, "dq", i64mem, i128mem, NoVLX>;

defm BD : SS41I_pmovx_rm<0x21, "bd", i32mem, i64mem, NoVLX>;
defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem, NoVLX>;

defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem, NoVLX>;
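
// Illustrative only: the pmovsx/pmovzx families defined above widen the low
// lanes of their source. A minimal C sketch of both extension kinds, with
// hypothetical function names:
//
//   #include <smmintrin.h>
//
//   __m128i widen_signed(__m128i v)   { return _mm_cvtepi8_epi16(v); } // pmovsxbw
//   __m128i widen_unsigned(__m128i v) { return _mm_cvtepu8_epi16(v); } // pmovzxbw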

// AVX2 Patterns
multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
                                     SDNode ExtOp, SDNode InVecOp> {
  // Register-Register patterns
  let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
    def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
              (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
  }
  let Predicates = [HasAVX2, NoVLX] in {
    def : Pat<(v8i32 (InVecOp (v16i8 VR128:$src))),
              (!cast<I>(OpcPrefix#BDYrr) VR128:$src)>;
    def : Pat<(v4i64 (InVecOp (v16i8 VR128:$src))),
              (!cast<I>(OpcPrefix#BQYrr) VR128:$src)>;

    def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
              (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
    def : Pat<(v4i64 (InVecOp (v8i16 VR128:$src))),
              (!cast<I>(OpcPrefix#WQYrr) VR128:$src)>;

    def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
              (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
  }

  // Simple Register-Memory patterns
  let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
    def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
              (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;

    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
  }

  let Predicates = [HasAVX2, NoVLX] in {
    def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
              (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
    def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
              (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;

    def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
              (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
    def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
              (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;

    def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
              (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
  }

  // AVX2 Register-Memory patterns
  let Predicates = [HasAVX2, NoVLX] in {
    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;

    def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
    def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
    def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;

    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;

    def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
    def : Pat<(v4i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload32 addr:$src))))),
              (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;

    def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
    def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
    def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
  }
}

defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", sext, sext_invec>;
defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", zext, zext_invec>;

// SSE4.1/AVX patterns.
multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
                                SDNode ExtOp> {
  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
    def : Pat<(v8i16 (ExtOp (v16i8 VR128:$src))),
              (!cast<I>(OpcPrefix#BWrr) VR128:$src)>;
  }
  let Predicates = [HasAVX, NoVLX] in {
    def : Pat<(v4i32 (ExtOp (v16i8 VR128:$src))),
              (!cast<I>(OpcPrefix#BDrr) VR128:$src)>;
    def : Pat<(v2i64 (ExtOp (v16i8 VR128:$src))),
              (!cast<I>(OpcPrefix#BQrr) VR128:$src)>;

    def : Pat<(v4i32 (ExtOp (v8i16 VR128:$src))),
              (!cast<I>(OpcPrefix#WDrr) VR128:$src)>;
    def : Pat<(v2i64 (ExtOp (v8i16 VR128:$src))),
              (!cast<I>(OpcPrefix#WQrr) VR128:$src)>;

    def : Pat<(v2i64 (ExtOp (v4i32 VR128:$src))),
              (!cast<I>(OpcPrefix#DQrr) VR128:$src)>;
  }
  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
    def : Pat<(v8i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
              (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  }
  let Predicates = [HasAVX, NoVLX] in {
    def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
              (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
    def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
              (!cast<I>(OpcPrefix#BQrm) addr:$src)>;

    def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
              (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
    def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
              (!cast<I>(OpcPrefix#WQrm) addr:$src)>;

    def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
              (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  }
  let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
    def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
    def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
    def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
    def : Pat<(v8i16 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  }
  let Predicates = [HasAVX, NoVLX] in {
    def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
    def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
              (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
    def : Pat<(v4i32 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BDrm) addr:$src)>;

    def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
    def : Pat<(v2i64 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BQrm) addr:$src)>;

    def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
    def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
    def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
    def : Pat<(v4i32 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDrm) addr:$src)>;

    def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
    def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
              (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
    def : Pat<(v2i64 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WQrm) addr:$src)>;

    def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
    def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
    def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
    def : Pat<(v2i64 (ExtOp (loadv4i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  }
}

defm : SS41I_pmovx_patterns<"VPMOVSX", "s", sext_invec>;
defm : SS41I_pmovx_patterns<"VPMOVZX", "z", zext_invec>;

let Predicates = [UseSSE41] in {
  defm : SS41I_pmovx_patterns<"PMOVSX", "s", sext_invec>;
  defm : SS41I_pmovx_patterns<"PMOVZX", "z", zext_invec>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Extract Instructions
//===----------------------------------------------------------------------===//

/// SS41I_extract8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem
multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1),
                                           timm:$src2))]>,
           Sched<[WriteVecExtract]>;
  let hasSideEffects = 0, mayStore = 1 in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins i8mem:$dst, VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), timm:$src2))),
                           addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}

let Predicates = [HasAVX, NoBWI] in
  defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX, VEX_WIG;

defm PEXTRB : SS41I_extract8<0x14, "pextrb">;


/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rr_REV : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                       (ins VR128:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
               Sched<[WriteVecExtract]>, FoldGenData<NAME#rr>;

  let hasSideEffects = 0, mayStore = 1 in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins i16mem:$dst, VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), timm:$src2))),
                           addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}

let Predicates = [HasAVX, NoBWI] in
  defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX, VEX_WIG;

defm PEXTRW : SS41I_extract16<0x15, "pextrw">;


/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
                   (ins VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set GR32:$dst,
                     (extractelt (v4i32 VR128:$src1), imm:$src2))]>,
           Sched<[WriteVecExtract]>;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins i32mem:$dst, VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
                           addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}

let Predicates = [HasAVX, NoDQI] in
  defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;

defm PEXTRD : SS41I_extract32<0x16, "pextrd">;

/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
                   (ins VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set GR64:$dst,
                     (extractelt (v2i64 VR128:$src1), imm:$src2))]>,
           Sched<[WriteVecExtract]>;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins i64mem:$dst, VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
                           addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}

let Predicates = [HasAVX, NoDQI] in
  defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;

defm PEXTRQ : SS41I_extract64<0x16, "pextrq">, REX_W;

/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
/// destination
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set GR32orGR64:$dst,
                     (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
           Sched<[WriteVecExtract]>;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins f32mem:$dst, VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
                           addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}

let ExeDomain = SSEPackedSingle in {
  let Predicates = [UseAVX] in
    defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX, VEX_WIG;
  defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
}
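
// Illustrative only: the SSE4.1 extract forms map to intrinsics that return
// the selected lane in a GPR; extractps returns the raw bit pattern of the
// float. A minimal C sketch with hypothetical function names:
//
//   #include <smmintrin.h>
//
//   int lane2_int(__m128i v)  { return _mm_extract_epi32(v, 2); } // pextrd $2
//   int lane2_bits(__m128 v)  { return _mm_extract_ps(v, 2); }    // extractps $2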
PINSRB : SS41I_insert8<0x20, "pinsrb">; 5310 5311multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> { 5312 def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), 5313 (ins VR128:$src1, GR32:$src2, u8imm:$src3), 5314 !if(Is2Addr, 5315 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5316 !strconcat(asm, 5317 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5318 [(set VR128:$dst, 5319 (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>, 5320 Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>; 5321 def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), 5322 (ins VR128:$src1, i32mem:$src2, u8imm:$src3), 5323 !if(Is2Addr, 5324 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5325 !strconcat(asm, 5326 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5327 [(set VR128:$dst, 5328 (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), imm:$src3)))]>, 5329 Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; 5330} 5331 5332let Predicates = [HasAVX, NoDQI] in 5333 defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V; 5334let Constraints = "$src1 = $dst" in 5335 defm PINSRD : SS41I_insert32<0x22, "pinsrd">; 5336 5337multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> { 5338 def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), 5339 (ins VR128:$src1, GR64:$src2, u8imm:$src3), 5340 !if(Is2Addr, 5341 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5342 !strconcat(asm, 5343 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5344 [(set VR128:$dst, 5345 (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>, 5346 Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>; 5347 def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), 5348 (ins VR128:$src1, i64mem:$src2, u8imm:$src3), 5349 !if(Is2Addr, 5350 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5351 !strconcat(asm, 5352 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5353 [(set VR128:$dst, 5354 (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), imm:$src3)))]>, 5355 Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; 5356} 5357 5358let Predicates = [HasAVX, NoDQI] in 5359 defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W; 5360let Constraints = "$src1 = $dst" in 5361 defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W; 5362 5363// insertps has a few different modes, there's the first two here below which 5364// are optimized inserts that won't zero arbitrary elements in the destination 5365// vector. The next one matches the intrinsic and could zero arbitrary elements 5366// in the target vector. 
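// As a reference for the immediates used with these patterns (per the Intel
// SDM): the insertps imm8 uses bits [7:6] to select the source element,
// bits [5:4] to select the destination slot, and bits [3:0] as a zero mask.
// For example, imm 0x1C (0b00011100) copies source element 0 into
// destination element 1 and zeroes destination elements 2 and 3.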
5367multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> { 5368 let isCommutable = 1 in 5369 def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), 5370 (ins VR128:$src1, VR128:$src2, u8imm:$src3), 5371 !if(Is2Addr, 5372 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5373 !strconcat(asm, 5374 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5375 [(set VR128:$dst, 5376 (X86insertps VR128:$src1, VR128:$src2, timm:$src3))]>, 5377 Sched<[SchedWriteFShuffle.XMM]>; 5378 def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), 5379 (ins VR128:$src1, f32mem:$src2, u8imm:$src3), 5380 !if(Is2Addr, 5381 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5382 !strconcat(asm, 5383 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5384 [(set VR128:$dst, 5385 (X86insertps VR128:$src1, 5386 (v4f32 (scalar_to_vector (loadf32 addr:$src2))), 5387 timm:$src3))]>, 5388 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; 5389} 5390 5391let ExeDomain = SSEPackedSingle in { 5392 let Predicates = [UseAVX] in 5393 defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, 5394 VEX_4V, VEX_WIG; 5395 let Constraints = "$src1 = $dst" in 5396 defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1>; 5397} 5398 5399//===----------------------------------------------------------------------===// 5400// SSE4.1 - Round Instructions 5401//===----------------------------------------------------------------------===// 5402 5403multiclass sse41_fp_unop_p<bits<8> opc, string OpcodeStr, 5404 X86MemOperand x86memop, RegisterClass RC, 5405 ValueType VT, PatFrag mem_frag, SDPatternOperator OpNode, 5406 X86FoldableSchedWrite sched> { 5407 // Intrinsic operation, reg. 5408 // Vector intrinsic operation, reg 5409let Uses = [MXCSR], mayRaiseFPException = 1 in { 5410 def r : SS4AIi8<opc, MRMSrcReg, 5411 (outs RC:$dst), (ins RC:$src1, i32u8imm:$src2), 5412 !strconcat(OpcodeStr, 5413 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 5414 [(set RC:$dst, (VT (OpNode RC:$src1, timm:$src2)))]>, 5415 Sched<[sched]>; 5416 5417 // Vector intrinsic operation, mem 5418 def m : SS4AIi8<opc, MRMSrcMem, 5419 (outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2), 5420 !strconcat(OpcodeStr, 5421 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 5422 [(set RC:$dst, 5423 (VT (OpNode (mem_frag addr:$src1), timm:$src2)))]>, 5424 Sched<[sched.Folded]>; 5425} 5426} 5427 5428multiclass avx_fp_unop_rm<bits<8> opcss, bits<8> opcsd, 5429 string OpcodeStr, X86FoldableSchedWrite sched> { 5430let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in { 5431 def SSr : SS4AIi8<opcss, MRMSrcReg, 5432 (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32u8imm:$src3), 5433 !strconcat(OpcodeStr, 5434 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 5435 []>, Sched<[sched]>; 5436 5437 let mayLoad = 1 in 5438 def SSm : SS4AIi8<opcss, MRMSrcMem, 5439 (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, i32u8imm:$src3), 5440 !strconcat(OpcodeStr, 5441 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 5442 []>, Sched<[sched.Folded, sched.ReadAfterFold]>; 5443} // ExeDomain = SSEPackedSingle, hasSideEffects = 0 5444 5445let ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in { 5446 def SDr : SS4AIi8<opcsd, MRMSrcReg, 5447 (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32u8imm:$src3), 5448 !strconcat(OpcodeStr, 5449 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 5450 []>, Sched<[sched]>; 5451 5452 let mayLoad = 1 in 5453 def SDm : SS4AIi8<opcsd, 
MRMSrcMem, 5454 (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, i32u8imm:$src3), 5455 !strconcat(OpcodeStr, 5456 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 5457 []>, Sched<[sched.Folded, sched.ReadAfterFold]>; 5458} // ExeDomain = SSEPackedDouble, hasSideEffects = 0 5459} 5460 5461multiclass sse41_fp_unop_s<bits<8> opcss, bits<8> opcsd, 5462 string OpcodeStr, X86FoldableSchedWrite sched> { 5463let Uses = [MXCSR], mayRaiseFPException = 1 in { 5464let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in { 5465 def SSr : SS4AIi8<opcss, MRMSrcReg, 5466 (outs FR32:$dst), (ins FR32:$src1, i32u8imm:$src2), 5467 !strconcat(OpcodeStr, 5468 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 5469 []>, Sched<[sched]>; 5470 5471 let mayLoad = 1 in 5472 def SSm : SS4AIi8<opcss, MRMSrcMem, 5473 (outs FR32:$dst), (ins f32mem:$src1, i32u8imm:$src2), 5474 !strconcat(OpcodeStr, 5475 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 5476 []>, Sched<[sched.Folded, sched.ReadAfterFold]>; 5477} // ExeDomain = SSEPackedSingle, hasSideEffects = 0 5478 5479let ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in { 5480 def SDr : SS4AIi8<opcsd, MRMSrcReg, 5481 (outs FR64:$dst), (ins FR64:$src1, i32u8imm:$src2), 5482 !strconcat(OpcodeStr, 5483 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 5484 []>, Sched<[sched]>; 5485 5486 let mayLoad = 1 in 5487 def SDm : SS4AIi8<opcsd, MRMSrcMem, 5488 (outs FR64:$dst), (ins f64mem:$src1, i32u8imm:$src2), 5489 !strconcat(OpcodeStr, 5490 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 5491 []>, Sched<[sched.Folded, sched.ReadAfterFold]>; 5492} // ExeDomain = SSEPackedDouble, hasSideEffects = 0 5493} 5494} 5495 5496multiclass sse41_fp_binop_s<bits<8> opcss, bits<8> opcsd, 5497 string OpcodeStr, X86FoldableSchedWrite sched, 5498 ValueType VT32, ValueType VT64, 5499 SDNode OpNode, bit Is2Addr = 1> { 5500let Uses = [MXCSR], mayRaiseFPException = 1 in { 5501let ExeDomain = SSEPackedSingle in { 5502 def SSr_Int : SS4AIi8<opcss, MRMSrcReg, 5503 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3), 5504 !if(Is2Addr, 5505 !strconcat(OpcodeStr, 5506 "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5507 !strconcat(OpcodeStr, 5508 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5509 [(set VR128:$dst, (VT32 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>, 5510 Sched<[sched]>; 5511 5512 def SSm_Int : SS4AIi8<opcss, MRMSrcMem, 5513 (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32u8imm:$src3), 5514 !if(Is2Addr, 5515 !strconcat(OpcodeStr, 5516 "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5517 !strconcat(OpcodeStr, 5518 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5519 [(set VR128:$dst, 5520 (OpNode VR128:$src1, (sse_load_f32 addr:$src2), timm:$src3))]>, 5521 Sched<[sched.Folded, sched.ReadAfterFold]>; 5522} // ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 5523 5524let ExeDomain = SSEPackedDouble in { 5525 def SDr_Int : SS4AIi8<opcsd, MRMSrcReg, 5526 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3), 5527 !if(Is2Addr, 5528 !strconcat(OpcodeStr, 5529 "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5530 !strconcat(OpcodeStr, 5531 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5532 [(set VR128:$dst, (VT64 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>, 5533 Sched<[sched]>; 5534 5535 def SDm_Int : SS4AIi8<opcsd, MRMSrcMem, 5536 (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32u8imm:$src3), 5537 !if(Is2Addr, 5538 !strconcat(OpcodeStr, 5539 "sd\t{$src3, $src2, 
$dst|$dst, $src2, $src3}"), 5540 !strconcat(OpcodeStr, 5541 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5542 [(set VR128:$dst, 5543 (OpNode VR128:$src1, (sse_load_f64 addr:$src2), timm:$src3))]>, 5544 Sched<[sched.Folded, sched.ReadAfterFold]>; 5545} // ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 5546} 5547} 5548 5549// FP round - roundss, roundps, roundsd, roundpd 5550let Predicates = [HasAVX, NoVLX] in { 5551 let ExeDomain = SSEPackedSingle, Uses = [MXCSR], mayRaiseFPException = 1 in { 5552 // Intrinsic form 5553 defm VROUNDPS : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32, 5554 loadv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>, 5555 VEX, VEX_WIG; 5556 defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32, 5557 loadv8f32, X86any_VRndScale, SchedWriteFRnd.YMM>, 5558 VEX, VEX_L, VEX_WIG; 5559 } 5560 5561 let ExeDomain = SSEPackedDouble, Uses = [MXCSR], mayRaiseFPException = 1 in { 5562 defm VROUNDPD : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64, 5563 loadv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>, 5564 VEX, VEX_WIG; 5565 defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64, 5566 loadv4f64, X86any_VRndScale, SchedWriteFRnd.YMM>, 5567 VEX, VEX_L, VEX_WIG; 5568 } 5569} 5570let Predicates = [UseAVX] in { 5571 defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl, 5572 v4f32, v2f64, X86RndScales, 0>, 5573 VEX_4V, VEX_LIG, VEX_WIG, SIMD_EXC; 5574 defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>, 5575 VEX_4V, VEX_LIG, VEX_WIG, SIMD_EXC; 5576} 5577 5578let Predicates = [UseAVX] in { 5579 def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2), 5580 (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>; 5581 def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2), 5582 (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, timm:$src2)>; 5583} 5584 5585let Predicates = [UseAVX, OptForSize] in { 5586 def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2), 5587 (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>; 5588 def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2), 5589 (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>; 5590} 5591 5592let ExeDomain = SSEPackedSingle in 5593defm ROUNDPS : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32, 5594 memopv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>; 5595let ExeDomain = SSEPackedDouble in 5596defm ROUNDPD : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64, 5597 memopv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>; 5598 5599defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl>; 5600 5601let Constraints = "$src1 = $dst" in 5602defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl, 5603 v4f32, v2f64, X86RndScales>; 5604 5605let Predicates = [UseSSE41] in { 5606 def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2), 5607 (ROUNDSSr FR32:$src1, timm:$src2)>; 5608 def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2), 5609 (ROUNDSDr FR64:$src1, timm:$src2)>; 5610} 5611 5612let Predicates = [UseSSE41, OptForSize] in { 5613 def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2), 5614 (ROUNDSSm addr:$src1, timm:$src2)>; 5615 def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2), 5616 (ROUNDSDm addr:$src1, timm:$src2)>; 5617} 5618 5619//===----------------------------------------------------------------------===// 5620// SSE4.1 - Packed Bit Test 5621//===----------------------------------------------------------------------===// 5622 5623// ptest instruction we'll lower to 
this in X86ISelLowering primarily from 5624// the intel intrinsic that corresponds to this. 5625let Defs = [EFLAGS], Predicates = [HasAVX] in { 5626def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), 5627 "vptest\t{$src2, $src1|$src1, $src2}", 5628 [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>, 5629 Sched<[SchedWriteVecTest.XMM]>, VEX, VEX_WIG; 5630def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), 5631 "vptest\t{$src2, $src1|$src1, $src2}", 5632 [(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>, 5633 Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>, 5634 VEX, VEX_WIG; 5635 5636def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2), 5637 "vptest\t{$src2, $src1|$src1, $src2}", 5638 [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>, 5639 Sched<[SchedWriteVecTest.YMM]>, VEX, VEX_L, VEX_WIG; 5640def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2), 5641 "vptest\t{$src2, $src1|$src1, $src2}", 5642 [(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>, 5643 Sched<[SchedWriteVecTest.YMM.Folded, SchedWriteVecTest.YMM.ReadAfterFold]>, 5644 VEX, VEX_L, VEX_WIG; 5645} 5646 5647let Defs = [EFLAGS] in { 5648def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), 5649 "ptest\t{$src2, $src1|$src1, $src2}", 5650 [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>, 5651 Sched<[SchedWriteVecTest.XMM]>; 5652def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), 5653 "ptest\t{$src2, $src1|$src1, $src2}", 5654 [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>, 5655 Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>; 5656} 5657 5658// The bit test instructions below are AVX only 5659multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC, 5660 X86MemOperand x86memop, PatFrag mem_frag, ValueType vt, 5661 X86FoldableSchedWrite sched> { 5662 def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), 5663 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 5664 [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, 5665 Sched<[sched]>, VEX; 5666 def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), 5667 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 5668 [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>, 5669 Sched<[sched.Folded, sched.ReadAfterFold]>, VEX; 5670} 5671 5672let Defs = [EFLAGS], Predicates = [HasAVX] in { 5673let ExeDomain = SSEPackedSingle in { 5674defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32, v4f32, 5675 SchedWriteFTest.XMM>; 5676defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32, v8f32, 5677 SchedWriteFTest.YMM>, VEX_L; 5678} 5679let ExeDomain = SSEPackedDouble in { 5680defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64, v2f64, 5681 SchedWriteFTest.XMM>; 5682defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64, 5683 SchedWriteFTest.YMM>, VEX_L; 5684} 5685} 5686 5687//===----------------------------------------------------------------------===// 5688// SSE4.1 - Misc Instructions 5689//===----------------------------------------------------------------------===// 5690 5691let Defs = [EFLAGS], Predicates = [HasPOPCNT] in { 5692 def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), 5693 "popcnt{w}\t{$src, $dst|$dst, $src}", 5694 [(set GR16:$dst, (ctpop GR16:$src)), 
(implicit EFLAGS)]>, 5695 Sched<[WritePOPCNT]>, OpSize16, XS; 5696 def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), 5697 "popcnt{w}\t{$src, $dst|$dst, $src}", 5698 [(set GR16:$dst, (ctpop (loadi16 addr:$src))), 5699 (implicit EFLAGS)]>, 5700 Sched<[WritePOPCNT.Folded]>, OpSize16, XS; 5701 5702 def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), 5703 "popcnt{l}\t{$src, $dst|$dst, $src}", 5704 [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>, 5705 Sched<[WritePOPCNT]>, OpSize32, XS; 5706 5707 def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), 5708 "popcnt{l}\t{$src, $dst|$dst, $src}", 5709 [(set GR32:$dst, (ctpop (loadi32 addr:$src))), 5710 (implicit EFLAGS)]>, 5711 Sched<[WritePOPCNT.Folded]>, OpSize32, XS; 5712 5713 def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), 5714 "popcnt{q}\t{$src, $dst|$dst, $src}", 5715 [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>, 5716 Sched<[WritePOPCNT]>, XS; 5717 def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), 5718 "popcnt{q}\t{$src, $dst|$dst, $src}", 5719 [(set GR64:$dst, (ctpop (loadi64 addr:$src))), 5720 (implicit EFLAGS)]>, 5721 Sched<[WritePOPCNT.Folded]>, XS; 5722} 5723 5724// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16. 5725multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr, 5726 SDNode OpNode, PatFrag ld_frag, 5727 X86FoldableSchedWrite Sched> { 5728 def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), 5729 (ins VR128:$src), 5730 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5731 [(set VR128:$dst, (v8i16 (OpNode (v8i16 VR128:$src))))]>, 5732 Sched<[Sched]>; 5733 def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), 5734 (ins i128mem:$src), 5735 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5736 [(set VR128:$dst, 5737 (v8i16 (OpNode (ld_frag addr:$src))))]>, 5738 Sched<[Sched.Folded]>; 5739} 5740 5741// PHMIN has the same profile as PSAD, thus we use the same scheduling 5742// model, although the naming is misleading. 5743let Predicates = [HasAVX] in 5744defm VPHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "vphminposuw", 5745 X86phminpos, load, 5746 WritePHMINPOS>, VEX, VEX_WIG; 5747defm PHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "phminposuw", 5748 X86phminpos, memop, 5749 WritePHMINPOS>; 5750 5751/// SS48I_binop_rm - Simple SSE41 binary operator. 
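/// For illustration, an instantiation such as
///   defm PMINSD : SS48I_binop_rm<0x39, "pminsd", smin, v4i32, VR128,
///                                memop, i128mem, SchedWriteVecALU.XMM, 1>;
/// (used below) expands to a register-register PMINSDrr and a load-folding
/// PMINSDrm instruction.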
5752multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 5753 ValueType OpVT, RegisterClass RC, PatFrag memop_frag, 5754 X86MemOperand x86memop, X86FoldableSchedWrite sched, 5755 bit Is2Addr = 1> { 5756 let isCommutable = 1 in 5757 def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst), 5758 (ins RC:$src1, RC:$src2), 5759 !if(Is2Addr, 5760 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5761 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5762 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, 5763 Sched<[sched]>; 5764 def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst), 5765 (ins RC:$src1, x86memop:$src2), 5766 !if(Is2Addr, 5767 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5768 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5769 [(set RC:$dst, 5770 (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, 5771 Sched<[sched.Folded, sched.ReadAfterFold]>; 5772} 5773 5774let Predicates = [HasAVX, NoVLX] in { 5775 defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128, 5776 load, i128mem, SchedWriteVecALU.XMM, 0>, 5777 VEX_4V, VEX_WIG; 5778 defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128, 5779 load, i128mem, SchedWriteVecALU.XMM, 0>, 5780 VEX_4V, VEX_WIG; 5781 defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128, 5782 load, i128mem, SchedWriteVecALU.XMM, 0>, 5783 VEX_4V, VEX_WIG; 5784 defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128, 5785 load, i128mem, SchedWriteVecALU.XMM, 0>, 5786 VEX_4V, VEX_WIG; 5787 defm VPMULDQ : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v2i64, VR128, 5788 load, i128mem, SchedWriteVecIMul.XMM, 0>, 5789 VEX_4V, VEX_WIG; 5790} 5791let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { 5792 defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128, 5793 load, i128mem, SchedWriteVecALU.XMM, 0>, 5794 VEX_4V, VEX_WIG; 5795 defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128, 5796 load, i128mem, SchedWriteVecALU.XMM, 0>, 5797 VEX_4V, VEX_WIG; 5798 defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128, 5799 load, i128mem, SchedWriteVecALU.XMM, 0>, 5800 VEX_4V, VEX_WIG; 5801 defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128, 5802 load, i128mem, SchedWriteVecALU.XMM, 0>, 5803 VEX_4V, VEX_WIG; 5804} 5805 5806let Predicates = [HasAVX2, NoVLX] in { 5807 defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256, 5808 load, i256mem, SchedWriteVecALU.YMM, 0>, 5809 VEX_4V, VEX_L, VEX_WIG; 5810 defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256, 5811 load, i256mem, SchedWriteVecALU.YMM, 0>, 5812 VEX_4V, VEX_L, VEX_WIG; 5813 defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256, 5814 load, i256mem, SchedWriteVecALU.YMM, 0>, 5815 VEX_4V, VEX_L, VEX_WIG; 5816 defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256, 5817 load, i256mem, SchedWriteVecALU.YMM, 0>, 5818 VEX_4V, VEX_L, VEX_WIG; 5819 defm VPMULDQY : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v4i64, VR256, 5820 load, i256mem, SchedWriteVecIMul.YMM, 0>, 5821 VEX_4V, VEX_L, VEX_WIG; 5822} 5823let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { 5824 defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256, 5825 load, i256mem, SchedWriteVecALU.YMM, 0>, 5826 VEX_4V, VEX_L, VEX_WIG; 5827 defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256, 5828 load, i256mem, SchedWriteVecALU.YMM, 0>, 5829 VEX_4V, VEX_L, VEX_WIG; 5830 defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256, 5831 load, i256mem, 
SchedWriteVecALU.YMM, 0>, 5832 VEX_4V, VEX_L, VEX_WIG; 5833 defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256, 5834 load, i256mem, SchedWriteVecALU.YMM, 0>, 5835 VEX_4V, VEX_L, VEX_WIG; 5836} 5837 5838let Constraints = "$src1 = $dst" in { 5839 defm PMINSB : SS48I_binop_rm<0x38, "pminsb", smin, v16i8, VR128, 5840 memop, i128mem, SchedWriteVecALU.XMM, 1>; 5841 defm PMINSD : SS48I_binop_rm<0x39, "pminsd", smin, v4i32, VR128, 5842 memop, i128mem, SchedWriteVecALU.XMM, 1>; 5843 defm PMINUD : SS48I_binop_rm<0x3B, "pminud", umin, v4i32, VR128, 5844 memop, i128mem, SchedWriteVecALU.XMM, 1>; 5845 defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", umin, v8i16, VR128, 5846 memop, i128mem, SchedWriteVecALU.XMM, 1>; 5847 defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", smax, v16i8, VR128, 5848 memop, i128mem, SchedWriteVecALU.XMM, 1>; 5849 defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", smax, v4i32, VR128, 5850 memop, i128mem, SchedWriteVecALU.XMM, 1>; 5851 defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", umax, v4i32, VR128, 5852 memop, i128mem, SchedWriteVecALU.XMM, 1>; 5853 defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", umax, v8i16, VR128, 5854 memop, i128mem, SchedWriteVecALU.XMM, 1>; 5855 defm PMULDQ : SS48I_binop_rm<0x28, "pmuldq", X86pmuldq, v2i64, VR128, 5856 memop, i128mem, SchedWriteVecIMul.XMM, 1>; 5857} 5858 5859let Predicates = [HasAVX, NoVLX] in 5860 defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128, 5861 load, i128mem, SchedWritePMULLD.XMM, 0>, 5862 VEX_4V, VEX_WIG; 5863let Predicates = [HasAVX] in 5864 defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128, 5865 load, i128mem, SchedWriteVecALU.XMM, 0>, 5866 VEX_4V, VEX_WIG; 5867 5868let Predicates = [HasAVX2, NoVLX] in 5869 defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256, 5870 load, i256mem, SchedWritePMULLD.YMM, 0>, 5871 VEX_4V, VEX_L, VEX_WIG; 5872let Predicates = [HasAVX2] in 5873 defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256, 5874 load, i256mem, SchedWriteVecALU.YMM, 0>, 5875 VEX_4V, VEX_L, VEX_WIG; 5876 5877let Constraints = "$src1 = $dst" in { 5878 defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128, 5879 memop, i128mem, SchedWritePMULLD.XMM, 1>; 5880 defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128, 5881 memop, i128mem, SchedWriteVecALU.XMM, 1>; 5882} 5883 5884/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate 5885multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, 5886 Intrinsic IntId, RegisterClass RC, PatFrag memop_frag, 5887 X86MemOperand x86memop, bit Is2Addr, 5888 X86FoldableSchedWrite sched> { 5889 let isCommutable = 1 in 5890 def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst), 5891 (ins RC:$src1, RC:$src2, u8imm:$src3), 5892 !if(Is2Addr, 5893 !strconcat(OpcodeStr, 5894 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5895 !strconcat(OpcodeStr, 5896 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5897 [(set RC:$dst, (IntId RC:$src1, RC:$src2, timm:$src3))]>, 5898 Sched<[sched]>; 5899 def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst), 5900 (ins RC:$src1, x86memop:$src2, u8imm:$src3), 5901 !if(Is2Addr, 5902 !strconcat(OpcodeStr, 5903 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5904 !strconcat(OpcodeStr, 5905 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5906 [(set RC:$dst, 5907 (IntId RC:$src1, (memop_frag addr:$src2), timm:$src3))]>, 5908 Sched<[sched.Folded, sched.ReadAfterFold]>; 5909} 5910 5911/// SS41I_binop_rmi - SSE 4.1 binary operator with 8-bit immediate 
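/// This is the SDNode analogue of SS41I_binop_rmi_int above: it matches a
/// target node with an explicit result ValueType instead of an Intrinsic,
/// with the immediate operand matched as timm:$src3 in both the rri and rmi
/// patterns.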
5912multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode, 5913 ValueType OpVT, RegisterClass RC, PatFrag memop_frag, 5914 X86MemOperand x86memop, bit Is2Addr, 5915 X86FoldableSchedWrite sched> { 5916 let isCommutable = 1 in 5917 def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst), 5918 (ins RC:$src1, RC:$src2, u8imm:$src3), 5919 !if(Is2Addr, 5920 !strconcat(OpcodeStr, 5921 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5922 !strconcat(OpcodeStr, 5923 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5924 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>, 5925 Sched<[sched]>; 5926 def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst), 5927 (ins RC:$src1, x86memop:$src2, u8imm:$src3), 5928 !if(Is2Addr, 5929 !strconcat(OpcodeStr, 5930 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5931 !strconcat(OpcodeStr, 5932 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5933 [(set RC:$dst, 5934 (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>, 5935 Sched<[sched.Folded, sched.ReadAfterFold]>; 5936} 5937 5938def BlendCommuteImm2 : SDNodeXForm<timm, [{ 5939 uint8_t Imm = N->getZExtValue() & 0x03; 5940 return getI8Imm(Imm ^ 0x03, SDLoc(N)); 5941}]>; 5942 5943def BlendCommuteImm4 : SDNodeXForm<timm, [{ 5944 uint8_t Imm = N->getZExtValue() & 0x0f; 5945 return getI8Imm(Imm ^ 0x0f, SDLoc(N)); 5946}]>; 5947 5948def BlendCommuteImm8 : SDNodeXForm<timm, [{ 5949 uint8_t Imm = N->getZExtValue() & 0xff; 5950 return getI8Imm(Imm ^ 0xff, SDLoc(N)); 5951}]>; 5952 5953// Turn a 4-bit blendi immediate to 8-bit for use with pblendw. 5954def BlendScaleImm4 : SDNodeXForm<timm, [{ 5955 uint8_t Imm = N->getZExtValue(); 5956 uint8_t NewImm = 0; 5957 for (unsigned i = 0; i != 4; ++i) { 5958 if (Imm & (1 << i)) 5959 NewImm |= 0x3 << (i * 2); 5960 } 5961 return getI8Imm(NewImm, SDLoc(N)); 5962}]>; 5963 5964// Turn a 2-bit blendi immediate to 8-bit for use with pblendw. 5965def BlendScaleImm2 : SDNodeXForm<timm, [{ 5966 uint8_t Imm = N->getZExtValue(); 5967 uint8_t NewImm = 0; 5968 for (unsigned i = 0; i != 2; ++i) { 5969 if (Imm & (1 << i)) 5970 NewImm |= 0xf << (i * 4); 5971 } 5972 return getI8Imm(NewImm, SDLoc(N)); 5973}]>; 5974 5975// Turn a 2-bit blendi immediate to 4-bit for use with pblendd. 5976def BlendScaleImm2to4 : SDNodeXForm<timm, [{ 5977 uint8_t Imm = N->getZExtValue(); 5978 uint8_t NewImm = 0; 5979 for (unsigned i = 0; i != 2; ++i) { 5980 if (Imm & (1 << i)) 5981 NewImm |= 0x3 << (i * 2); 5982 } 5983 return getI8Imm(NewImm, SDLoc(N)); 5984}]>; 5985 5986// Turn a 4-bit blendi immediate to 8-bit for use with pblendw and invert it. 5987def BlendScaleCommuteImm4 : SDNodeXForm<timm, [{ 5988 uint8_t Imm = N->getZExtValue(); 5989 uint8_t NewImm = 0; 5990 for (unsigned i = 0; i != 4; ++i) { 5991 if (Imm & (1 << i)) 5992 NewImm |= 0x3 << (i * 2); 5993 } 5994 return getI8Imm(NewImm ^ 0xff, SDLoc(N)); 5995}]>; 5996 5997// Turn a 2-bit blendi immediate to 8-bit for use with pblendw and invert it. 5998def BlendScaleCommuteImm2 : SDNodeXForm<timm, [{ 5999 uint8_t Imm = N->getZExtValue(); 6000 uint8_t NewImm = 0; 6001 for (unsigned i = 0; i != 2; ++i) { 6002 if (Imm & (1 << i)) 6003 NewImm |= 0xf << (i * 4); 6004 } 6005 return getI8Imm(NewImm ^ 0xff, SDLoc(N)); 6006}]>; 6007 6008// Turn a 2-bit blendi immediate to 4-bit for use with pblendd and invert it. 
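// For example (worked out by hand): the 2-bit blend immediate 0b01, which
// takes 64-bit element 0 from the second source, scales to the pblendd mask
// 0b0011; inverting it for the commuted operand order gives 0b1100 (0xc).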
6009def BlendScaleCommuteImm2to4 : SDNodeXForm<timm, [{ 6010 uint8_t Imm = N->getZExtValue(); 6011 uint8_t NewImm = 0; 6012 for (unsigned i = 0; i != 2; ++i) { 6013 if (Imm & (1 << i)) 6014 NewImm |= 0x3 << (i * 2); 6015 } 6016 return getI8Imm(NewImm ^ 0xf, SDLoc(N)); 6017}]>; 6018 6019let Predicates = [HasAVX] in { 6020 let isCommutable = 0 in { 6021 defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw, 6022 VR128, load, i128mem, 0, 6023 SchedWriteMPSAD.XMM>, VEX_4V, VEX_WIG; 6024 } 6025 6026let Uses = [MXCSR], mayRaiseFPException = 1 in { 6027 let ExeDomain = SSEPackedSingle in 6028 defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, 6029 VR128, load, f128mem, 0, 6030 SchedWriteDPPS.XMM>, VEX_4V, VEX_WIG; 6031 let ExeDomain = SSEPackedDouble in 6032 defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd, 6033 VR128, load, f128mem, 0, 6034 SchedWriteDPPD.XMM>, VEX_4V, VEX_WIG; 6035 let ExeDomain = SSEPackedSingle in 6036 defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256, 6037 VR256, load, i256mem, 0, 6038 SchedWriteDPPS.YMM>, VEX_4V, VEX_L, VEX_WIG; 6039} 6040} 6041 6042let Predicates = [HasAVX2] in { 6043 let isCommutable = 0 in { 6044 defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw, 6045 VR256, load, i256mem, 0, 6046 SchedWriteMPSAD.YMM>, VEX_4V, VEX_L, VEX_WIG; 6047 } 6048} 6049 6050let Constraints = "$src1 = $dst" in { 6051 let isCommutable = 0 in { 6052 defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw, 6053 VR128, memop, i128mem, 1, 6054 SchedWriteMPSAD.XMM>; 6055 } 6056 6057 let ExeDomain = SSEPackedSingle in 6058 defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps, 6059 VR128, memop, f128mem, 1, 6060 SchedWriteDPPS.XMM>, SIMD_EXC; 6061 let ExeDomain = SSEPackedDouble in 6062 defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd, 6063 VR128, memop, f128mem, 1, 6064 SchedWriteDPPD.XMM>, SIMD_EXC; 6065} 6066 6067/// SS41I_blend_rmi - SSE 4.1 blend with 8-bit immediate 6068multiclass SS41I_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode, 6069 ValueType OpVT, RegisterClass RC, PatFrag memop_frag, 6070 X86MemOperand x86memop, bit Is2Addr, Domain d, 6071 X86FoldableSchedWrite sched, SDNodeXForm commuteXForm> { 6072let ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in { 6073 let isCommutable = 1 in 6074 def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst), 6075 (ins RC:$src1, RC:$src2, u8imm:$src3), 6076 !if(Is2Addr, 6077 !strconcat(OpcodeStr, 6078 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6079 !strconcat(OpcodeStr, 6080 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6081 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>, 6082 Sched<[sched]>; 6083 def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst), 6084 (ins RC:$src1, x86memop:$src2, u8imm:$src3), 6085 !if(Is2Addr, 6086 !strconcat(OpcodeStr, 6087 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6088 !strconcat(OpcodeStr, 6089 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6090 [(set RC:$dst, 6091 (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>, 6092 Sched<[sched.Folded, sched.ReadAfterFold]>; 6093} 6094 6095 // Pattern to commute if load is in first source. 
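  // e.g. for a v4f32 blend, BlendCommuteImm4 rewrites mask 0b0101 to
  // 0b1010 (Imm ^ 0xf), since after swapping the sources each bit must now
  // select the element from the other operand.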
6096 def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, timm:$src3)), 6097 (!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2, 6098 (commuteXForm timm:$src3))>; 6099} 6100 6101let Predicates = [HasAVX] in { 6102 defm VBLENDPS : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v4f32, 6103 VR128, load, f128mem, 0, SSEPackedSingle, 6104 SchedWriteFBlend.XMM, BlendCommuteImm4>, 6105 VEX_4V, VEX_WIG; 6106 defm VBLENDPSY : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v8f32, 6107 VR256, load, f256mem, 0, SSEPackedSingle, 6108 SchedWriteFBlend.YMM, BlendCommuteImm8>, 6109 VEX_4V, VEX_L, VEX_WIG; 6110 defm VBLENDPD : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v2f64, 6111 VR128, load, f128mem, 0, SSEPackedDouble, 6112 SchedWriteFBlend.XMM, BlendCommuteImm2>, 6113 VEX_4V, VEX_WIG; 6114 defm VBLENDPDY : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v4f64, 6115 VR256, load, f256mem, 0, SSEPackedDouble, 6116 SchedWriteFBlend.YMM, BlendCommuteImm4>, 6117 VEX_4V, VEX_L, VEX_WIG; 6118 defm VPBLENDW : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v8i16, 6119 VR128, load, i128mem, 0, SSEPackedInt, 6120 SchedWriteBlend.XMM, BlendCommuteImm8>, 6121 VEX_4V, VEX_WIG; 6122} 6123 6124let Predicates = [HasAVX2] in { 6125 defm VPBLENDWY : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v16i16, 6126 VR256, load, i256mem, 0, SSEPackedInt, 6127 SchedWriteBlend.YMM, BlendCommuteImm8>, 6128 VEX_4V, VEX_L, VEX_WIG; 6129} 6130 6131// Emulate vXi32/vXi64 blends with vXf32/vXf64 or pblendw. 6132// ExecutionDomainFixPass will cleanup domains later on. 6133let Predicates = [HasAVX1Only] in { 6134def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3), 6135 (VBLENDPDYrri VR256:$src1, VR256:$src2, timm:$src3)>; 6136def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3), 6137 (VBLENDPDYrmi VR256:$src1, addr:$src2, timm:$src3)>; 6138def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3), 6139 (VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 timm:$src3))>; 6140 6141// Use pblendw for 128-bit integer to keep it in the integer domain and prevent 6142// it from becoming movsd via commuting under optsize. 6143def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3), 6144 (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 timm:$src3))>; 6145def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3), 6146 (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 timm:$src3))>; 6147def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3), 6148 (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 timm:$src3))>; 6149 6150def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), timm:$src3), 6151 (VBLENDPSYrri VR256:$src1, VR256:$src2, timm:$src3)>; 6152def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), timm:$src3), 6153 (VBLENDPSYrmi VR256:$src1, addr:$src2, timm:$src3)>; 6154def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, timm:$src3), 6155 (VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 timm:$src3))>; 6156 6157// Use pblendw for 128-bit integer to keep it in the integer domain and prevent 6158// it from becoming movss via commuting under optsize. 
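// For example, the v4i32 blend mask 0b0001 (take dword 0 from the second
// source) is rewritten by BlendScaleImm4 to the pblendw mask 0b00000011,
// i.e. the two words that make up dword 0.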
6159def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3), 6160 (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 timm:$src3))>; 6161def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), timm:$src3), 6162 (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>; 6163def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, timm:$src3), 6164 (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>; 6165} 6166 6167defm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32, 6168 VR128, memop, f128mem, 1, SSEPackedSingle, 6169 SchedWriteFBlend.XMM, BlendCommuteImm4>; 6170defm BLENDPD : SS41I_blend_rmi<0x0D, "blendpd", X86Blendi, v2f64, 6171 VR128, memop, f128mem, 1, SSEPackedDouble, 6172 SchedWriteFBlend.XMM, BlendCommuteImm2>; 6173defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16, 6174 VR128, memop, i128mem, 1, SSEPackedInt, 6175 SchedWriteBlend.XMM, BlendCommuteImm8>; 6176 6177let Predicates = [UseSSE41] in { 6178// Use pblendw for 128-bit integer to keep it in the integer domain and prevent 6179// it from becoming movss via commuting under optsize. 6180def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3), 6181 (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 timm:$src3))>; 6182def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), timm:$src3), 6183 (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 timm:$src3))>; 6184def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, timm:$src3), 6185 (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 timm:$src3))>; 6186 6187def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3), 6188 (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 timm:$src3))>; 6189def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), timm:$src3), 6190 (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>; 6191def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, timm:$src3), 6192 (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>; 6193} 6194 6195// For insertion into the zero index (low half) of a 256-bit vector, it is 6196// more efficient to generate a blend with immediate instead of an insert*128. 
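// The immediates below follow from the blend semantics: 0x3 (two f64
// elements) and 0xf (four f32 elements) take the low half from the widened
// 128-bit value, while the commuted load forms use 0xc and 0xf0 to take the
// upper half from the loaded 256-bit vector instead.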
6197let Predicates = [HasAVX] in { 6198def : Pat<(insert_subvector (v4f64 VR256:$src1), (v2f64 VR128:$src2), (iPTR 0)), 6199 (VBLENDPDYrri VR256:$src1, 6200 (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), 6201 VR128:$src2, sub_xmm), 0x3)>; 6202def : Pat<(insert_subvector (v8f32 VR256:$src1), (v4f32 VR128:$src2), (iPTR 0)), 6203 (VBLENDPSYrri VR256:$src1, 6204 (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), 6205 VR128:$src2, sub_xmm), 0xf)>; 6206 6207def : Pat<(insert_subvector (loadv4f64 addr:$src2), (v2f64 VR128:$src1), (iPTR 0)), 6208 (VBLENDPDYrmi (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), 6209 VR128:$src1, sub_xmm), addr:$src2, 0xc)>; 6210def : Pat<(insert_subvector (loadv8f32 addr:$src2), (v4f32 VR128:$src1), (iPTR 0)), 6211 (VBLENDPSYrmi (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), 6212 VR128:$src1, sub_xmm), addr:$src2, 0xf0)>; 6213} 6214 6215/// SS41I_quaternary_vx - AVX SSE 4.1 with 4 operators 6216multiclass SS41I_quaternary_avx<bits<8> opc, string OpcodeStr, RegisterClass RC, 6217 X86MemOperand x86memop, ValueType VT, 6218 PatFrag mem_frag, SDNode OpNode, 6219 X86FoldableSchedWrite sched> { 6220 def rr : Ii8Reg<opc, MRMSrcReg, (outs RC:$dst), 6221 (ins RC:$src1, RC:$src2, RC:$src3), 6222 !strconcat(OpcodeStr, 6223 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 6224 [(set RC:$dst, (VT (OpNode RC:$src3, RC:$src2, RC:$src1)))], 6225 SSEPackedInt>, TAPD, VEX_4V, 6226 Sched<[sched]>; 6227 6228 def rm : Ii8Reg<opc, MRMSrcMem, (outs RC:$dst), 6229 (ins RC:$src1, x86memop:$src2, RC:$src3), 6230 !strconcat(OpcodeStr, 6231 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 6232 [(set RC:$dst, 6233 (OpNode RC:$src3, (mem_frag addr:$src2), 6234 RC:$src1))], SSEPackedInt>, TAPD, VEX_4V, 6235 Sched<[sched.Folded, sched.ReadAfterFold, 6236 // x86memop:$src2 6237 ReadDefault, ReadDefault, ReadDefault, ReadDefault, 6238 ReadDefault, 6239 // RC::$src3 6240 sched.ReadAfterFold]>; 6241} 6242 6243let Predicates = [HasAVX] in { 6244let ExeDomain = SSEPackedDouble in { 6245defm VBLENDVPD : SS41I_quaternary_avx<0x4B, "vblendvpd", VR128, f128mem, 6246 v2f64, loadv2f64, X86Blendv, 6247 SchedWriteFVarBlend.XMM>; 6248defm VBLENDVPDY : SS41I_quaternary_avx<0x4B, "vblendvpd", VR256, f256mem, 6249 v4f64, loadv4f64, X86Blendv, 6250 SchedWriteFVarBlend.YMM>, VEX_L; 6251} // ExeDomain = SSEPackedDouble 6252let ExeDomain = SSEPackedSingle in { 6253defm VBLENDVPS : SS41I_quaternary_avx<0x4A, "vblendvps", VR128, f128mem, 6254 v4f32, loadv4f32, X86Blendv, 6255 SchedWriteFVarBlend.XMM>; 6256defm VBLENDVPSY : SS41I_quaternary_avx<0x4A, "vblendvps", VR256, f256mem, 6257 v8f32, loadv8f32, X86Blendv, 6258 SchedWriteFVarBlend.YMM>, VEX_L; 6259} // ExeDomain = SSEPackedSingle 6260defm VPBLENDVB : SS41I_quaternary_avx<0x4C, "vpblendvb", VR128, i128mem, 6261 v16i8, loadv16i8, X86Blendv, 6262 SchedWriteVarBlend.XMM>; 6263} 6264 6265let Predicates = [HasAVX2] in { 6266defm VPBLENDVBY : SS41I_quaternary_avx<0x4C, "vpblendvb", VR256, i256mem, 6267 v32i8, loadv32i8, X86Blendv, 6268 SchedWriteVarBlend.YMM>, VEX_L; 6269} 6270 6271let Predicates = [HasAVX] in { 6272 def : Pat<(v4i32 (X86Blendv (v4i32 VR128:$mask), (v4i32 VR128:$src1), 6273 (v4i32 VR128:$src2))), 6274 (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>; 6275 def : Pat<(v2i64 (X86Blendv (v2i64 VR128:$mask), (v2i64 VR128:$src1), 6276 (v2i64 VR128:$src2))), 6277 (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>; 6278 def : Pat<(v8i32 (X86Blendv (v8i32 VR256:$mask), (v8i32 VR256:$src1), 6279 (v8i32 VR256:$src2))), 6280 (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>; 
  def : Pat<(v4i64 (X86Blendv (v4i64 VR256:$mask), (v4i64 VR256:$src1),
                              (v4i64 VR256:$src2))),
            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
}

// Prefer a movss or movsd over a blendps when optimizing for size. These
// patterns were changed to use blends because blends have better throughput
// on Sandy Bridge and Haswell, but movs[s/d] are 1-2 byte shorter
// instructions.
let Predicates = [HasAVX, OptForSpeed] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (VBLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (VPBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;

  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
            (VBLENDPSrri VR128:$src1, VR128:$src2, (i8 1))>;
  def : Pat<(v4f32 (X86Movss VR128:$src1, (loadv4f32 addr:$src2))),
            (VBLENDPSrmi VR128:$src1, addr:$src2, (i8 1))>;
  def : Pat<(v4f32 (X86Movss (loadv4f32 addr:$src2), VR128:$src1)),
            (VBLENDPSrmi VR128:$src1, addr:$src2, (i8 0xe))>;

  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
            (VBLENDPDrri VR128:$src1, VR128:$src2, (i8 1))>;
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (loadv2f64 addr:$src2))),
            (VBLENDPDrmi VR128:$src1, addr:$src2, (i8 1))>;
  def : Pat<(v2f64 (X86Movsd (loadv2f64 addr:$src2), VR128:$src1)),
            (VBLENDPDrmi VR128:$src1, addr:$src2, (i8 2))>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                     (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)),
                     (i8 1))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                     (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)),
                     (i8 3))), sub_xmm)>;
}

// Prefer a movss or movsd over a blendps when optimizing for size. These
// patterns were changed to use blends because blends have better throughput
// on Sandy Bridge and Haswell, but movs[s/d] are 1-2 byte shorter
// instructions.
let Predicates = [UseSSE41, OptForSpeed] in {
  // With SSE41 we can use blends for these patterns.
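  // e.g. (BLENDPSrri (V_SET0), $src, 1) takes element 0 from $src and
  // elements 1-3 from the zero vector, which is exactly the vzmovl
  // semantics of moving the low element and clearing the high bits.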
6327 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), 6328 (BLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>; 6329 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), 6330 (PBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>; 6331 6332 def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), 6333 (BLENDPSrri VR128:$src1, VR128:$src2, (i8 1))>; 6334 def : Pat<(v4f32 (X86Movss VR128:$src1, (memopv4f32 addr:$src2))), 6335 (BLENDPSrmi VR128:$src1, addr:$src2, (i8 1))>; 6336 def : Pat<(v4f32 (X86Movss (memopv4f32 addr:$src2), VR128:$src1)), 6337 (BLENDPSrmi VR128:$src1, addr:$src2, (i8 0xe))>; 6338 6339 def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), 6340 (BLENDPDrri VR128:$src1, VR128:$src2, (i8 1))>; 6341 def : Pat<(v2f64 (X86Movsd VR128:$src1, (memopv2f64 addr:$src2))), 6342 (BLENDPDrmi VR128:$src1, addr:$src2, (i8 1))>; 6343 def : Pat<(v2f64 (X86Movsd (memopv2f64 addr:$src2), VR128:$src1)), 6344 (BLENDPDrmi VR128:$src1, addr:$src2, (i8 2))>; 6345} 6346 6347 6348/// SS41I_ternary - SSE 4.1 ternary operator 6349let Uses = [XMM0], Constraints = "$src1 = $dst" in { 6350 multiclass SS41I_ternary<bits<8> opc, string OpcodeStr, ValueType VT, 6351 PatFrag mem_frag, X86MemOperand x86memop, 6352 SDNode OpNode, X86FoldableSchedWrite sched> { 6353 def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst), 6354 (ins VR128:$src1, VR128:$src2), 6355 !strconcat(OpcodeStr, 6356 "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"), 6357 [(set VR128:$dst, 6358 (VT (OpNode XMM0, VR128:$src2, VR128:$src1)))]>, 6359 Sched<[sched]>; 6360 6361 def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst), 6362 (ins VR128:$src1, x86memop:$src2), 6363 !strconcat(OpcodeStr, 6364 "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"), 6365 [(set VR128:$dst, 6366 (OpNode XMM0, (mem_frag addr:$src2), VR128:$src1))]>, 6367 Sched<[sched.Folded, sched.ReadAfterFold]>; 6368 } 6369} 6370 6371let ExeDomain = SSEPackedDouble in 6372defm BLENDVPD : SS41I_ternary<0x15, "blendvpd", v2f64, memopv2f64, f128mem, 6373 X86Blendv, SchedWriteFVarBlend.XMM>; 6374let ExeDomain = SSEPackedSingle in 6375defm BLENDVPS : SS41I_ternary<0x14, "blendvps", v4f32, memopv4f32, f128mem, 6376 X86Blendv, SchedWriteFVarBlend.XMM>; 6377defm PBLENDVB : SS41I_ternary<0x10, "pblendvb", v16i8, memopv16i8, i128mem, 6378 X86Blendv, SchedWriteVarBlend.XMM>; 6379 6380// Aliases with the implicit xmm0 argument 6381def : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}", 6382 (BLENDVPDrr0 VR128:$dst, VR128:$src2), 0>; 6383def : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}", 6384 (BLENDVPDrm0 VR128:$dst, f128mem:$src2), 0>; 6385def : InstAlias<"blendvps\t{$src2, $dst|$dst, $src2}", 6386 (BLENDVPSrr0 VR128:$dst, VR128:$src2), 0>; 6387def : InstAlias<"blendvps\t{$src2, $dst|$dst, $src2}", 6388 (BLENDVPSrm0 VR128:$dst, f128mem:$src2), 0>; 6389def : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}", 6390 (PBLENDVBrr0 VR128:$dst, VR128:$src2), 0>; 6391def : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}", 6392 (PBLENDVBrm0 VR128:$dst, i128mem:$src2), 0>; 6393 6394let Predicates = [UseSSE41] in { 6395 def : Pat<(v4i32 (X86Blendv (v4i32 XMM0), (v4i32 VR128:$src1), 6396 (v4i32 VR128:$src2))), 6397 (BLENDVPSrr0 VR128:$src2, VR128:$src1)>; 6398 def : Pat<(v2i64 (X86Blendv (v2i64 XMM0), (v2i64 VR128:$src1), 6399 (v2i64 VR128:$src2))), 6400 (BLENDVPDrr0 VR128:$src2, VR128:$src1)>; 6401} 6402 6403let AddedComplexity = 400 in { // Prefer non-temporal versions 6404 6405let Predicates = [HasAVX, NoVLX] in 6406def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 6407 "vmovntdqa\t{$src, 
$dst|$dst, $src}", []>, 6408 Sched<[SchedWriteVecMoveLSNT.XMM.RM]>, VEX, VEX_WIG; 6409let Predicates = [HasAVX2, NoVLX] in 6410def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), 6411 "vmovntdqa\t{$src, $dst|$dst, $src}", []>, 6412 Sched<[SchedWriteVecMoveLSNT.YMM.RM]>, VEX, VEX_L, VEX_WIG; 6413def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 6414 "movntdqa\t{$src, $dst|$dst, $src}", []>, 6415 Sched<[SchedWriteVecMoveLSNT.XMM.RM]>; 6416 6417let Predicates = [HasAVX2, NoVLX] in { 6418 def : Pat<(v8f32 (alignednontemporalload addr:$src)), 6419 (VMOVNTDQAYrm addr:$src)>; 6420 def : Pat<(v4f64 (alignednontemporalload addr:$src)), 6421 (VMOVNTDQAYrm addr:$src)>; 6422 def : Pat<(v4i64 (alignednontemporalload addr:$src)), 6423 (VMOVNTDQAYrm addr:$src)>; 6424 def : Pat<(v8i32 (alignednontemporalload addr:$src)), 6425 (VMOVNTDQAYrm addr:$src)>; 6426 def : Pat<(v16i16 (alignednontemporalload addr:$src)), 6427 (VMOVNTDQAYrm addr:$src)>; 6428 def : Pat<(v32i8 (alignednontemporalload addr:$src)), 6429 (VMOVNTDQAYrm addr:$src)>; 6430} 6431 6432let Predicates = [HasAVX, NoVLX] in { 6433 def : Pat<(v4f32 (alignednontemporalload addr:$src)), 6434 (VMOVNTDQArm addr:$src)>; 6435 def : Pat<(v2f64 (alignednontemporalload addr:$src)), 6436 (VMOVNTDQArm addr:$src)>; 6437 def : Pat<(v2i64 (alignednontemporalload addr:$src)), 6438 (VMOVNTDQArm addr:$src)>; 6439 def : Pat<(v4i32 (alignednontemporalload addr:$src)), 6440 (VMOVNTDQArm addr:$src)>; 6441 def : Pat<(v8i16 (alignednontemporalload addr:$src)), 6442 (VMOVNTDQArm addr:$src)>; 6443 def : Pat<(v16i8 (alignednontemporalload addr:$src)), 6444 (VMOVNTDQArm addr:$src)>; 6445} 6446 6447let Predicates = [UseSSE41] in { 6448 def : Pat<(v4f32 (alignednontemporalload addr:$src)), 6449 (MOVNTDQArm addr:$src)>; 6450 def : Pat<(v2f64 (alignednontemporalload addr:$src)), 6451 (MOVNTDQArm addr:$src)>; 6452 def : Pat<(v2i64 (alignednontemporalload addr:$src)), 6453 (MOVNTDQArm addr:$src)>; 6454 def : Pat<(v4i32 (alignednontemporalload addr:$src)), 6455 (MOVNTDQArm addr:$src)>; 6456 def : Pat<(v8i16 (alignednontemporalload addr:$src)), 6457 (MOVNTDQArm addr:$src)>; 6458 def : Pat<(v16i8 (alignednontemporalload addr:$src)), 6459 (MOVNTDQArm addr:$src)>; 6460} 6461 6462} // AddedComplexity 6463 6464//===----------------------------------------------------------------------===// 6465// SSE4.2 - Compare Instructions 6466//===----------------------------------------------------------------------===// 6467 6468/// SS42I_binop_rm - Simple SSE 4.2 binary operator 6469multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 6470 ValueType OpVT, RegisterClass RC, PatFrag memop_frag, 6471 X86MemOperand x86memop, X86FoldableSchedWrite sched, 6472 bit Is2Addr = 1> { 6473 def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst), 6474 (ins RC:$src1, RC:$src2), 6475 !if(Is2Addr, 6476 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 6477 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 6478 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, 6479 Sched<[sched]>; 6480 def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst), 6481 (ins RC:$src1, x86memop:$src2), 6482 !if(Is2Addr, 6483 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 6484 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 6485 [(set RC:$dst, 6486 (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, 6487 Sched<[sched.Folded, sched.ReadAfterFold]>; 6488} 6489 6490let Predicates = [HasAVX] in 6491 defm VPCMPGTQ : 
SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128, 6492 load, i128mem, SchedWriteVecALU.XMM, 0>, 6493 VEX_4V, VEX_WIG; 6494 6495let Predicates = [HasAVX2] in 6496 defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256, 6497 load, i256mem, SchedWriteVecALU.YMM, 0>, 6498 VEX_4V, VEX_L, VEX_WIG; 6499 6500let Constraints = "$src1 = $dst" in 6501 defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128, 6502 memop, i128mem, SchedWriteVecALU.XMM>; 6503 6504//===----------------------------------------------------------------------===// 6505// SSE4.2 - String/text Processing Instructions 6506//===----------------------------------------------------------------------===// 6507 6508multiclass pcmpistrm_SS42AI<string asm> { 6509 def rr : SS42AI<0x62, MRMSrcReg, (outs), 6510 (ins VR128:$src1, VR128:$src2, u8imm:$src3), 6511 !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), 6512 []>, Sched<[WritePCmpIStrM]>; 6513 let mayLoad = 1 in 6514 def rm :SS42AI<0x62, MRMSrcMem, (outs), 6515 (ins VR128:$src1, i128mem:$src2, u8imm:$src3), 6516 !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), 6517 []>, Sched<[WritePCmpIStrM.Folded, WritePCmpIStrM.ReadAfterFold]>; 6518} 6519 6520let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in { 6521 let Predicates = [HasAVX] in 6522 defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX, VEX_WIG; 6523 defm PCMPISTRM : pcmpistrm_SS42AI<"pcmpistrm"> ; 6524} 6525 6526multiclass SS42AI_pcmpestrm<string asm> { 6527 def rr : SS42AI<0x60, MRMSrcReg, (outs), 6528 (ins VR128:$src1, VR128:$src3, u8imm:$src5), 6529 !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), 6530 []>, Sched<[WritePCmpEStrM]>; 6531 let mayLoad = 1 in 6532 def rm : SS42AI<0x60, MRMSrcMem, (outs), 6533 (ins VR128:$src1, i128mem:$src3, u8imm:$src5), 6534 !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), 6535 []>, Sched<[WritePCmpEStrM.Folded, WritePCmpEStrM.ReadAfterFold]>; 6536} 6537 6538let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in { 6539 let Predicates = [HasAVX] in 6540 defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX, VEX_WIG; 6541 defm PCMPESTRM : SS42AI_pcmpestrm<"pcmpestrm">; 6542} 6543 6544multiclass SS42AI_pcmpistri<string asm> { 6545 def rr : SS42AI<0x63, MRMSrcReg, (outs), 6546 (ins VR128:$src1, VR128:$src2, u8imm:$src3), 6547 !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), 6548 []>, Sched<[WritePCmpIStrI]>; 6549 let mayLoad = 1 in 6550 def rm : SS42AI<0x63, MRMSrcMem, (outs), 6551 (ins VR128:$src1, i128mem:$src2, u8imm:$src3), 6552 !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), 6553 []>, Sched<[WritePCmpIStrI.Folded, WritePCmpIStrI.ReadAfterFold]>; 6554} 6555 6556let Defs = [ECX, EFLAGS], hasSideEffects = 0 in { 6557 let Predicates = [HasAVX] in 6558 defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX, VEX_WIG; 6559 defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">; 6560} 6561 6562multiclass SS42AI_pcmpestri<string asm> { 6563 def rr : SS42AI<0x61, MRMSrcReg, (outs), 6564 (ins VR128:$src1, VR128:$src3, u8imm:$src5), 6565 !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), 6566 []>, Sched<[WritePCmpEStrI]>; 6567 let mayLoad = 1 in 6568 def rm : SS42AI<0x61, MRMSrcMem, (outs), 6569 (ins VR128:$src1, i128mem:$src3, u8imm:$src5), 6570 !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), 6571 []>, Sched<[WritePCmpEStrI.Folded, WritePCmpEStrI.ReadAfterFold]>; 6572} 6573 6574let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in { 
  let Predicates = [HasAVX] in
    defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX, VEX_WIG;
  defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">;
}

//===----------------------------------------------------------------------===//
// SSE4.2 - CRC Instructions
//===----------------------------------------------------------------------===//

// No CRC instructions have AVX equivalents.

// CRC intrinsic instructions.
// These come only in rr and rm forms; the variants differ only in the sizes
// of r and m.
class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut,
                   RegisterClass RCIn, SDPatternOperator Int> :
  SS42FI<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
         !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
         [(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))]>,
  Sched<[WriteCRC32]>;

class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
                   X86MemOperand x86memop, SDPatternOperator Int> :
  SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
         !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
         [(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))]>,
  Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>;

let Constraints = "$src1 = $dst" in {
  def CRC32r32m8  : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem,
                                 int_x86_sse42_crc32_32_8>;
  def CRC32r32r8  : SS42I_crc32r<0xF0, "crc32{b}", GR32, GR8,
                                 int_x86_sse42_crc32_32_8>;
  def CRC32r32m16 : SS42I_crc32m<0xF1, "crc32{w}", GR32, i16mem,
                                 int_x86_sse42_crc32_32_16>, OpSize16;
  def CRC32r32r16 : SS42I_crc32r<0xF1, "crc32{w}", GR32, GR16,
                                 int_x86_sse42_crc32_32_16>, OpSize16;
  def CRC32r32m32 : SS42I_crc32m<0xF1, "crc32{l}", GR32, i32mem,
                                 int_x86_sse42_crc32_32_32>, OpSize32;
  def CRC32r32r32 : SS42I_crc32r<0xF1, "crc32{l}", GR32, GR32,
                                 int_x86_sse42_crc32_32_32>, OpSize32;
  def CRC32r64m64 : SS42I_crc32m<0xF1, "crc32{q}", GR64, i64mem,
                                 int_x86_sse42_crc32_64_64>, REX_W;
  def CRC32r64r64 : SS42I_crc32r<0xF1, "crc32{q}", GR64, GR64,
                                 int_x86_sse42_crc32_64_64>, REX_W;
  let hasSideEffects = 0 in {
    let mayLoad = 1 in
    def CRC32r64m8 : SS42I_crc32m<0xF0, "crc32{b}", GR64, i8mem,
                                  null_frag>, REX_W;
    def CRC32r64r8 : SS42I_crc32r<0xF0, "crc32{b}", GR64, GR8,
                                  null_frag>, REX_W;
  }
}

//===----------------------------------------------------------------------===//
// SHA-NI Instructions
//===----------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for SHA than WriteVecIMul?
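// Note: sha256rnds2 (instantiated below with UsesXMM0 = 1) takes its third
// source implicitly in XMM0, which is why the UsesXMM0 asm string prints
// %xmm0 and the pattern passes XMM0 as an extra operand to the intrinsic.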
multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
                      X86FoldableSchedWrite sched, bit UsesXMM0 = 0> {
  def rr : I<Opc, MRMSrcReg, (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2),
             !if(UsesXMM0,
                 !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
             [!if(UsesXMM0,
                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>,
             T8PS, Sched<[sched]>;

  def rm : I<Opc, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, i128mem:$src2),
             !if(UsesXMM0,
                 !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
             [!if(UsesXMM0,
                  (set VR128:$dst, (IntId VR128:$src1,
                                    (memop addr:$src2), XMM0)),
                  (set VR128:$dst, (IntId VR128:$src1,
                                    (memop addr:$src2))))]>, T8PS,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
  def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst),
                         (ins VR128:$src1, VR128:$src2, u8imm:$src3),
                         "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                         [(set VR128:$dst,
                           (int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
                            (i8 timm:$src3)))]>, TAPS,
                         Sched<[SchedWriteVecIMul.XMM]>;
  def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst),
                         (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
                         "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                         [(set VR128:$dst,
                           (int_x86_sha1rnds4 VR128:$src1,
                            (memop addr:$src2),
                            (i8 timm:$src3)))]>, TAPS,
                         Sched<[SchedWriteVecIMul.XMM.Folded,
                                SchedWriteVecIMul.XMM.ReadAfterFold]>;

  defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte,
                              SchedWriteVecIMul.XMM>;
  defm SHA1MSG1 : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1,
                             SchedWriteVecIMul.XMM>;
  defm SHA1MSG2 : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2,
                             SchedWriteVecIMul.XMM>;

  let Uses = [XMM0] in
  defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2,
                                SchedWriteVecIMul.XMM, 1>;

  defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1,
                               SchedWriteVecIMul.XMM>;
  defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2,
                               SchedWriteVecIMul.XMM>;
}

// Aliases with explicit %xmm0
def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
                (SHA256RNDS2rr VR128:$dst, VR128:$src2), 0>;
def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
                (SHA256RNDS2rm VR128:$dst, i128mem:$src2), 0>;

//===----------------------------------------------------------------------===//
// AES-NI Instructions
//===----------------------------------------------------------------------===//

multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
                             Intrinsic IntId, PatFrag ld_frag,
                             bit Is2Addr = 0, RegisterClass RC = VR128,
                             X86MemOperand MemOp = i128mem> {
  let AsmString = OpcodeStr#
                  !if(Is2Addr, "\t{$src2, $dst|$dst, $src2}",
                      "\t{$src2, $src1, $dst|$dst, $src1, $src2}") in {
    def rr : AES8I<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2), "",
                   [(set RC:$dst, (IntId RC:$src1, RC:$src2))]>,
                   Sched<[WriteAESDecEnc]>;
    def rm : AES8I<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, MemOp:$src2), "",
                   [(set RC:$dst, (IntId RC:$src1, (ld_frag addr:$src2)))]>,
                   Sched<[WriteAESDecEnc.Folded, WriteAESDecEnc.ReadAfterFold]>;
  }
}

// Perform One Round of an AES Encryption/Decryption Flow
let Predicates = [HasAVX, NoVLX_Or_NoVAES, HasAES] in {
  defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc",
                                   int_x86_aesni_aesenc, load>, VEX_4V, VEX_WIG;
  defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
                                       int_x86_aesni_aesenclast, load>, VEX_4V, VEX_WIG;
  defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec",
                                   int_x86_aesni_aesdec, load>, VEX_4V, VEX_WIG;
  defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
                                       int_x86_aesni_aesdeclast, load>, VEX_4V, VEX_WIG;
}

let Predicates = [NoVLX, HasVAES] in {
  defm VAESENCY : AESI_binop_rm_int<0xDC, "vaesenc",
                                    int_x86_aesni_aesenc_256, load, 0, VR256,
                                    i256mem>, VEX_4V, VEX_L, VEX_WIG;
  defm VAESENCLASTY : AESI_binop_rm_int<0xDD, "vaesenclast",
                                        int_x86_aesni_aesenclast_256, load, 0, VR256,
                                        i256mem>, VEX_4V, VEX_L, VEX_WIG;
  defm VAESDECY : AESI_binop_rm_int<0xDE, "vaesdec",
                                    int_x86_aesni_aesdec_256, load, 0, VR256,
                                    i256mem>, VEX_4V, VEX_L, VEX_WIG;
  defm VAESDECLASTY : AESI_binop_rm_int<0xDF, "vaesdeclast",
                                        int_x86_aesni_aesdeclast_256, load, 0, VR256,
                                        i256mem>, VEX_4V, VEX_L, VEX_WIG;
}

let Constraints = "$src1 = $dst" in {
  defm AESENC : AESI_binop_rm_int<0xDC, "aesenc",
                                  int_x86_aesni_aesenc, memop, 1>;
  defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
                                      int_x86_aesni_aesenclast, memop, 1>;
  defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec",
                                  int_x86_aesni_aesdec, memop, 1>;
  defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
                                      int_x86_aesni_aesdeclast, memop, 1>;
}

// Perform the AES InvMixColumn Transformation
let Predicates = [HasAVX, HasAES] in {
  def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1),
                        "vaesimc\t{$src1, $dst|$dst, $src1}",
                        [(set VR128:$dst,
                          (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>,
                        VEX, VEX_WIG;
  def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
                        (ins i128mem:$src1),
                        "vaesimc\t{$src1, $dst|$dst, $src1}",
                        [(set VR128:$dst, (int_x86_aesni_aesimc (load addr:$src1)))]>,
                        Sched<[WriteAESIMC.Folded]>, VEX, VEX_WIG;
}
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
                     (ins VR128:$src1),
                     "aesimc\t{$src1, $dst|$dst, $src1}",
                     [(set VR128:$dst,
                       (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>;
def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
                     (ins i128mem:$src1),
                     "aesimc\t{$src1, $dst|$dst, $src1}",
                     [(set VR128:$dst, (int_x86_aesni_aesimc (memop addr:$src1)))]>,
                     Sched<[WriteAESIMC.Folded]>;

// AES Round Key Generation Assist
let Predicates = [HasAVX, HasAES] in {
  def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
                                    (ins VR128:$src1, u8imm:$src2),
                                    "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                                    [(set VR128:$dst,
                                      (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
                                    Sched<[WriteAESKeyGen]>, VEX, VEX_WIG;
  def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
                                    (ins i128mem:$src1, u8imm:$src2),
                                    "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                                    [(set VR128:$dst,
                                      (int_x86_aesni_aeskeygenassist (load addr:$src1), timm:$src2))]>,
                                    Sched<[WriteAESKeyGen.Folded]>, VEX, VEX_WIG;
}
def AESKEYGENASSIST128rr : AESAI<0xDF,
                                 MRMSrcReg, (outs VR128:$dst),
                                 (ins VR128:$src1, u8imm:$src2),
                                 "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                                 [(set VR128:$dst,
                                   (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
                                 Sched<[WriteAESKeyGen]>;
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
                                 (ins i128mem:$src1, u8imm:$src2),
                                 "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                                 [(set VR128:$dst,
                                   (int_x86_aesni_aeskeygenassist (memop addr:$src1), timm:$src2))]>,
                                 Sched<[WriteAESKeyGen.Folded]>;

//===----------------------------------------------------------------------===//
// PCLMUL Instructions
//===----------------------------------------------------------------------===//

// Immediate transform to help with commuting.
def PCLMULCommuteImm : SDNodeXForm<timm, [{
  uint8_t Imm = N->getZExtValue();
  return getI8Imm((uint8_t)((Imm >> 4) | (Imm << 4)), SDLoc(N));
}]>;

// SSE carry-less Multiplication instructions
let Predicates = [NoAVX, HasPCLMUL] in {
  let Constraints = "$src1 = $dst" in {
    let isCommutable = 1 in
    def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
                                (ins VR128:$src1, VR128:$src2, u8imm:$src3),
                                "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                                [(set VR128:$dst,
                                  (int_x86_pclmulqdq VR128:$src1, VR128:$src2, timm:$src3))]>,
                                Sched<[WriteCLMul]>;

    def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
                                (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
                                "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                                [(set VR128:$dst,
                                  (int_x86_pclmulqdq VR128:$src1, (memop addr:$src2),
                                   timm:$src3))]>,
                                Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"

  def : Pat<(int_x86_pclmulqdq (memop addr:$src2), VR128:$src1,
             (i8 timm:$src3)),
            (PCLMULQDQrm VR128:$src1, addr:$src2,
             (PCLMULCommuteImm timm:$src3))>;
} // Predicates = [NoAVX, HasPCLMUL]

// SSE aliases
foreach HI = ["hq","lq"] in
foreach LO = ["hq","lq"] in {
  def : InstAlias<"pclmul" # HI # LO # "dq\t{$src, $dst|$dst, $src}",
                  (PCLMULQDQrr VR128:$dst, VR128:$src,
                   !add(!shl(!eq(LO,"hq"),4),!eq(HI,"hq"))), 0>;
  def : InstAlias<"pclmul" # HI # LO # "dq\t{$src, $dst|$dst, $src}",
                  (PCLMULQDQrm VR128:$dst, i128mem:$src,
                   !add(!shl(!eq(LO,"hq"),4),!eq(HI,"hq"))), 0>;
}

// AVX carry-less Multiplication instructions
multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
                      PatFrag LdFrag, Intrinsic IntId> {
  let isCommutable = 1 in
  def rr : PCLMULIi8<0x44, MRMSrcReg, (outs RC:$dst),
                     (ins RC:$src1, RC:$src2, u8imm:$src3),
                     "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                     [(set RC:$dst,
                       (IntId RC:$src1, RC:$src2, timm:$src3))]>,
                     Sched<[WriteCLMul]>;

  def rm : PCLMULIi8<0x44, MRMSrcMem, (outs RC:$dst),
                     (ins RC:$src1, MemOp:$src2, u8imm:$src3),
                     "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                     [(set RC:$dst,
                       (IntId RC:$src1, (LdFrag addr:$src2), timm:$src3))]>,
                     Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;

  // We can commute a load in the first operand by swapping the sources and
  // rotating the immediate.
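  // (pclmulqdq's imm8 selects the qword of each source: bit 0 picks the half
  // of the first source and bit 4 the half of the second, so swapping the
  // nibbles -- as PCLMULCommuteImm does -- swaps the roles of the sources,
  // e.g. 0x01 <-> 0x10, while 0x00 and 0x11 are unchanged.)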
  def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 timm:$src3)),
            (!cast<Instruction>(NAME#"rm") RC:$src1, addr:$src2,
             (PCLMULCommuteImm timm:$src3))>;
}

let Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in
defm VPCLMULQDQ : vpclmulqdq<VR128, i128mem, load,
                             int_x86_pclmulqdq>, VEX_4V, VEX_WIG;

let Predicates = [NoVLX, HasVPCLMULQDQ] in
defm VPCLMULQDQY : vpclmulqdq<VR256, i256mem, load,
                              int_x86_pclmulqdq_256>, VEX_4V, VEX_L, VEX_WIG;

multiclass vpclmulqdq_aliases_impl<string InstStr, RegisterClass RC,
                                   X86MemOperand MemOp, string Hi, string Lo> {
  def : InstAlias<"vpclmul"#Hi#Lo#"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(InstStr # "rr") RC:$dst, RC:$src1, RC:$src2,
                   !add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))), 0>;
  def : InstAlias<"vpclmul"#Hi#Lo#"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(InstStr # "rm") RC:$dst, RC:$src1, MemOp:$src2,
                   !add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))), 0>;
}

multiclass vpclmulqdq_aliases<string InstStr, RegisterClass RC,
                              X86MemOperand MemOp> {
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "hq", "hq">;
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "hq", "lq">;
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "lq", "hq">;
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "lq", "lq">;
}

// AVX aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQ", VR128, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQY", VR256, i256mem>;

//===----------------------------------------------------------------------===//
// SSE4A Instructions
//===----------------------------------------------------------------------===//

let Predicates = [HasSSE4A] in {

let ExeDomain = SSEPackedInt in {
let Constraints = "$src = $dst" in {
def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst),
                 (ins VR128:$src, u8imm:$len, u8imm:$idx),
                 "extrq\t{$idx, $len, $src|$src, $len, $idx}",
                 [(set VR128:$dst, (X86extrqi VR128:$src, timm:$len,
                                    timm:$idx))]>,
                 PD, Sched<[SchedWriteVecALU.XMM]>;
def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
              (ins VR128:$src, VR128:$mask),
              "extrq\t{$mask, $src|$src, $mask}",
              [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src,
                                 VR128:$mask))]>,
              PD, Sched<[SchedWriteVecALU.XMM]>;

def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
                   (ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx),
                   "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
                   [(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2,
                                      timm:$len, timm:$idx))]>,
                   XD, Sched<[SchedWriteVecALU.XMM]>;
def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
                (ins VR128:$src, VR128:$mask),
                "insertq\t{$mask, $src|$src, $mask}",
                [(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src,
                                   VR128:$mask))]>,
                XD, Sched<[SchedWriteVecALU.XMM]>;
}
} // ExeDomain = SSEPackedInt

// Non-temporal (unaligned) scalar stores.
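// (MOVNTSS/MOVNTSD write a scalar straight to memory, bypassing the cache.
// The source must be an XMM register, so the patterns below first view the
// FR32/FR64 value as a VR128 via COPY_TO_REGCLASS.)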
let AddedComplexity = 400 in { // Prefer non-temporal versions
let hasSideEffects = 0, mayStore = 1, SchedRW = [SchedWriteFMoveLSNT.Scl.MR] in {
def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
                "movntss\t{$src, $dst|$dst, $src}", []>, XS;

def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                "movntsd\t{$src, $dst|$dst, $src}", []>, XD;
} // SchedRW

def : Pat<(nontemporalstore FR32:$src, addr:$dst),
          (MOVNTSS addr:$dst, (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;

def : Pat<(nontemporalstore FR64:$src, addr:$dst),
          (MOVNTSD addr:$dst, (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;

} // AddedComplexity
} // HasSSE4A

//===----------------------------------------------------------------------===//
// AVX Instructions
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VBROADCAST - Load from memory and broadcast to all elements of the
// destination operand
//
class avx_broadcast_rm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop, ValueType VT,
                       PatFrag bcast_frag, SchedWrite Sched> :
  AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
        [(set RC:$dst, (VT (bcast_frag addr:$src)))]>,
  Sched<[Sched]>, VEX;

// AVX2 adds register forms
class avx2_broadcast_rr<bits<8> opc, string OpcodeStr, RegisterClass RC,
                        ValueType ResVT, ValueType OpVT, SchedWrite Sched> :
  AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
         [(set RC:$dst, (ResVT (X86VBroadcast (OpVT VR128:$src))))]>,
  Sched<[Sched]>, VEX;

let ExeDomain = SSEPackedSingle, Predicates = [HasAVX, NoVLX] in {
  def VBROADCASTSSrm : avx_broadcast_rm<0x18, "vbroadcastss", VR128,
                                        f32mem, v4f32, X86VBroadcastld32,
                                        SchedWriteFShuffle.XMM.Folded>;
  def VBROADCASTSSYrm : avx_broadcast_rm<0x18, "vbroadcastss", VR256,
                                         f32mem, v8f32, X86VBroadcastld32,
                                         SchedWriteFShuffle.XMM.Folded>, VEX_L;
}
let ExeDomain = SSEPackedDouble, Predicates = [HasAVX, NoVLX] in
def VBROADCASTSDYrm : avx_broadcast_rm<0x19, "vbroadcastsd", VR256, f64mem,
                                       v4f64, X86VBroadcastld64,
                                       SchedWriteFShuffle.XMM.Folded>, VEX_L;

let ExeDomain = SSEPackedSingle, Predicates = [HasAVX2, NoVLX] in {
  def VBROADCASTSSrr : avx2_broadcast_rr<0x18, "vbroadcastss", VR128,
                                         v4f32, v4f32, SchedWriteFShuffle.XMM>;
  def VBROADCASTSSYrr : avx2_broadcast_rr<0x18, "vbroadcastss", VR256,
                                          v8f32, v4f32, WriteFShuffle256>, VEX_L;
}
let ExeDomain = SSEPackedDouble, Predicates = [HasAVX2, NoVLX] in
def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256,
                                        v4f64, v2f64, WriteFShuffle256>, VEX_L;

//===----------------------------------------------------------------------===//
// VBROADCAST*128 - Load from memory and broadcast 128-bit vector to both
// halves of a 256-bit vector.
//
let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX2] in
def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
                           (ins i128mem:$src),
                           "vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
                           Sched<[WriteShuffleLd]>, VEX, VEX_L;

let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX],
    ExeDomain = SSEPackedSingle in
def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst),
                           (ins f128mem:$src),
                           "vbroadcastf128\t{$src, $dst|$dst, $src}", []>,
                           Sched<[SchedWriteFShuffle.XMM.Folded]>, VEX, VEX_L;

let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128 addr:$src)>;
def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128 addr:$src)>;
// NOTE: We're using FP instructions here, but execution domain fixing can
// convert to integer when profitable.
def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128 addr:$src)>;
def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128 addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128 addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128 addr:$src)>;
}

//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
                            (ins VR256:$src1, VR128:$src2, u8imm:$src3),
                            "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                            []>, Sched<[WriteFShuffle256]>, VEX_4V, VEX_L;
let mayLoad = 1 in
def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
                            (ins VR256:$src1, f128mem:$src2, u8imm:$src3),
                            "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                            []>, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
}

// To create a 256-bit all-ones value, we should produce VCMPTRUEPS
// with a YMM register containing zero.
// FIXME: Avoid producing vxorps to clear the fake inputs.
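// (vcmpps with the always-true predicate (0xf) compares the zeroed register
// with itself and sets every result bit, yielding all-ones without any
// 256-bit integer instruction, which AVX1 lacks.)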
let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 immAllOnesV), (VCMPPSYrri (AVX_SET0), (AVX_SET0), 0xf)>;
}

multiclass vinsert_lowering<string InstrStr, ValueType From, ValueType To,
                            PatFrag memop_frag> {
  def : Pat<(vinsert128_insert:$ins (To VR256:$src1), (From VR128:$src2),
             (iPTR imm)),
            (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR128:$src2,
             (INSERT_get_vinsert128_imm VR256:$ins))>;
  def : Pat<(vinsert128_insert:$ins (To VR256:$src1),
             (From (memop_frag addr:$src2)),
             (iPTR imm)),
            (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
             (INSERT_get_vinsert128_imm VR256:$ins))>;
}

let Predicates = [HasAVX, NoVLX] in {
  defm : vinsert_lowering<"VINSERTF128", v4f32, v8f32, loadv4f32>;
  defm : vinsert_lowering<"VINSERTF128", v2f64, v4f64, loadv2f64>;
}

let Predicates = [HasAVX1Only] in {
  defm : vinsert_lowering<"VINSERTF128", v2i64, v4i64, loadv2i64>;
  defm : vinsert_lowering<"VINSERTF128", v4i32, v8i32, loadv4i32>;
  defm : vinsert_lowering<"VINSERTF128", v8i16, v16i16, loadv8i16>;
  defm : vinsert_lowering<"VINSERTF128", v16i8, v32i8, loadv16i8>;
}

//===----------------------------------------------------------------------===//
// VEXTRACTF128 - Extract packed floating-point values
//
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
                             (ins VR256:$src1, u8imm:$src2),
                             "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             []>, Sched<[WriteFShuffle256]>, VEX, VEX_L;
let mayStore = 1 in
def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
                             (ins f128mem:$dst, VR256:$src1, u8imm:$src2),
                             "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             []>, Sched<[WriteFStoreX]>, VEX, VEX_L;
}

multiclass vextract_lowering<string InstrStr, ValueType From, ValueType To> {
  def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
            (To (!cast<Instruction>(InstrStr#rr)
                 (From VR256:$src1),
                 (EXTRACT_get_vextract128_imm VR128:$ext)))>;
  def : Pat<(store (To (vextract128_extract:$ext (From VR256:$src1),
                        (iPTR imm))), addr:$dst),
            (!cast<Instruction>(InstrStr#mr) addr:$dst, VR256:$src1,
             (EXTRACT_get_vextract128_imm VR128:$ext))>;
}

// AVX1 patterns
let Predicates = [HasAVX, NoVLX] in {
  defm : vextract_lowering<"VEXTRACTF128", v8f32, v4f32>;
  defm : vextract_lowering<"VEXTRACTF128", v4f64, v2f64>;
}

let Predicates = [HasAVX1Only] in {
  defm : vextract_lowering<"VEXTRACTF128", v4i64, v2i64>;
  defm : vextract_lowering<"VEXTRACTF128", v8i32, v4i32>;
  defm : vextract_lowering<"VEXTRACTF128", v16i16, v8i16>;
  defm : vextract_lowering<"VEXTRACTF128", v32i8, v16i8>;
}

//===----------------------------------------------------------------------===//
// VMASKMOV - Conditional SIMD Packed Loads and Stores
//
multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
                          Intrinsic IntLd, Intrinsic IntLd256,
                          Intrinsic IntSt, Intrinsic IntSt256,
                          X86SchedWriteMaskMove schedX,
                          X86SchedWriteMaskMove schedY> {
  def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
                 (ins VR128:$src1, f128mem:$src2),
                 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
                 VEX_4V, Sched<[schedX.RM]>;
  def Yrm : AVX8I<opc_rm, MRMSrcMem,
                  (outs VR256:$dst),
                  (ins VR256:$src1, f256mem:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
                  VEX_4V, VEX_L, Sched<[schedY.RM]>;
  def mr : AVX8I<opc_mr, MRMDestMem, (outs),
                 (ins f128mem:$dst, VR128:$src1, VR128:$src2),
                 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>,
                 VEX_4V, Sched<[schedX.MR]>;
  def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
                  (ins f256mem:$dst, VR256:$src1, VR256:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
                  VEX_4V, VEX_L, Sched<[schedY.MR]>;
}

let ExeDomain = SSEPackedSingle in
defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
                                 int_x86_avx_maskload_ps,
                                 int_x86_avx_maskload_ps_256,
                                 int_x86_avx_maskstore_ps,
                                 int_x86_avx_maskstore_ps_256,
                                 WriteFMaskMove32, WriteFMaskMove32Y>;
let ExeDomain = SSEPackedDouble in
defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
                                 int_x86_avx_maskload_pd,
                                 int_x86_avx_maskload_pd_256,
                                 int_x86_avx_maskstore_pd,
                                 int_x86_avx_maskstore_pd_256,
                                 WriteFMaskMove64, WriteFMaskMove64Y>;

//===----------------------------------------------------------------------===//
// AVX_VNNI
//===----------------------------------------------------------------------===//
let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI], Constraints = "$src1 = $dst",
    ExplicitVEXPrefix = 1, checkVEXPredicate = 1 in
multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                       bit IsCommutable> {
  let isCommutable = IsCommutable in
  def rr : AVX8I<opc, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src1, VR128:$src2, VR128:$src3),
                 !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                 [(set VR128:$dst, (v4i32 (OpNode VR128:$src1,
                                           VR128:$src2, VR128:$src3)))]>,
                 VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;

  def rm : AVX8I<opc, MRMSrcMem, (outs VR128:$dst),
                 (ins VR128:$src1, VR128:$src2, i128mem:$src3),
                 !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                 [(set VR128:$dst, (v4i32 (OpNode VR128:$src1, VR128:$src2,
                                           (loadv4i32 addr:$src3))))]>,
                 VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;

  let isCommutable = IsCommutable in
  def Yrr : AVX8I<opc, MRMSrcReg, (outs VR256:$dst),
                  (ins VR256:$src1, VR256:$src2, VR256:$src3),
                  !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                  [(set VR256:$dst, (v8i32 (OpNode VR256:$src1,
                                            VR256:$src2, VR256:$src3)))]>,
                  VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>;

  def Yrm : AVX8I<opc, MRMSrcMem, (outs VR256:$dst),
                  (ins VR256:$src1, VR256:$src2, i256mem:$src3),
                  !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                  [(set VR256:$dst, (v8i32 (OpNode VR256:$src1, VR256:$src2,
                                            (loadv8i32 addr:$src3))))]>,
                  VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>;
}

defm VPDPBUSD : avx_vnni_rm<0x50, "vpdpbusd", X86Vpdpbusd, 0>;
defm VPDPBUSDS : avx_vnni_rm<0x51, "vpdpbusds", X86Vpdpbusds, 0>;
defm VPDPWSSD : avx_vnni_rm<0x52, "vpdpwssd", X86Vpdpwssd, 1>;
defm VPDPWSSDS : avx_vnni_rm<0x53, "vpdpwssds", X86Vpdpwssds, 1>;

def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
                             (X86vpmaddwd node:$lhs, node:$rhs), [{
  return N->hasOneUse();
}]>;
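// The "_su" (single-use) fragment restricts the add+vpmaddwd combines below
// to the case where the vpmaddwd result has no other users; otherwise the
// multiply would have to be emitted anyway, and fusing it into vpdpwssd
// would save nothing.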

let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI] in {
  def : Pat<(v8i32 (add VR256:$src1,
                        (X86vpmaddwd_su VR256:$src2, VR256:$src3))),
            (VPDPWSSDYrr VR256:$src1, VR256:$src2, VR256:$src3)>;
  def : Pat<(v8i32 (add VR256:$src1,
                        (X86vpmaddwd_su VR256:$src2, (load addr:$src3)))),
            (VPDPWSSDYrm VR256:$src1, VR256:$src2, addr:$src3)>;
  def : Pat<(v4i32 (add VR128:$src1,
                        (X86vpmaddwd_su VR128:$src2, VR128:$src3))),
            (VPDPWSSDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
  def : Pat<(v4i32 (add VR128:$src1,
                        (X86vpmaddwd_su VR128:$src2, (load addr:$src3)))),
            (VPDPWSSDrm VR128:$src1, VR128:$src2, addr:$src3)>;
}

//===----------------------------------------------------------------------===//
// VPERMIL - Permute Single and Double Floating-Point Values
//

multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
                      RegisterClass RC, X86MemOperand x86memop_f,
                      X86MemOperand x86memop_i,
                      ValueType f_vt, ValueType i_vt,
                      X86FoldableSchedWrite sched,
                      X86FoldableSchedWrite varsched> {
  let Predicates = [HasAVX, NoVLX] in {
    def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt RC:$src2))))]>, VEX_4V,
                   Sched<[varsched]>;
    def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, x86memop_i:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1,
                                         (i_vt (load addr:$src2)))))]>, VEX_4V,
                   Sched<[varsched.Folded, sched.ReadAfterFold]>;

    def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
                     (ins RC:$src1, u8imm:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 timm:$src2))))]>, VEX,
                     Sched<[sched]>;
    def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
                     (ins x86memop_f:$src1, u8imm:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set RC:$dst,
                       (f_vt (X86VPermilpi (load addr:$src1), (i8 timm:$src2))))]>, VEX,
                     Sched<[sched.Folded]>;
  } // Predicates = [HasAVX, NoVLX]
}

let ExeDomain = SSEPackedSingle in {
  defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
                              v4f32, v4i32, SchedWriteFShuffle.XMM,
                              SchedWriteFVarShuffle.XMM>;
  defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
                               v8f32, v8i32, SchedWriteFShuffle.YMM,
                               SchedWriteFVarShuffle.YMM>, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
  defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
                              v2f64, v2i64, SchedWriteFShuffle.XMM,
                              SchedWriteFVarShuffle.XMM>;
  defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
                               v4f64, v4i64, SchedWriteFShuffle.YMM,
                               SchedWriteFVarShuffle.YMM>, VEX_L;
}

//===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
//

let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
                           (ins VR256:$src1, VR256:$src2, u8imm:$src3),
                           "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
                           VEX_4V, VEX_L,
                           Sched<[WriteFShuffle256]>;
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
                           (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
                           "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
                           VEX_4V, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
}

// Immediate transform to help with commuting.
def Perm2XCommuteImm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N));
}]>;

multiclass vperm2x128_lowering<string InstrStr, ValueType VT, PatFrag memop_frag> {
  def : Pat<(VT (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))),
            (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR256:$src2, timm:$imm)>;
  def : Pat<(VT (X86VPerm2x128 VR256:$src1, (memop_frag addr:$src2), (i8 timm:$imm))),
            (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2, timm:$imm)>;
  // Pattern with load in other operand.
  def : Pat<(VT (X86VPerm2x128 (memop_frag addr:$src2), VR256:$src1, (i8 timm:$imm))),
            (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
             (Perm2XCommuteImm timm:$imm))>;
}

let Predicates = [HasAVX] in {
  defm : vperm2x128_lowering<"VPERM2F128", v4f64, loadv4f64>;
  defm : vperm2x128_lowering<"VPERM2F128", v8f32, loadv8f32>;
}

let Predicates = [HasAVX1Only] in {
  defm : vperm2x128_lowering<"VPERM2F128", v4i64, loadv4i64>;
  defm : vperm2x128_lowering<"VPERM2F128", v8i32, loadv8i32>;
  defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>;
  defm : vperm2x128_lowering<"VPERM2F128", v32i8, loadv32i8>;
}

//===----------------------------------------------------------------------===//
// VZERO - Zero YMM registers
// Note: These instructions do not affect YMM16-YMM31.
//

let SchedRW = [WriteSystem] in {
let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
            YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
  // Zero All YMM registers
  def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
                   [(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L,
                   Requires<[HasAVX]>, VEX_WIG;

  // Zero Upper bits of YMM registers
  def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
                     [(int_x86_avx_vzeroupper)]>, PS, VEX,
                     Requires<[HasAVX]>, VEX_WIG;
} // Defs
} // SchedRW

//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//

multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop,
                      X86FoldableSchedWrite sched> {
  def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
             "vcvtph2ps\t{$src, $dst|$dst, $src}",
             [(set RC:$dst, (X86any_cvtph2ps VR128:$src))]>,
             T8PD, VEX, Sched<[sched]>;
  let hasSideEffects = 0, mayLoad = 1 in
  def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
             "vcvtph2ps\t{$src, $dst|$dst, $src}",
             []>, T8PD, VEX, Sched<[sched.Folded]>;
}

multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
                      SchedWrite RR, SchedWrite MR> {
  def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
               (ins RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set VR128:$dst, (X86any_cvtps2ph RC:$src1, timm:$src2))]>,
               TAPD, VEX, Sched<[RR]>;
  let hasSideEffects = 0, mayStore = 1 in
  def mr : Ii8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               TAPD, VEX, Sched<[MR]>;
}

let Predicates = [HasF16C, NoVLX] in {
  defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, WriteCvtPH2PS>, SIMD_EXC;
  defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, WriteCvtPH2PSY>, VEX_L, SIMD_EXC;
  defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, WriteCvtPS2PH,
                              WriteCvtPS2PHSt>, SIMD_EXC;
  defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, WriteCvtPS2PHY,
                               WriteCvtPS2PHYSt>, VEX_L, SIMD_EXC;

  // Pattern match vcvtph2ps of a scalar i64 load.
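  // (The 128-bit form converts four f16 values, i.e. exactly 64 bits of
  // input, so a 64-bit scalar load feeding the conversion folds straight
  // into VCVTPH2PSrm.)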
  def : Pat<(v4f32 (X86any_cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTPH2PSrm addr:$src)>;
  def : Pat<(v4f32 (X86any_cvtph2ps (bc_v8i16
                    (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTPH2PSrm addr:$src)>;
  def : Pat<(v8f32 (X86any_cvtph2ps (loadv8i16 addr:$src))),
            (VCVTPH2PSYrm addr:$src)>;

  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>;
  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHYmr addr:$dst, VR256:$src1, timm:$src2)>;
}

//===----------------------------------------------------------------------===//
// AVX2 Instructions
//===----------------------------------------------------------------------===//

/// AVX2_blend_rmi - AVX2 blend with 8-bit immediate
multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, X86FoldableSchedWrite sched,
                          RegisterClass RC,
                          X86MemOperand x86memop, SDNodeXForm commuteXForm> {
  let isCommutable = 1 in
  def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
                     (ins RC:$src1, RC:$src2, u8imm:$src3),
                     !strconcat(OpcodeStr,
                                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
                     Sched<[sched]>, VEX_4V;
  def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
                     (ins RC:$src1, x86memop:$src2, u8imm:$src3),
                     !strconcat(OpcodeStr,
                                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     [(set RC:$dst,
                       (OpVT (OpNode RC:$src1, (load addr:$src2), timm:$src3)))]>,
                     Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V;

  // Pattern to commute if load is in first source.
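  // (A blend picks each element from one of the two sources according to the
  // mask bits, so swapping the sources is equivalent to complementing the
  // mask; commuteXForm, e.g. BlendCommuteImm4, performs that inversion.)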
  def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, timm:$src3)),
            (!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
             (commuteXForm timm:$src3))>;
}

let Predicates = [HasAVX2] in {
defm VPBLENDD : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v4i32,
                               SchedWriteBlend.XMM, VR128, i128mem,
                               BlendCommuteImm4>;
defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32,
                                SchedWriteBlend.YMM, VR256, i256mem,
                                BlendCommuteImm8>, VEX_L;

def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3),
          (VPBLENDDYrri VR256:$src1, VR256:$src2, (BlendScaleImm4 timm:$src3))>;
def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3),
          (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>;
def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3),
          (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>;

def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
          (VPBLENDDrri VR128:$src1, VR128:$src2, (BlendScaleImm2to4 timm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3),
          (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleImm2to4 timm:$src3))>;
def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3),
          (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2to4 timm:$src3))>;
}

// For insertion into the zero index (low half) of a 256-bit vector, it is
// more efficient to generate a blend with immediate instead of an insert*128.
// NOTE: We're using FP instructions here, but execution domain fixing should
// take care of using integer instructions when profitable.
let Predicates = [HasAVX] in {
def : Pat<(insert_subvector (v8i32 VR256:$src1), (v4i32 VR128:$src2), (iPTR 0)),
          (VBLENDPSYrri VR256:$src1,
                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0xf)>;
def : Pat<(insert_subvector (v4i64 VR256:$src1), (v2i64 VR128:$src2), (iPTR 0)),
          (VBLENDPSYrri VR256:$src1,
                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0xf)>;
def : Pat<(insert_subvector (v16i16 VR256:$src1), (v8i16 VR128:$src2), (iPTR 0)),
          (VBLENDPSYrri VR256:$src1,
                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0xf)>;
def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)),
          (VBLENDPSYrri VR256:$src1,
                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0xf)>;

def : Pat<(insert_subvector (loadv8i32 addr:$src2), (v4i32 VR128:$src1), (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
def : Pat<(insert_subvector (loadv4i64 addr:$src2), (v2i64 VR128:$src1), (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
def : Pat<(insert_subvector (loadv16i16 addr:$src2), (v8i16 VR128:$src1), (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
def : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1), (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
}

//===----------------------------------------------------------------------===//
// VPBROADCAST - Load from memory and broadcast to all elements of the
// destination operand
//
multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
                          X86MemOperand x86memop, PatFrag bcast_frag,
                          ValueType OpVT128, ValueType OpVT256, Predicate prd> {
  let Predicates = [HasAVX2, prd] in {
    def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (OpVT128 (X86VBroadcast (OpVT128 VR128:$src))))]>,
                    Sched<[SchedWriteShuffle.XMM]>, VEX;
    def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (OpVT128 (bcast_frag addr:$src)))]>,
                    Sched<[SchedWriteShuffle.XMM.Folded]>, VEX;
    def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR256:$dst,
                       (OpVT256 (X86VBroadcast (OpVT128 VR128:$src))))]>,
                     Sched<[WriteShuffle256]>, VEX, VEX_L;
    def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR256:$dst,
                       (OpVT256 (bcast_frag addr:$src)))]>,
                     Sched<[SchedWriteShuffle.XMM.Folded]>, VEX, VEX_L;

    // Provide aliases for broadcast from the same register class that
    // automatically do the extract.
    def : Pat<(OpVT256 (X86VBroadcast (OpVT256 VR256:$src))),
              (!cast<Instruction>(NAME#"Yrr")
               (OpVT128 (EXTRACT_SUBREG (OpVT256 VR256:$src), sub_xmm)))>;
  }
}

defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, X86VBroadcastld8,
                                   v16i8, v32i8, NoVLX_Or_NoBWI>;
defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, X86VBroadcastld16,
                                   v8i16, v16i16, NoVLX_Or_NoBWI>;
defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, X86VBroadcastld32,
                                   v4i32, v8i32, NoVLX>;
defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, X86VBroadcastld64,
                                   v2i64, v4i64, NoVLX>;

let Predicates = [HasAVX2, NoVLX] in {
  // Provide fallback in case the load node that is used in the patterns above
  // is used by additional users, which prevents the pattern selection.
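  // (Each fallback first moves the scalar into an XMM register -- via
  // COPY_TO_REGCLASS for FP values, or VMOVDI2PDIrr/VMOV64toPQIrr for GPRs --
  // and then broadcasts from that register.)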
  def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
            (VBROADCASTSSrr (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;
  def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
            (VBROADCASTSSYrr (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;
  def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
            (VBROADCASTSDYrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;
}

let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  def : Pat<(v16i8 (X86VBroadcast GR8:$src)),
            (VPBROADCASTBrr (VMOVDI2PDIrr
                             (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                                 GR8:$src, sub_8bit))))>;
  def : Pat<(v32i8 (X86VBroadcast GR8:$src)),
            (VPBROADCASTBYrr (VMOVDI2PDIrr
                              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                                  GR8:$src, sub_8bit))))>;

  def : Pat<(v8i16 (X86VBroadcast GR16:$src)),
            (VPBROADCASTWrr (VMOVDI2PDIrr
                             (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                                 GR16:$src, sub_16bit))))>;
  def : Pat<(v16i16 (X86VBroadcast GR16:$src)),
            (VPBROADCASTWYrr (VMOVDI2PDIrr
                              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                                  GR16:$src, sub_16bit))))>;
}
let Predicates = [HasAVX2, NoVLX] in {
  def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
            (VPBROADCASTDrr (VMOVDI2PDIrr GR32:$src))>;
  def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
            (VPBROADCASTDYrr (VMOVDI2PDIrr GR32:$src))>;
  def : Pat<(v2i64 (X86VBroadcast GR64:$src)),
            (VPBROADCASTQrr (VMOV64toPQIrr GR64:$src))>;
  def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
            (VPBROADCASTQYrr (VMOV64toPQIrr GR64:$src))>;
}

// AVX1 broadcast patterns
let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 (X86VBroadcastld32 addr:$src)),
          (VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4i64 (X86VBroadcastld64 addr:$src)),
          (VBROADCASTSDYrm addr:$src)>;
def : Pat<(v4i32 (X86VBroadcastld32 addr:$src)),
          (VBROADCASTSSrm addr:$src)>;
}

// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
let Predicates = [HasAVX, NoVLX] in {
  // 128bit broadcasts:
  def : Pat<(v2f64 (X86VBroadcast f64:$src)),
            (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;
  def : Pat<(v2f64 (X86VBroadcastld64 addr:$src)),
            (VMOVDDUPrm addr:$src)>;

  def : Pat<(v2f64 (X86VBroadcast v2f64:$src)),
            (VMOVDDUPrr VR128:$src)>;
}

let Predicates = [HasAVX1Only] in {
  def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
            (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)>;
  def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
                            (v4f32 (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)), sub_xmm),
                           (v4f32 (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)), 1)>;
  def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
                            (v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), sub_xmm),
                           (v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), 1)>;

  def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
            (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)>;
  def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                            (v4i32 (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)), sub_xmm),
                           (v4i32 (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)), 1)>;
  def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
                            (v4i32 (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)), sub_xmm),
                           (v4i32 (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)), 1)>;

  def : Pat<(v2i64 (X86VBroadcast i64:$src)),
            (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)>;
  def : Pat<(v2i64 (X86VBroadcastld64 addr:$src)),
            (VMOVDDUPrm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// VPERM - Permute instructions
//

multiclass avx2_perm<bits<8> opc, string OpcodeStr,
                     ValueType OpVT, X86FoldableSchedWrite Sched,
                     X86MemOperand memOp> {
  let Predicates = [HasAVX2, NoVLX] in {
    def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
                     (ins VR256:$src1, VR256:$src2),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set VR256:$dst,
                       (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>,
                     Sched<[Sched]>, VEX_4V, VEX_L;
    def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
                     (ins VR256:$src1, memOp:$src2),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set VR256:$dst,
                       (OpVT (X86VPermv VR256:$src1,
                              (load addr:$src2))))]>,
                     Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX_4V, VEX_L;
  }
}

defm VPERMD : avx2_perm<0x36, "vpermd", v8i32, WriteVarShuffle256, i256mem>;
let ExeDomain = SSEPackedSingle in
defm VPERMPS : avx2_perm<0x16, "vpermps", v8f32, WriteFVarShuffle256, f256mem>;

multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
                         ValueType OpVT, X86FoldableSchedWrite Sched,
                         X86MemOperand memOp> {
  let Predicates = [HasAVX2, NoVLX] in {
    def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
                       (ins VR256:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set VR256:$dst,
                         (OpVT (X86VPermi VR256:$src1, (i8 timm:$src2))))]>,
                       Sched<[Sched]>, VEX, VEX_L;
    def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
                       (ins memOp:$src1,
                        u8imm:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set VR256:$dst,
                         (OpVT (X86VPermi (mem_frag addr:$src1),
                                (i8 timm:$src2))))]>,
                       Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VEX_L;
  }
}

defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64,
                            WriteShuffle256, i256mem>, VEX_W;
let ExeDomain = SSEPackedDouble in
defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64,
                             WriteFShuffle256, f256mem>, VEX_W;

//===----------------------------------------------------------------------===//
// VPERM2I128 - Permute Integer vector Values in 128-bit chunks
//
let isCommutable = 1 in
def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
                            (ins VR256:$src1, VR256:$src2, u8imm:$src3),
                            "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
                            Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
                            (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
                            "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
                            Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;

let Predicates = [HasAVX2] in {
  defm : vperm2x128_lowering<"VPERM2I128", v4i64, loadv4i64>;
  defm : vperm2x128_lowering<"VPERM2I128", v8i32, loadv8i32>;
  defm : vperm2x128_lowering<"VPERM2I128", v16i16, loadv16i16>;
  defm : vperm2x128_lowering<"VPERM2I128", v32i8, loadv32i8>;
}

//===----------------------------------------------------------------------===//
// VINSERTI128 - Insert packed integer values
//
let hasSideEffects = 0 in {
def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
                             (ins VR256:$src1, VR128:$src2, u8imm:$src3),
                             "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                             []>, Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
let mayLoad = 1 in
def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
                             (ins VR256:$src1, i128mem:$src2, u8imm:$src3),
                             "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                             []>, Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
}

let Predicates = [HasAVX2, NoVLX] in {
  defm : vinsert_lowering<"VINSERTI128", v2i64, v4i64, loadv2i64>;
  defm : vinsert_lowering<"VINSERTI128", v4i32, v8i32, loadv4i32>;
  defm : vinsert_lowering<"VINSERTI128", v8i16, v16i16, loadv8i16>;
  defm : vinsert_lowering<"VINSERTI128", v16i8, v32i8, loadv16i8>;
}

//===----------------------------------------------------------------------===//
// VEXTRACTI128 - Extract packed integer values
//
def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
                              (ins VR256:$src1, u8imm:$src2),
                              "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                              Sched<[WriteShuffle256]>, VEX, VEX_L;
let hasSideEffects = 0, mayStore = 1 in
def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
                              (ins i128mem:$dst, VR256:$src1, u8imm:$src2),
                              "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                              Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_L;

let Predicates = [HasAVX2, NoVLX] in {
  defm : vextract_lowering<"VEXTRACTI128", v4i64, v2i64>;
  defm : vextract_lowering<"VEXTRACTI128", v8i32, v4i32>;
  defm : vextract_lowering<"VEXTRACTI128", v16i16, v8i16>;
  defm : vextract_lowering<"VEXTRACTI128", v32i8, v16i8>;
}

//===----------------------------------------------------------------------===//
// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
//
multiclass avx2_pmovmask<string OpcodeStr,
                         Intrinsic IntLd128, Intrinsic IntLd256,
                         Intrinsic IntSt128, Intrinsic IntSt256,
                         X86SchedWriteMaskMove schedX,
                         X86SchedWriteMaskMove schedY> {
  def rm : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
                  (ins VR128:$src1, i128mem:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>,
                  VEX_4V, Sched<[schedX.RM]>;
  def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
                   (ins VR256:$src1, i256mem:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
                   VEX_4V, VEX_L, Sched<[schedY.RM]>;
  def mr : AVX28I<0x8e, MRMDestMem, (outs),
                  (ins i128mem:$dst, VR128:$src1, VR128:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>,
                  VEX_4V, Sched<[schedX.MR]>;
  def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
                   (ins i256mem:$dst, VR256:$src1, VR256:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
                   VEX_4V, VEX_L, Sched<[schedY.MR]>;
}

defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
                                int_x86_avx2_maskload_d,
                                int_x86_avx2_maskload_d_256,
                                int_x86_avx2_maskstore_d,
                                int_x86_avx2_maskstore_d_256,
                                WriteVecMaskMove32, WriteVecMaskMove32Y>;
defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
                                int_x86_avx2_maskload_q,
                                int_x86_avx2_maskload_q_256,
                                int_x86_avx2_maskstore_q,
                                int_x86_avx2_maskstore_q_256,
                                WriteVecMaskMove64, WriteVecMaskMove64Y>, VEX_W;

multiclass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT,
                            ValueType MaskVT> {
  // masked store
  def : Pat<(masked_store (VT RC:$src), addr:$ptr, (MaskVT RC:$mask)),
            (!cast<Instruction>(InstrStr#"mr") addr:$ptr, RC:$mask, RC:$src)>;
  // masked load
  def : Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), undef)),
            (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
  def : Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask),
                 (VT immAllZerosV))),
            (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
}
let Predicates = [HasAVX] in {
  defm : maskmov_lowering<"VMASKMOVPS", VR128, v4f32, v4i32>;
  defm : maskmov_lowering<"VMASKMOVPD", VR128, v2f64, v2i64>;
  defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8f32, v8i32>;
  defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4f64, v4i64>;
}
let Predicates = [HasAVX1Only] in {
  // Integer masked load/store is not supported on AVX1; use the ps/pd versions.
  defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8i32, v8i32>;
  defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4i64, v4i64>;
  defm : maskmov_lowering<"VMASKMOVPS", VR128, v4i32, v4i32>;
  defm : maskmov_lowering<"VMASKMOVPD", VR128, v2i64, v2i64>;
}
let Predicates = [HasAVX2] in {
  defm : maskmov_lowering<"VPMASKMOVDY", VR256, v8i32, v8i32>;
  defm : maskmov_lowering<"VPMASKMOVQY", VR256, v4i64, v4i64>;
  defm : maskmov_lowering<"VPMASKMOVD", VR128, v4i32, v4i32>;
  defm : maskmov_lowering<"VPMASKMOVQ", VR128, v2i64, v2i64>;
}
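
// Note: the masked-load patterns above only cover passthrough values of
// undef or all-zeros, because VMASKMOV/VPMASKMOV zero the masked-off
// elements; any other passthrough would require an extra blend.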

//===----------------------------------------------------------------------===//
// Variable Bit Shifts
//
multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType vt128, ValueType vt256> {
  def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
                  (ins VR128:$src1, VR128:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR128:$dst,
                    (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>,
                  VEX_4V, Sched<[SchedWriteVarVecShift.XMM]>;
  def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
                  (ins VR128:$src1, i128mem:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR128:$dst,
                    (vt128 (OpNode VR128:$src1,
                            (vt128 (load addr:$src2)))))]>,
                  VEX_4V, Sched<[SchedWriteVarVecShift.XMM.Folded,
                                 SchedWriteVarVecShift.XMM.ReadAfterFold]>;
  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
                   (ins VR256:$src1, VR256:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR256:$dst,
                     (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>,
                   VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM]>;
  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
                   (ins VR256:$src1, i256mem:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR256:$dst,
                     (vt256 (OpNode VR256:$src1,
                             (vt256 (load addr:$src2)))))]>,
                   VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded,
                                         SchedWriteVarVecShift.YMM.ReadAfterFold]>;
}

let Predicates = [HasAVX2, NoVLX] in {
  defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", X86vshlv, v4i32, v8i32>;
  defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", X86vshlv, v2i64, v4i64>, VEX_W;
  defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", X86vsrlv, v4i32, v8i32>;
  defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", X86vsrlv, v2i64, v4i64>, VEX_W;
  defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", X86vsrav, v4i32, v8i32>;
}

//===----------------------------------------------------------------------===//
// VGATHER - GATHER Operations

// FIXME: Improve scheduling of gather instructions.
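// AVX2 gathers produce two results: the gathered vector and a writeback of
// the mask register, which the hardware clears element by element as loads
// complete. Both outputs are modeled below, tied to their inputs and marked
// @earlyclobber so neither can share a register with a surviving source.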
multiclass avx2_gather<bits<8> opc, string OpcodeStr, ValueType VTx,
                       ValueType VTy, RegisterClass RC256,
                       X86MemOperand memop128, X86MemOperand memop256,
                       ValueType MTx = VTx, ValueType MTy = VTy> {
let mayLoad = 1, hasSideEffects = 0 in {
  def rm : AVX28I<opc, MRMSrcMem4VOp3, (outs VR128:$dst, VR128:$mask_wb),
                  (ins VR128:$src1, memop128:$src2, VR128:$mask),
                  !strconcat(OpcodeStr,
                             "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
                  []>, VEX, Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
  def Yrm : AVX28I<opc, MRMSrcMem4VOp3, (outs RC256:$dst, RC256:$mask_wb),
                   (ins RC256:$src1, memop256:$src2, RC256:$mask),
                   !strconcat(OpcodeStr,
                              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
                   []>, VEX, VEX_L, Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
}
}

let Predicates = [HasAVX2] in {
  let mayLoad = 1, hasSideEffects = 0, Constraints
    = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
    in {
    defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", v2i64, v4i64,
                                  VR256, vx128mem, vx256mem>, VEX_W;
    defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", v2i64, v4i64,
                                  VR256, vx128mem, vy256mem>, VEX_W;
    defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", v4i32, v8i32,
                                  VR256, vx128mem, vy256mem>;
    defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", v4i32, v4i32,
                                  VR128, vx64mem, vy128mem>;

    let ExeDomain = SSEPackedDouble in {
      defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", v2f64, v4f64,
                                    VR256, vx128mem, vx256mem, v2i64, v4i64>, VEX_W;
      defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", v2f64, v4f64,
                                    VR256, vx128mem, vy256mem, v2i64, v4i64>, VEX_W;
    }

    let ExeDomain = SSEPackedSingle in {
      defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", v4f32, v8f32,
                                    VR256, vx128mem, vy256mem, v4i32, v8i32>;
      defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", v4f32, v4f32,
                                    VR128, vx64mem, vy128mem, v4i32, v4i32>;
    }
  }
}

//===----------------------------------------------------------------------===//
// GFNI instructions
//===----------------------------------------------------------------------===//

multiclass GF2P8MULB_rm<string OpcodeStr, ValueType OpVT,
                        RegisterClass RC, PatFrag MemOpFrag,
                        X86MemOperand X86MemOp, bit Is2Addr = 0> {
  let ExeDomain = SSEPackedInt,
      AsmString = !if(Is2Addr,
                      OpcodeStr#"\t{$src2, $dst|$dst, $src2}",
                      OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}") in {
    let isCommutable = 1 in
    def rr : PDI<0xCF, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), "",
                 [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, RC:$src2)))]>,
                 Sched<[SchedWriteVecALU.XMM]>, T8PD;

    def rm : PDI<0xCF, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, X86MemOp:$src2), "",
                 [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1,
                                       (MemOpFrag addr:$src2))))]>,
                 Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>, T8PD;
  }
}

multiclass GF2P8AFFINE_rmi<bits<8> Op, string OpStr, ValueType OpVT,
                           SDNode OpNode, RegisterClass RC, PatFrag MemOpFrag,
                           X86MemOperand X86MemOp, bit Is2Addr = 0> {
  let AsmString = !if(Is2Addr,
                      OpStr#"\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                      OpStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in {
    def rri : Ii8<Op, MRMSrcReg, (outs RC:$dst),
                  (ins RC:$src1, RC:$src2, u8imm:$src3), "",
                  [(set RC:$dst, (OpVT (OpNode RC:$src1,
                                        RC:$src2, timm:$src3)))],
                  SSEPackedInt>, Sched<[SchedWriteVecALU.XMM]>;
    def rmi : Ii8<Op, MRMSrcMem, (outs RC:$dst),
                  (ins RC:$src1, X86MemOp:$src2, u8imm:$src3), "",
                  [(set RC:$dst, (OpVT (OpNode RC:$src1,
                                        (MemOpFrag addr:$src2),
                                        timm:$src3)))], SSEPackedInt>,
                  Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>;
  }
}

multiclass GF2P8AFFINE_common<bits<8> Op, string OpStr, SDNode OpNode> {
  let Constraints = "$src1 = $dst",
      Predicates = [HasGFNI, UseSSE2] in
  defm NAME : GF2P8AFFINE_rmi<Op, OpStr, v16i8, OpNode,
                              VR128, load, i128mem, 1>;
  let Predicates = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
    defm V#NAME : GF2P8AFFINE_rmi<Op, "v"#OpStr, v16i8, OpNode, VR128,
                                  load, i128mem>, VEX_4V, VEX_W;
    defm V#NAME#Y : GF2P8AFFINE_rmi<Op, "v"#OpStr, v32i8, OpNode, VR256,
                                    load, i256mem>, VEX_4V, VEX_L, VEX_W;
  }
}

// GF2P8MULB
let Constraints = "$src1 = $dst",
    Predicates = [HasGFNI, UseSSE2] in
defm GF2P8MULB : GF2P8MULB_rm<"gf2p8mulb", v16i8, VR128, memop,
                              i128mem, 1>;
let Predicates = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
  defm VGF2P8MULB : GF2P8MULB_rm<"vgf2p8mulb", v16i8, VR128, load,
                                 i128mem>, VEX_4V;
  defm VGF2P8MULBY : GF2P8MULB_rm<"vgf2p8mulb", v32i8, VR256, load,
                                  i256mem>, VEX_4V, VEX_L;
}
// GF2P8AFFINEINVQB, GF2P8AFFINEQB
let isCommutable = 0 in {
  defm GF2P8AFFINEINVQB : GF2P8AFFINE_common<0xCF, "gf2p8affineinvqb",
                                             X86GF2P8affineinvqb>, TAPD;
  defm GF2P8AFFINEQB : GF2P8AFFINE_common<0xCE, "gf2p8affineqb",
                                          X86GF2P8affineqb>, TAPD;
}
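
// (For reference: gf2p8affineqb computes, per byte, an affine transform over
// GF(2) -- the 8x8 bit matrix held in the corresponding qword of the second
// source is multiplied by the source byte and XORed with the imm8 constant;
// the "inv" form first maps the byte through the GF(2^8) inverse used by AES.)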