//===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 SSE instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//

/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           RegisterClass RC, X86MemOperand x86memop,
                           Domain d, X86FoldableSchedWrite sched,
                           bit Is2Addr = 1> {
let isCodeGenOnly = 1 in {
  let isCommutable = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], d>,
       Sched<[sched]>;
  }
  def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
     !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
     [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], d>,
     Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
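
// A usage sketch (hypothetical instantiation; the real users of this
// multiclass follow later in the file): with Is2Addr = 1 the generated "rr"
// def carries the two-address SSE asm string
//   addss {$src2, $dst|$dst, $src2}
// while Is2Addr = 0 yields the three-operand AVX form
//   addss {$src2, $src1, $dst|$dst, $src1, $src2}
// The "rm" def folds the second operand from memory via (load addr:$src2).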

/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
multiclass sse12_fp_scalar_int<bits<8> opc,
                               SDPatternOperator OpNode, RegisterClass RC,
                               ValueType VT, string asm, Operand memopr,
                               PatFrags mem_frags, Domain d,
                               X86FoldableSchedWrite sched, bit Is2Addr = 1> {
let hasSideEffects = 0 in {
  def rr_Int : SI_Int<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (VT (OpNode RC:$src1, RC:$src2)))], d>,
       Sched<[sched]>;
  let mayLoad = 1 in
  def rm_Int : SI_Int<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, memopr:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (VT (OpNode RC:$src1, (mem_frags addr:$src2))))], d>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

/// sse12_fp_packed - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           RegisterClass RC, ValueType vt,
                           X86MemOperand x86memop, PatFrag mem_frag,
                           Domain d, X86FoldableSchedWrite sched,
                           bit Is2Addr = 1> {
  let isCommutable = 1 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>,
       Sched<[sched]>;
  let mayLoad = 1 in
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
       d>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}

/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
                                 string OpcodeStr, X86MemOperand x86memop,
                                 X86FoldableSchedWrite sched,
                                 list<dag> pat_rr, list<dag> pat_rm,
                                 bit Is2Addr = 1> {
  let isCommutable = 1, hasSideEffects = 0 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rr, d>,
       Sched<[sched]>;
  let hasSideEffects = 0, mayLoad = 1 in
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rm, d>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}


// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
  def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
                   [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoAVX512]>;
  def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
                   [(set FR64:$dst, fp64imm0)]>, Requires<[HasSSE2, NoAVX512]>;
  def FsFLD0F128 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                     [(set VR128:$dst, fp128imm0)]>, Requires<[HasSSE1, NoAVX512]>;
}

//===----------------------------------------------------------------------===//
// AVX & SSE - Zero/One Vectors
//===----------------------------------------------------------------------===//

// Alias instruction that maps zero vector to pxor / xorp* for sse.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
               [(set VR128:$dst, (v4f32 immAllZerosV))]>;
}

let Predicates = [NoAVX512] in {
def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
}
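
// Lowering sketch (illustrative): a v4i32 zero starts out as the single
// pseudo
//   %z:vr128 = V_SET0
// ExpandPostRAPseudos rewrites it to an XORPS of a register with itself,
// and ExecutionDomainFix may then flip it to PXOR when the surrounding
// uses are in the integer domain.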

// The same as done above but for AVX. The 256-bit AVX1 ISA doesn't support PI,
// and doesn't need it because on Sandy Bridge the register is set to zero
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                 [(set VR256:$dst, (v8i32 immAllZerosV))]>;
}

let Predicates = [NoAVX512] in {
def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;
}

// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
  def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                       [(set VR128:$dst, (v4i32 immAllOnesV))]>;
  let Predicates = [HasAVX1Only, OptForMinSize] in {
  def AVX1_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
  }
  let Predicates = [HasAVX2] in
  def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
}
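
// Illustrative expansion (an assumption about the usual pseudo lowering,
// not defined here): an all-ones v4i32 typically materializes as
//   pcmpeqd %xmm0, %xmm0
// once the pseudo is expanded; canFoldAsLoad additionally allows turning it
// into a constant-pool load at a use site when that folds more cheaply.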

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move FP Scalar Instructions
//
// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
// register copies because it's a partial register update; Register-to-register
// movss/movsd is not modeled as an INSERT_SUBREG because INSERT_SUBREG requires
// that the insert be implementable in terms of a copy, and, as just mentioned,
// we don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//

multiclass sse12_move_rr<SDNode OpNode, ValueType vt, string base_opc,
                         string asm_opr, Domain d, string Name> {
  let isCommutable = 1 in
  def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
              (ins VR128:$src1, VR128:$src2),
              !strconcat(base_opc, asm_opr),
              [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], d>,
              Sched<[SchedWriteFShuffle.XMM]>;

  // For the disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                  (ins VR128:$src1, VR128:$src2),
                  !strconcat(base_opc, asm_opr), []>,
                  Sched<[SchedWriteFShuffle.XMM]>, FoldGenData<Name#rr>;
}

multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
                      X86MemOperand x86memop, string OpcodeStr,
                      Domain d, string Name, Predicate pred> {
  // AVX
  let Predicates = [UseAVX, OptForSize] in
  defm V#NAME : sse12_move_rr<OpNode, vt, OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d,
                              "V"#Name>,
                              VEX_4V, VEX_LIG, VEX_WIG;

  def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(store RC:$src, addr:$dst)], d>,
                     VEX, VEX_LIG, Sched<[WriteFStore]>, VEX_WIG;
  // SSE1 & 2
  let Constraints = "$src1 = $dst" in {
    let Predicates = [pred, NoSSE41_Or_OptForSize] in
    defm NAME : sse12_move_rr<OpNode, vt, OpcodeStr,
                              "\t{$src2, $dst|$dst, $src2}", d, Name>;
  }

  def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(store RC:$src, addr:$dst)], d>,
                   Sched<[WriteFStore]>;

  def : InstAlias<"v"#OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>("V"#NAME#"rr_REV")
                   VR128:$dst, VR128:$src1, VR128:$src2), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src2, $dst|$dst, $src2}",
                  (!cast<Instruction>(NAME#"rr_REV")
                   VR128:$dst, VR128:$src2), 0>;
}
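
// Semantics sketch (illustrative): for the register-register form, the low
// scalar element of $src2 is merged into the upper elements of $src1, e.g.
//   movss %xmm1, %xmm0   ; xmm0 = { xmm1[0], xmm0[1], xmm0[2], xmm0[3] }
// which is exactly why it is unsuitable as a plain FR32/FR64 copy.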

// Loading from memory automatically zeroing upper bits.
multiclass sse12_move_rm<RegisterClass RC, ValueType vt, X86MemOperand x86memop,
                         PatFrag mem_pat, PatFrag vzloadfrag, string OpcodeStr,
                         Domain d> {
  def V#NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
                     VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
  def NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
                   Sched<[WriteFLoad]>;

  // _alt version uses FR32/FR64 register class.
  let isCodeGenOnly = 1 in {
    def V#NAME#rm_alt : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                           !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                           [(set RC:$dst, (mem_pat addr:$src))], d>,
                           VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
    def NAME#rm_alt : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                         [(set RC:$dst, (mem_pat addr:$src))], d>,
                         Sched<[WriteFLoad]>;
  }
}

defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
                        SSEPackedSingle, "MOVSS", UseSSE1>, XS;
defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
                        SSEPackedDouble, "MOVSD", UseSSE2>, XD;

let canFoldAsLoad = 1, isReMaterializable = 1 in {
  defm MOVSS : sse12_move_rm<FR32, v4f32, f32mem, loadf32, X86vzload32, "movss",
                             SSEPackedSingle>, XS;
  defm MOVSD : sse12_move_rm<FR64, v2f64, f64mem, loadf64, X86vzload64, "movsd",
                             SSEPackedDouble>, XD;
}

// Patterns
let Predicates = [UseAVX] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
}

let Predicates = [UseAVX, OptForSize] in {
  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
  // MOVSS to the lower bits.
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (VMOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (VMOVSSrr (v4i32 (V_SET0)), VR128:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSrr (v4f32 (V_SET0)),
                    (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSrr (v4i32 (V_SET0)),
                    (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)))), sub_xmm)>;
}
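
// Worked example for the OptForSize patterns above (illustrative): zeroing
// all but the low lane of %xmm1 selects
//   %z:vr128 = V_SET0
//   %r:vr128 = VMOVSSrr %z, %xmm1
// i.e. a zero-idiom XOR plus one MOVSS, which is smaller than the
// blend/insert sequences preferred when optimizing for speed.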

let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVSS to the lower bits.
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
          (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
          (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
}

let Predicates = [UseSSE2] in
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
          (MOVSDrm addr:$src)>;

let Predicates = [UseSSE1] in
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
          (MOVSSrm addr:$src)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
//===----------------------------------------------------------------------===//

multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
                            X86MemOperand x86memop, PatFrag ld_frag,
                            string asm, Domain d,
                            X86SchedWriteMoveLS sched> {
let hasSideEffects = 0, isMoveReg = 1 in
  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>,
           Sched<[sched.RR]>;
let canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set RC:$dst, (ld_frag addr:$src))], d>,
           Sched<[sched.RM]>;
}

let Predicates = [HasAVX, NoVLX] in {
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
                                SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                                PS, VEX, VEX_WIG;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
                                SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                                PD, VEX, VEX_WIG;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
                                SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                                PS, VEX, VEX_WIG;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
                                SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                                PD, VEX, VEX_WIG;

defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, "movaps",
                                 SSEPackedSingle, SchedWriteFMoveLS.YMM>,
                                 PS, VEX, VEX_L, VEX_WIG;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd",
                                 SSEPackedDouble, SchedWriteFMoveLS.YMM>,
                                 PD, VEX, VEX_L, VEX_WIG;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups",
                                 SSEPackedSingle, SchedWriteFMoveLS.YMM>,
                                 PS, VEX, VEX_L, VEX_WIG;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd",
                                 SSEPackedDouble, SchedWriteFMoveLS.YMM>,
                                 PD, VEX, VEX_L, VEX_WIG;
}

let Predicates = [UseSSE1] in {
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
                               SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                               PS;
defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
                               SSEPackedSingle, SchedWriteFMoveLS.XMM>,
                               PS;
}
let Predicates = [UseSSE2] in {
defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
                               SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                               PD;
defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
                               SSEPackedDouble, SchedWriteFMoveLS.XMM>,
                               PD;
}
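
// Selection sketch (illustrative): a 16-byte v4f32 load whose alignment is
// provably 16 matches alignedloadv4f32 and selects MOVAPSrm; without that
// proof it matches plain loadv4f32 and selects MOVUPSrm instead.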
"movapd\t{$src, $dst|$dst, $src}", 402 [(alignedstore (v2f64 VR128:$src), addr:$dst)]>, 403 VEX, VEX_WIG; 404def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 405 "movups\t{$src, $dst|$dst, $src}", 406 [(store (v4f32 VR128:$src), addr:$dst)]>, 407 VEX, VEX_WIG; 408def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 409 "movupd\t{$src, $dst|$dst, $src}", 410 [(store (v2f64 VR128:$src), addr:$dst)]>, 411 VEX, VEX_WIG; 412} // SchedRW 413 414let SchedRW = [SchedWriteFMoveLS.YMM.MR] in { 415def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 416 "movaps\t{$src, $dst|$dst, $src}", 417 [(alignedstore (v8f32 VR256:$src), addr:$dst)]>, 418 VEX, VEX_L, VEX_WIG; 419def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 420 "movapd\t{$src, $dst|$dst, $src}", 421 [(alignedstore (v4f64 VR256:$src), addr:$dst)]>, 422 VEX, VEX_L, VEX_WIG; 423def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 424 "movups\t{$src, $dst|$dst, $src}", 425 [(store (v8f32 VR256:$src), addr:$dst)]>, 426 VEX, VEX_L, VEX_WIG; 427def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 428 "movupd\t{$src, $dst|$dst, $src}", 429 [(store (v4f64 VR256:$src), addr:$dst)]>, 430 VEX, VEX_L, VEX_WIG; 431} // SchedRW 432} // Predicate 433 434// For disassembler 435let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, 436 isMoveReg = 1 in { 437let SchedRW = [SchedWriteFMoveLS.XMM.RR] in { 438 def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst), 439 (ins VR128:$src), 440 "movaps\t{$src, $dst|$dst, $src}", []>, 441 VEX, VEX_WIG, FoldGenData<"VMOVAPSrr">; 442 def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst), 443 (ins VR128:$src), 444 "movapd\t{$src, $dst|$dst, $src}", []>, 445 VEX, VEX_WIG, FoldGenData<"VMOVAPDrr">; 446 def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst), 447 (ins VR128:$src), 448 "movups\t{$src, $dst|$dst, $src}", []>, 449 VEX, VEX_WIG, FoldGenData<"VMOVUPSrr">; 450 def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst), 451 (ins VR128:$src), 452 "movupd\t{$src, $dst|$dst, $src}", []>, 453 VEX, VEX_WIG, FoldGenData<"VMOVUPDrr">; 454} // SchedRW 455 456let SchedRW = [SchedWriteFMoveLS.YMM.RR] in { 457 def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst), 458 (ins VR256:$src), 459 "movaps\t{$src, $dst|$dst, $src}", []>, 460 VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVAPSYrr">; 461 def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst), 462 (ins VR256:$src), 463 "movapd\t{$src, $dst|$dst, $src}", []>, 464 VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVAPDYrr">; 465 def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst), 466 (ins VR256:$src), 467 "movups\t{$src, $dst|$dst, $src}", []>, 468 VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPSYrr">; 469 def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst), 470 (ins VR256:$src), 471 "movupd\t{$src, $dst|$dst, $src}", []>, 472 VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPDYrr">; 473} // SchedRW 474} // Predicate 475 476// Reversed version with ".s" suffix for GAS compatibility. 

// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
                (VMOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}",
                (VMOVAPDrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}",
                (VMOVUPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}",
                (VMOVUPDrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
                (VMOVAPSYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovapd.s\t{$src, $dst|$dst, $src}",
                (VMOVAPDYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovups.s\t{$src, $dst|$dst, $src}",
                (VMOVUPSYrr_REV VR256:$dst, VR256:$src), 0>;
def : InstAlias<"vmovupd.s\t{$src, $dst|$dst, $src}",
                (VMOVUPDYrr_REV VR256:$dst, VR256:$src), 0>;

let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)]>;
def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)]>;
} // SchedRW

// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    isMoveReg = 1, SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
  def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movaps\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVAPSrr">;
  def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movapd\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVAPDrr">;
  def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movups\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVUPSrr">;
  def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movupd\t{$src, $dst|$dst, $src}", []>,
                         FoldGenData<"MOVUPDrr">;
}

// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"movaps.s\t{$src, $dst|$dst, $src}",
                (MOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movapd.s\t{$src, $dst|$dst, $src}",
                (MOVAPDrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movups.s\t{$src, $dst|$dst, $src}",
                (MOVUPSrr_REV VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movupd.s\t{$src, $dst|$dst, $src}",
                (MOVUPDrr_REV VR128:$dst, VR128:$src), 0>;
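
// Example (illustrative): "movaps.s %xmm0, %xmm1" still moves xmm0 into
// xmm1, but assembles to the MRMDestReg (0x29) encoding instead of the
// default MRMSrcReg (0x28) one; the ".s" suffix only forces the alternate
// store-form opcode, matching GAS behaviour.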

let Predicates = [HasAVX, NoVLX] in {
  // 256-bit loads/stores need to use floating point load/store in case we
  // don't have AVX2. Execution domain fixing will convert to integer if AVX2
  // is available and changing the domain is beneficial.
  def : Pat<(alignedloadv4i64 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(alignedloadv8i32 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(alignedloadv16i16 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(alignedloadv32i8 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(loadv4i64 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(loadv8i32 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(loadv16i16 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(loadv32i8 addr:$src),
            (VMOVUPSYrm addr:$src)>;

  def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore (v16i16 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore (v32i8 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v4i64 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v8i32 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
}

// Use movaps / movups for SSE integer load / store (one byte shorter).
// The instructions selected below are then converted to MOVDQA/MOVDQU
// during the SSE domain pass.
let Predicates = [UseSSE1] in {
  def : Pat<(alignedloadv2i64 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(alignedloadv4i32 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(alignedloadv8i16 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(alignedloadv16i8 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src),
            (MOVUPSrm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),
            (MOVUPSrm addr:$src)>;
  def : Pat<(loadv8i16 addr:$src),
            (MOVUPSrm addr:$src)>;
  def : Pat<(loadv16i8 addr:$src),
            (MOVUPSrm addr:$src)>;

  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
}
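
// Example of the substitution above (illustrative): under UseSSE1 a v4i32
// load selects MOVAPSrm/MOVUPSrm rather than MOVDQA/MOVDQU, saving one
// opcode byte; the SSE domain pass may convert it back when the
// surrounding code is in the integer domain.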

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//

multiclass sse12_mov_hilo_packed_base<bits<8> opc, SDPatternOperator pdnode,
                                      string base_opc, string asm_opr> {
  // No patterns, as these need to be special-cased between high and low.
  let hasSideEffects = 0, mayLoad = 1 in
  def PSrm : PI<opc, MRMSrcMem,
                (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                !strconcat(base_opc, "s", asm_opr),
                [], SSEPackedSingle>, PS,
                Sched<[SchedWriteFShuffle.XMM.Folded,
                       SchedWriteFShuffle.XMM.ReadAfterFold]>;

  def PDrm : PI<opc, MRMSrcMem,
                (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                !strconcat(base_opc, "d", asm_opr),
                [(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
                                   (scalar_to_vector (loadf64 addr:$src2)))))],
                SSEPackedDouble>, PD,
                Sched<[SchedWriteFShuffle.XMM.Folded,
                       SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

multiclass sse12_mov_hilo_packed<bits<8> opc, SDPatternOperator pdnode,
                                 string base_opc> {
  let Predicates = [UseAVX] in
  defm V#NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc,
                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
                     VEX_4V, VEX_WIG;

  let Constraints = "$src1 = $dst" in
  defm NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc,
                                         "\t{$src2, $dst|$dst, $src2}">;
}

defm MOVL : sse12_mov_hilo_packed<0x12, X86Movsd, "movlp">;

let SchedRW = [WriteFStore] in {
let Predicates = [UseAVX] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlps\t{$src, $dst|$dst, $src}",
                     []>,
                     VEX, VEX_WIG;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (extractelt (v2f64 VR128:$src),
                                   (iPTR 0))), addr:$dst)]>,
                     VEX, VEX_WIG;
} // UseAVX
let mayStore = 1, hasSideEffects = 0 in
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlps\t{$src, $dst|$dst, $src}",
                   []>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (extractelt (v2f64 VR128:$src),
                                 (iPTR 0))), addr:$dst)]>;
} // SchedRW

let Predicates = [UseSSE1] in {
  // This pattern helps select MOVLPS on SSE1 only targets. With SSE2 we'll
  // end up with a movsd or blend instead of shufp.
  // No need for aligned load, we're only loading 64-bits.
  def : Pat<(X86Shufp (v4f32 (simple_load addr:$src2)), VR128:$src1,
                      (i8 -28)),
            (MOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Shufp (v4f32 (X86vzload64 addr:$src2)), VR128:$src1, (i8 -28)),
            (MOVLPSrm VR128:$src1, addr:$src2)>;

  def : Pat<(v4f32 (X86vzload64 addr:$src)),
            (MOVLPSrm (v4f32 (V_SET0)), addr:$src)>;
  def : Pat<(X86vextractstore64 (v4f32 VR128:$src), addr:$dst),
            (MOVLPSmr addr:$dst, VR128:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Hi packed FP Instructions
//===----------------------------------------------------------------------===//

defm MOVH : sse12_mov_hilo_packed<0x16, X86Unpckl, "movhp">;
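
// Semantics sketch (illustrative): with xmm = {a,b,c,d} and a 64-bit
// memory operand m = {x,y},
//   movlps m, xmm  ->  {x, y, c, d}
//   movhps m, xmm  ->  {a, b, x, y}
// hence the v2f64 forms are modeled as X86Movsd (replace the low half) and
// X86Unpckl (keep the low half, insert the high half) of a
// scalar_to_vector load.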

let SchedRW = [WriteFStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
let Predicates = [UseAVX] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movhps\t{$src, $dst|$dst, $src}",
                     []>, VEX, VEX_WIG;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movhpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (extractelt
                                   (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                   (iPTR 0))), addr:$dst)]>, VEX, VEX_WIG;
} // UseAVX
let mayStore = 1, hasSideEffects = 0 in
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhps\t{$src, $dst|$dst, $src}",
                   []>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (extractelt
                                 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                 (iPTR 0))), addr:$dst)]>;
} // SchedRW

let Predicates = [UseAVX] in {
  // MOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
            (VMOVHPDrm VR128:$src1, addr:$src2)>;

  def : Pat<(store (f64 (extractelt
                         (v2f64 (X86VPermilpi VR128:$src, (i8 1))),
                         (iPTR 0))), addr:$dst),
            (VMOVHPDmr addr:$dst, VR128:$src)>;

  // MOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;
}

let Predicates = [UseSSE1] in {
  // This pattern helps select MOVHPS on SSE1 only targets. With SSE2 we'll
  // end up with a movsd or blend instead of shufp.
  // No need for aligned load, we're only loading 64-bits.
  def : Pat<(X86Movlhps VR128:$src1, (v4f32 (simple_load addr:$src2))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128:$src1, (v4f32 (X86vzload64 addr:$src2))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;

  def : Pat<(X86vextractstore64 (v4f32 (X86Movhlps VR128:$src, VR128:$src)),
                                addr:$dst),
            (MOVHPSmr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  // MOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
            (MOVHPDrm VR128:$src1, addr:$src2)>;

  def : Pat<(store (f64 (extractelt
                         (v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))),
                         (iPTR 0))), addr:$dst),
            (MOVHPDmr addr:$dst, VR128:$src)>;

  // MOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
}
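
// Store-side sketch (illustrative): storing element 1 of a v2f64 matches
// the unpckh-based pattern above, so
//   (store (f64 (extractelt (X86Unpckh v, v), 0)), p)
// selects MOVHPDmr -- a single 64-bit store of the high half, with no
// separate shuffle left in the output.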

let Predicates = [UseSSE2, NoSSE41_Or_OptForSize] in {
  // Use MOVLPD to load into the low bits from a full vector unless we can use
  // BLENDPD.
  def : Pat<(X86Movsd VR128:$src1, (v2f64 (simple_load addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

let Predicates = [UseAVX] in {
  def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst,
                          (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
                        VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG;
  let isCommutable = 1 in
  def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst,
                          (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
                        VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG,
                        NotMemoryFoldable;
}
let Constraints = "$src1 = $dst" in {
  def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "movlhps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
                      Sched<[SchedWriteFShuffle.XMM]>;
  let isCommutable = 1 in
  def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "movhlps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
                      Sched<[SchedWriteFShuffle.XMM]>, NotMemoryFoldable;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Conversion Instructions
//===----------------------------------------------------------------------===//

multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                       SDPatternOperator OpNode, X86MemOperand x86memop,
                       PatFrag ld_frag, string asm, string mem,
                       X86FoldableSchedWrite sched, Domain d,
                       SchedRead Int2Fpu = ReadDefault> {
  let ExeDomain = d in {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (OpNode SrcRC:$src))]>,
              Sched<[sched, Int2Fpu]>;
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
              mem#"\t{$src, $dst|$dst, $src}",
              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
              Sched<[sched.Folded]>;
  }
}

multiclass sse12_cvt_p<bits<8> opc, RegisterClass RC, X86MemOperand x86memop,
                       ValueType DstTy, ValueType SrcTy, PatFrag ld_frag,
                       string asm, Domain d, X86FoldableSchedWrite sched> {
let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), asm,
             [(set RC:$dst, (DstTy (any_sint_to_fp (SrcTy RC:$src))))], d>,
             Sched<[sched]>;
  let mayLoad = 1 in
  def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), asm,
             [(set RC:$dst, (DstTy (any_sint_to_fp
                                    (SrcTy (ld_frag addr:$src)))))], d>,
             Sched<[sched.Folded]>;
}
}
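
// Behavioral note with a concrete case (illustrative): for an input of
// 1.7f, the truncating converts (cvttss2si, any_fp_to_sint) yield 1, while
// the non-truncating ones (cvtss2si, lrint/llrint) honour the current MXCSR
// rounding mode and yield 2 under round-to-nearest.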
$src}"), []>, 850 Sched<[sched, ReadDefault, ReadInt2Fpu]>; 851 let mayLoad = 1 in 852 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), 853 (ins DstRC:$src1, x86memop:$src), 854 asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>, 855 Sched<[sched.Folded, sched.ReadAfterFold]>; 856} // hasSideEffects = 0 857} 858 859let isCodeGenOnly = 1, Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in { 860defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint, f32mem, loadf32, 861 "cvttss2si", "cvttss2si", 862 WriteCvtSS2I, SSEPackedSingle>, 863 XS, VEX, VEX_LIG; 864defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint, f32mem, loadf32, 865 "cvttss2si", "cvttss2si", 866 WriteCvtSS2I, SSEPackedSingle>, 867 XS, VEX, VEX_W, VEX_LIG; 868defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64, 869 "cvttsd2si", "cvttsd2si", 870 WriteCvtSD2I, SSEPackedDouble>, 871 XD, VEX, VEX_LIG; 872defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64, 873 "cvttsd2si", "cvttsd2si", 874 WriteCvtSD2I, SSEPackedDouble>, 875 XD, VEX, VEX_W, VEX_LIG; 876 877defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32, 878 "cvtss2si", "cvtss2si", 879 WriteCvtSS2I, SSEPackedSingle>, 880 XS, VEX, VEX_LIG; 881defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32, 882 "cvtss2si", "cvtss2si", 883 WriteCvtSS2I, SSEPackedSingle>, 884 XS, VEX, VEX_W, VEX_LIG; 885defm VCVTSD2SI : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64, 886 "cvtsd2si", "cvtsd2si", 887 WriteCvtSD2I, SSEPackedDouble>, 888 XD, VEX, VEX_LIG; 889defm VCVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64, 890 "cvtsd2si", "cvtsd2si", 891 WriteCvtSD2I, SSEPackedDouble>, 892 XD, VEX, VEX_W, VEX_LIG; 893} 894 895// The assembler can recognize rr 64-bit instructions by seeing a rxx 896// register, but the same isn't true when only using memory operands, 897// provide other assembly "l" and "q" forms to address this explicitly 898// where appropriate to do so. 

// The assembler can recognize rr 64-bit instructions by seeing an rxx
// register, but the same isn't true when only using memory operands, so we
// provide explicit "l" and "q" assembly forms to address this where
// appropriate.
let isCodeGenOnly = 1 in {
defm VCVTSI2SS   : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss", "l",
                                  WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V,
                                  VEX_LIG, SIMD_EXC;
defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss", "q",
                                  WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V,
                                  VEX_W, VEX_LIG, SIMD_EXC;
defm VCVTSI2SD   : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd", "l",
                                  WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V,
                                  VEX_LIG;
defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q",
                                  WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V,
                                  VEX_W, VEX_LIG, SIMD_EXC;
} // isCodeGenOnly = 1

let Predicates = [UseAVX] in {
  def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f32 (any_sint_to_fp GR32:$src)),
            (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f32 (any_sint_to_fp GR64:$src)),
            (VCVTSI642SSrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
  def : Pat<(f64 (any_sint_to_fp GR32:$src)),
            (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f64 (any_sint_to_fp GR64:$src)),
            (VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64rr FR32:$src)>;
  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64rm addr:$src)>;

  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64rr FR64:$src)>;
  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64rm addr:$src)>;
}
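
// Why IMPLICIT_DEF (illustrative): the AVX scalar converts read $src1 only
// to supply the upper destination bits; for a plain f32/f64 result those
// bits are don't-care, so the patterns pass (f32/f64 (IMPLICIT_DEF)) and
// let the register allocator pick any register.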

let isCodeGenOnly = 1 in {
defm CVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint, f32mem, loadf32,
                               "cvttss2si", "cvttss2si",
                               WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC;
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint, f32mem, loadf32,
                               "cvttss2si", "cvttss2si",
                               WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC;
defm CVTTSD2SI   : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64,
                               "cvttsd2si", "cvttsd2si",
                               WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC;
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64,
                               "cvttsd2si", "cvttsd2si",
                               WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;

defm CVTSS2SI   : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32,
                              "cvtss2si", "cvtss2si",
                              WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC;
defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32,
                              "cvtss2si", "cvtss2si",
                              WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC;
defm CVTSD2SI   : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64,
                              "cvtsd2si", "cvtsd2si",
                              WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC;
defm CVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64,
                              "cvtsd2si", "cvtsd2si",
                              WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;

defm CVTSI2SS   : sse12_cvt_s<0x2A, GR32, FR32, any_sint_to_fp, i32mem, loadi32,
                              "cvtsi2ss", "cvtsi2ss{l}",
                              WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, SIMD_EXC;
defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, any_sint_to_fp, i64mem, loadi64,
                              "cvtsi2ss", "cvtsi2ss{q}",
                              WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, REX_W, SIMD_EXC;
defm CVTSI2SD   : sse12_cvt_s<0x2A, GR32, FR64, any_sint_to_fp, i32mem, loadi32,
                              "cvtsi2sd", "cvtsi2sd{l}",
                              WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD;
defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, any_sint_to_fp, i64mem, loadi64,
                              "cvtsi2sd", "cvtsi2sd{q}",
                              WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD, REX_W, SIMD_EXC;
} // isCodeGenOnly = 1

let Predicates = [UseSSE1] in {
  def : Pat<(i64 (lrint FR32:$src)), (CVTSS2SI64rr FR32:$src)>;
  def : Pat<(i64 (lrint (loadf32 addr:$src))), (CVTSS2SI64rm addr:$src)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(i64 (lrint FR64:$src)), (CVTSD2SI64rr FR64:$src)>;
  def : Pat<(i64 (lrint (loadf64 addr:$src))), (CVTSD2SI64rm addr:$src)>;
}

// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).

multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          ValueType DstVT, ValueType SrcVT, SDNode OpNode,
                          Operand memop, PatFrags mem_frags, string asm,
                          X86FoldableSchedWrite sched, Domain d> {
let ExeDomain = d in {
  def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
                  !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                  [(set DstRC:$dst, (DstVT (OpNode (SrcVT SrcRC:$src))))]>,
               Sched<[sched]>;
  def rm_Int : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
                  !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                  [(set DstRC:$dst, (DstVT (OpNode (SrcVT (mem_frags addr:$src)))))]>,
               Sched<[sched.Folded]>;
}
}

multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
                                RegisterClass DstRC, X86MemOperand x86memop,
                                string asm, string mem, X86FoldableSchedWrite sched,
                                Domain d, bit Is2Addr = 1> {
let hasSideEffects = 0, ExeDomain = d in {
  def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
                  !if(Is2Addr,
                      !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                      !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                  []>, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  let mayLoad = 1 in
  def rm_Int : SI<opc, MRMSrcMem, (outs DstRC:$dst),
                  (ins DstRC:$src1, x86memop:$src2),
                  !if(Is2Addr,
                      asm#"{"#mem#"}\t{$src2, $dst|$dst, $src2}",
                      asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

let Uses = [MXCSR], mayRaiseFPException = 1 in {
let Predicates = [UseAVX] in {
defm VCVTSD2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64,
                                  X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
                                  WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64,
                                  X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
                                  WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_W, VEX_LIG;
}
defm CVTSD2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si,
                                 sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I,
                                 SSEPackedDouble>, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si,
                                 sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I,
                                 SSEPackedDouble>, XD, REX_W;
}

let Predicates = [UseAVX] in {
defm VCVTSI2SS   : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                       i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle, 0>,
                       XS, VEX_4V, VEX_LIG, SIMD_EXC;
defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                       i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle, 0>,
                       XS, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC;
defm VCVTSI2SD   : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                       i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble, 0>,
                       XD, VEX_4V, VEX_LIG;
defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                       i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble, 0>,
                       XD, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC;
}
let Constraints = "$src1 = $dst" in {
  defm CVTSI2SS   : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                        i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle>,
                        XS, SIMD_EXC;
  defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                        i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle>,
                        XS, REX_W, SIMD_EXC;
  defm CVTSI2SD   : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                        i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble>,
                        XD;
  defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                        i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble>,
                        XD, REX_W, SIMD_EXC;
}

def : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI2SSrr_Int VR128:$dst, VR128:$src1, GR32:$src2), 0, "att">;
def : InstAlias<"vcvtsi2ss{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI642SSrr_Int VR128:$dst, VR128:$src1, GR64:$src2), 0, "att">;
def : InstAlias<"vcvtsi2sd{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI2SDrr_Int VR128:$dst, VR128:$src1, GR32:$src2), 0, "att">;
def : InstAlias<"vcvtsi2sd{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VCVTSI642SDrr_Int VR128:$dst, VR128:$src1, GR64:$src2), 0, "att">;

def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SSrm_Int VR128:$dst, VR128:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SDrm_Int VR128:$dst, VR128:$src1, i32mem:$src), 0, "att">;

def : InstAlias<"cvtsi2ss{l}\t{$src, $dst|$dst, $src}",
                (CVTSI2SSrr_Int VR128:$dst, GR32:$src), 0, "att">;
def : InstAlias<"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
                (CVTSI642SSrr_Int VR128:$dst, GR64:$src), 0, "att">;
def : InstAlias<"cvtsi2sd{l}\t{$src, $dst|$dst, $src}",
                (CVTSI2SDrr_Int VR128:$dst, GR32:$src), 0, "att">;
def : InstAlias<"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
                (CVTSI642SDrr_Int VR128:$dst, GR64:$src), 0, "att">;

def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
                (CVTSI2SSrm_Int VR128:$dst, i32mem:$src), 0, "att">;
def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
                (CVTSI2SDrm_Int VR128:$dst, i32mem:$src), 0, "att">;

/// SSE 1 Only

// Aliases for intrinsics
let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
defm VCVTTSS2SI   : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
                                   ssmem, sse_load_f32, "cvttss2si",
                                   WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
                                   X86cvtts2Int, ssmem, sse_load_f32,
                                   "cvttss2si", WriteCvtSS2I, SSEPackedSingle>,
                                   XS, VEX, VEX_LIG, VEX_W;
defm VCVTTSD2SI   : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
                                   sdmem, sse_load_f64, "cvttsd2si",
                                   WriteCvtSS2I, SSEPackedDouble>, XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
                                   X86cvtts2Int, sdmem, sse_load_f64,
                                   "cvttsd2si", WriteCvtSS2I, SSEPackedDouble>,
                                   XD, VEX, VEX_LIG, VEX_W;
}
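
// Note (illustrative): the *_Int forms operate on VR128 and match the
// intrinsic lowering -- e.g. _mm_cvttss_si32, whose vector argument stays
// in an XMM register -- whereas the isCodeGenOnly FR32/FR64 variants match
// plain scalar IR such as (i32 (any_fp_to_sint f32:$x)).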

let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm CVTTSS2SI   : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
                                  ssmem, sse_load_f32, "cvttss2si",
                                  WriteCvtSS2I, SSEPackedSingle>, XS;
defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
                                  X86cvtts2Int, ssmem, sse_load_f32,
                                  "cvttss2si", WriteCvtSS2I, SSEPackedSingle>,
                                  XS, REX_W;
defm CVTTSD2SI   : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
                                  sdmem, sse_load_f64, "cvttsd2si",
                                  WriteCvtSD2I, SSEPackedDouble>, XD;
defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
                                  X86cvtts2Int, sdmem, sse_load_f64,
                                  "cvttsd2si", WriteCvtSD2I, SSEPackedDouble>,
                                  XD, REX_W;
}

def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">;
def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">;
def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">;

def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">;
def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">;
def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">;
def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">;

let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
defm VCVTSS2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
                                  ssmem, sse_load_f32, "cvtss2si",
                                  WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
                                  ssmem, sse_load_f32, "cvtss2si",
                                  WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_W, VEX_LIG;
}
let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm CVTSS2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
                                 ssmem, sse_load_f32, "cvtss2si",
                                 WriteCvtSS2I, SSEPackedSingle>, XS;
defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
                                 ssmem, sse_load_f32, "cvtss2si",
                                 WriteCvtSS2I, SSEPackedSingle>, XS, REX_W;

defm VCVTDQ2PS  : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load,
                              "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                              SSEPackedSingle, WriteCvtI2PS>,
                              PS, VEX, Requires<[HasAVX, NoVLX]>, VEX_WIG;
defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, load,
                              "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                              SSEPackedSingle, WriteCvtI2PSY>,
                              PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, VEX_WIG;

defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memop,
                            "cvtdq2ps\t{$src, $dst|$dst, $src}",
                            SSEPackedSingle, WriteCvtI2PS>,
                            PS, Requires<[UseSSE2]>;
}

// AVX aliases
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SIrm_Int GR32:$dst, ssmem:$src), 0, "att">;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SIrm_Int GR32:$dst, sdmem:$src), 0, "att">;
def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SI64rm_Int GR64:$dst, ssmem:$src), 0, "att">;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0, "att">;

// SSE aliases
def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSS2SIrm_Int GR32:$dst, ssmem:$src), 0, "att">;
def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSD2SIrm_Int GR32:$dst, sdmem:$src), 0, "att">;
def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSS2SI64rm_Int GR64:$dst, ssmem:$src), 0, "att">;
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0, "att">;

/// SSE 2 Only

// Convert scalar double to scalar single
let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX],
    ExeDomain = SSEPackedSingle in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
                       (ins FR32:$src1, FR64:$src2),
                       "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                       VEX_4V, VEX_LIG, VEX_WIG,
                       Sched<[WriteCvtSD2SS]>, SIMD_EXC;
let mayLoad = 1 in
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
                    (ins FR32:$src1, f64mem:$src2),
                    "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                    XD, VEX_4V, VEX_LIG, VEX_WIG,
                    Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC;
}

def : Pat<(f32 (any_fpround FR64:$src)),
          (VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>,
          Requires<[UseAVX]>;

let isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in {
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
                     "cvtsd2ss\t{$src, $dst|$dst, $src}",
                     [(set FR32:$dst, (any_fpround FR64:$src))]>,
                     Sched<[WriteCvtSD2SS]>, SIMD_EXC;
def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
                   "cvtsd2ss\t{$src, $dst|$dst, $src}",
                   [(set FR32:$dst, (any_fpround (loadf64 addr:$src)))]>,
                   XD, Requires<[UseSSE2, OptForSize]>,
                   Sched<[WriteCvtSD2SS.Folded]>, SIMD_EXC;
}

let Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = SSEPackedSingle in {
def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR128:$dst,
                         (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
                       XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>,
                       Sched<[WriteCvtSD2SS]>;
def VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR128:$dst,
                         (v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>,
                       XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>,
                       Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in {
def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
                      XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>;
def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                      "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>,
                      XD, Requires<[UseSSE2]>,
                      Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
}
}

// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
let isCodeGenOnly = 1, hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
                    (ins FR64:$src1, FR32:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                    XS, VEX_4V, VEX_LIG, VEX_WIG,
                    Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>, SIMD_EXC;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
                    (ins FR64:$src1, f32mem:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                    XS, VEX_4V, VEX_LIG, VEX_WIG,
                    Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>,
                    Requires<[UseAVX, OptForSize]>, SIMD_EXC;
} // isCodeGenOnly = 1, hasSideEffects = 0

def : Pat<(f64 (any_fpextend FR32:$src)),
          (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>, Requires<[UseAVX]>;
def : Pat<(any_fpextend (loadf32 addr:$src)),
          (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX, OptForSize]>;

let isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in {
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (any_fpextend FR32:$src))]>,
                   XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>, SIMD_EXC;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (any_fpextend (loadf32 addr:$src)))]>,
                   XS, Requires<[UseSSE2, OptForSize]>,
                   Sched<[WriteCvtSS2SD.Folded]>, SIMD_EXC;
} // isCodeGenOnly = 1
VR128:$src1, VR128:$src2), 1345 "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 1346 []>, XS, VEX_4V, VEX_LIG, VEX_WIG, 1347 Requires<[HasAVX]>, Sched<[WriteCvtSS2SD]>; 1348let mayLoad = 1 in 1349def VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem, 1350 (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), 1351 "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 1352 []>, XS, VEX_4V, VEX_LIG, VEX_WIG, Requires<[HasAVX]>, 1353 Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>; 1354let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix 1355def CVTSS2SDrr_Int: I<0x5A, MRMSrcReg, 1356 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 1357 "cvtss2sd\t{$src2, $dst|$dst, $src2}", 1358 []>, XS, Requires<[UseSSE2]>, 1359 Sched<[WriteCvtSS2SD]>; 1360let mayLoad = 1 in 1361def CVTSS2SDrm_Int: I<0x5A, MRMSrcMem, 1362 (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), 1363 "cvtss2sd\t{$src2, $dst|$dst, $src2}", 1364 []>, XS, Requires<[UseSSE2]>, 1365 Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>; 1366} 1367} // hasSideEffects = 0 1368 1369// Patterns used for matching (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss and 1370// (v)cvtss2sd intrinsic sequences from clang which produce unnecessary 1371// vmovs{s,d} instructions 1372let Predicates = [UseAVX] in { 1373def : Pat<(v4f32 (X86Movss 1374 (v4f32 VR128:$dst), 1375 (v4f32 (scalar_to_vector 1376 (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))), 1377 (VCVTSD2SSrr_Int VR128:$dst, VR128:$src)>; 1378 1379def : Pat<(v2f64 (X86Movsd 1380 (v2f64 VR128:$dst), 1381 (v2f64 (scalar_to_vector 1382 (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))), 1383 (VCVTSS2SDrr_Int VR128:$dst, VR128:$src)>; 1384 1385def : Pat<(v4f32 (X86Movss 1386 (v4f32 VR128:$dst), 1387 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))), 1388 (VCVTSI642SSrr_Int VR128:$dst, GR64:$src)>; 1389 1390def : Pat<(v4f32 (X86Movss 1391 (v4f32 VR128:$dst), 1392 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))), 1393 (VCVTSI642SSrm_Int VR128:$dst, addr:$src)>; 1394 1395def : Pat<(v4f32 (X86Movss 1396 (v4f32 VR128:$dst), 1397 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))), 1398 (VCVTSI2SSrr_Int VR128:$dst, GR32:$src)>; 1399 1400def : Pat<(v4f32 (X86Movss 1401 (v4f32 VR128:$dst), 1402 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))), 1403 (VCVTSI2SSrm_Int VR128:$dst, addr:$src)>; 1404 1405def : Pat<(v2f64 (X86Movsd 1406 (v2f64 VR128:$dst), 1407 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))), 1408 (VCVTSI642SDrr_Int VR128:$dst, GR64:$src)>; 1409 1410def : Pat<(v2f64 (X86Movsd 1411 (v2f64 VR128:$dst), 1412 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))), 1413 (VCVTSI642SDrm_Int VR128:$dst, addr:$src)>; 1414 1415def : Pat<(v2f64 (X86Movsd 1416 (v2f64 VR128:$dst), 1417 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))), 1418 (VCVTSI2SDrr_Int VR128:$dst, GR32:$src)>; 1419 1420def : Pat<(v2f64 (X86Movsd 1421 (v2f64 VR128:$dst), 1422 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))), 1423 (VCVTSI2SDrm_Int VR128:$dst, addr:$src)>; 1424} // Predicates = [UseAVX] 1425 1426let Predicates = [UseSSE2] in { 1427def : Pat<(v4f32 (X86Movss 1428 (v4f32 VR128:$dst), 1429 (v4f32 (scalar_to_vector 1430 (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))), 1431 (CVTSD2SSrr_Int VR128:$dst, VR128:$src)>; 1432 1433def : Pat<(v2f64 (X86Movsd 1434 (v2f64 VR128:$dst), 1435 (v2f64 (scalar_to_vector 1436 (f64 (any_fpextend (f32 
(extractelt VR128:$src, (iPTR 0))))))))), 1437 (CVTSS2SDrr_Int VR128:$dst, VR128:$src)>; 1438 1439def : Pat<(v2f64 (X86Movsd 1440 (v2f64 VR128:$dst), 1441 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))), 1442 (CVTSI642SDrr_Int VR128:$dst, GR64:$src)>; 1443 1444def : Pat<(v2f64 (X86Movsd 1445 (v2f64 VR128:$dst), 1446 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))), 1447 (CVTSI642SDrm_Int VR128:$dst, addr:$src)>; 1448 1449def : Pat<(v2f64 (X86Movsd 1450 (v2f64 VR128:$dst), 1451 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))), 1452 (CVTSI2SDrr_Int VR128:$dst, GR32:$src)>; 1453 1454def : Pat<(v2f64 (X86Movsd 1455 (v2f64 VR128:$dst), 1456 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))), 1457 (CVTSI2SDrm_Int VR128:$dst, addr:$src)>; 1458} // Predicates = [UseSSE2] 1459 1460let Predicates = [UseSSE1] in { 1461def : Pat<(v4f32 (X86Movss 1462 (v4f32 VR128:$dst), 1463 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))), 1464 (CVTSI642SSrr_Int VR128:$dst, GR64:$src)>; 1465 1466def : Pat<(v4f32 (X86Movss 1467 (v4f32 VR128:$dst), 1468 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))), 1469 (CVTSI642SSrm_Int VR128:$dst, addr:$src)>; 1470 1471def : Pat<(v4f32 (X86Movss 1472 (v4f32 VR128:$dst), 1473 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))), 1474 (CVTSI2SSrr_Int VR128:$dst, GR32:$src)>; 1475 1476def : Pat<(v4f32 (X86Movss 1477 (v4f32 VR128:$dst), 1478 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))), 1479 (CVTSI2SSrm_Int VR128:$dst, addr:$src)>; 1480} // Predicates = [UseSSE1] 1481 1482let Predicates = [HasAVX, NoVLX] in { 1483// Convert packed single/double fp to doubleword 1484def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1485 "cvtps2dq\t{$src, $dst|$dst, $src}", 1486 [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>, 1487 VEX, Sched<[WriteCvtPS2I]>, VEX_WIG, SIMD_EXC; 1488def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1489 "cvtps2dq\t{$src, $dst|$dst, $src}", 1490 [(set VR128:$dst, 1491 (v4i32 (X86cvtp2Int (loadv4f32 addr:$src))))]>, 1492 VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG, SIMD_EXC; 1493def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 1494 "cvtps2dq\t{$src, $dst|$dst, $src}", 1495 [(set VR256:$dst, 1496 (v8i32 (X86cvtp2Int (v8f32 VR256:$src))))]>, 1497 VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG, SIMD_EXC; 1498def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), 1499 "cvtps2dq\t{$src, $dst|$dst, $src}", 1500 [(set VR256:$dst, 1501 (v8i32 (X86cvtp2Int (loadv8f32 addr:$src))))]>, 1502 VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG, SIMD_EXC; 1503} 1504def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1505 "cvtps2dq\t{$src, $dst|$dst, $src}", 1506 [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>, 1507 Sched<[WriteCvtPS2I]>, SIMD_EXC; 1508def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1509 "cvtps2dq\t{$src, $dst|$dst, $src}", 1510 [(set VR128:$dst, 1511 (v4i32 (X86cvtp2Int (memopv4f32 addr:$src))))]>, 1512 Sched<[WriteCvtPS2ILd]>, SIMD_EXC; 1513 1514 1515// Convert Packed Double FP to Packed DW Integers 1516let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { 1517// The assembler can recognize rr 256-bit instructions by seeing a ymm 1518// register, but the same isn't true when using memory operands instead. 
1519// Provide other assembly rr and rm forms to address this explicitly. 1520def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1521 "vcvtpd2dq\t{$src, $dst|$dst, $src}", 1522 [(set VR128:$dst, 1523 (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>, 1524 VEX, Sched<[WriteCvtPD2I]>, VEX_WIG; 1525 1526// XMM only 1527def VCVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1528 "vcvtpd2dq{x}\t{$src, $dst|$dst, $src}", 1529 [(set VR128:$dst, 1530 (v4i32 (X86cvtp2Int (loadv2f64 addr:$src))))]>, VEX, 1531 Sched<[WriteCvtPD2ILd]>, VEX_WIG; 1532 1533// YMM only 1534def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), 1535 "vcvtpd2dq\t{$src, $dst|$dst, $src}", 1536 [(set VR128:$dst, 1537 (v4i32 (X86cvtp2Int (v4f64 VR256:$src))))]>, 1538 VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG; 1539def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), 1540 "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", 1541 [(set VR128:$dst, 1542 (v4i32 (X86cvtp2Int (loadv4f64 addr:$src))))]>, 1543 VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG; 1544} 1545 1546def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", 1547 (VCVTPD2DQrr VR128:$dst, VR128:$src), 0, "att">; 1548def : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}", 1549 (VCVTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">; 1550 1551def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1552 "cvtpd2dq\t{$src, $dst|$dst, $src}", 1553 [(set VR128:$dst, 1554 (v4i32 (X86cvtp2Int (memopv2f64 addr:$src))))]>, 1555 Sched<[WriteCvtPD2ILd]>, SIMD_EXC; 1556def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1557 "cvtpd2dq\t{$src, $dst|$dst, $src}", 1558 [(set VR128:$dst, 1559 (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>, 1560 Sched<[WriteCvtPD2I]>, SIMD_EXC; 1561 1562// Convert with truncation packed single/double fp to doubleword 1563// SSE2 packed instructions with XS prefix 1564let Uses = [MXCSR], mayRaiseFPException = 1 in { 1565let Predicates = [HasAVX, NoVLX] in { 1566def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1567 "cvttps2dq\t{$src, $dst|$dst, $src}", 1568 [(set VR128:$dst, 1569 (v4i32 (X86any_cvttp2si (v4f32 VR128:$src))))]>, 1570 VEX, Sched<[WriteCvtPS2I]>, VEX_WIG; 1571def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1572 "cvttps2dq\t{$src, $dst|$dst, $src}", 1573 [(set VR128:$dst, 1574 (v4i32 (X86any_cvttp2si (loadv4f32 addr:$src))))]>, 1575 VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG; 1576def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 1577 "cvttps2dq\t{$src, $dst|$dst, $src}", 1578 [(set VR256:$dst, 1579 (v8i32 (X86any_cvttp2si (v8f32 VR256:$src))))]>, 1580 VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG; 1581def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), 1582 "cvttps2dq\t{$src, $dst|$dst, $src}", 1583 [(set VR256:$dst, 1584 (v8i32 (X86any_cvttp2si (loadv8f32 addr:$src))))]>, 1585 VEX, VEX_L, 1586 Sched<[WriteCvtPS2IYLd]>, VEX_WIG; 1587} 1588 1589def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1590 "cvttps2dq\t{$src, $dst|$dst, $src}", 1591 [(set VR128:$dst, 1592 (v4i32 (X86any_cvttp2si (v4f32 VR128:$src))))]>, 1593 Sched<[WriteCvtPS2I]>; 1594def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1595 "cvttps2dq\t{$src, $dst|$dst, $src}", 1596 [(set VR128:$dst, 1597 (v4i32 (X86any_cvttp2si (memopv4f32 addr:$src))))]>, 1598 Sched<[WriteCvtPS2ILd]>; 1599} 1600 1601// The 
assembler can recognize rr 256-bit instructions by seeing a ymm 1602// register, but the same isn't true when using memory operands instead. 1603// Provide other assembly rr and rm forms to address this explicitly. 1604let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { 1605// XMM only 1606def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1607 "cvttpd2dq\t{$src, $dst|$dst, $src}", 1608 [(set VR128:$dst, 1609 (v4i32 (X86any_cvttp2si (v2f64 VR128:$src))))]>, 1610 VEX, Sched<[WriteCvtPD2I]>, VEX_WIG; 1611def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1612 "cvttpd2dq{x}\t{$src, $dst|$dst, $src}", 1613 [(set VR128:$dst, 1614 (v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))))]>, 1615 VEX, Sched<[WriteCvtPD2ILd]>, VEX_WIG; 1616 1617// YMM only 1618def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), 1619 "cvttpd2dq\t{$src, $dst|$dst, $src}", 1620 [(set VR128:$dst, 1621 (v4i32 (X86any_cvttp2si (v4f64 VR256:$src))))]>, 1622 VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG; 1623def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), 1624 "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", 1625 [(set VR128:$dst, 1626 (v4i32 (X86any_cvttp2si (loadv4f64 addr:$src))))]>, 1627 VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG; 1628} // Predicates = [HasAVX, NoVLX] 1629 1630def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}", 1631 (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0, "att">; 1632def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}", 1633 (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">; 1634 1635let Predicates = [HasAVX, NoVLX] in { 1636 def : Pat<(v4i32 (any_fp_to_sint (v4f64 VR256:$src))), 1637 (VCVTTPD2DQYrr VR256:$src)>; 1638 def : Pat<(v4i32 (any_fp_to_sint (loadv4f64 addr:$src))), 1639 (VCVTTPD2DQYrm addr:$src)>; 1640} 1641 1642def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1643 "cvttpd2dq\t{$src, $dst|$dst, $src}", 1644 [(set VR128:$dst, 1645 (v4i32 (X86any_cvttp2si (v2f64 VR128:$src))))]>, 1646 Sched<[WriteCvtPD2I]>, SIMD_EXC; 1647def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), 1648 "cvttpd2dq\t{$src, $dst|$dst, $src}", 1649 [(set VR128:$dst, 1650 (v4i32 (X86any_cvttp2si (memopv2f64 addr:$src))))]>, 1651 Sched<[WriteCvtPD2ILd]>, SIMD_EXC; 1652 1653// Convert packed single to packed double 1654let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { 1655 // SSE2 instructions without OpSize prefix 1656def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1657 "vcvtps2pd\t{$src, $dst|$dst, $src}", 1658 [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>, 1659 PS, VEX, Sched<[WriteCvtPS2PD]>, VEX_WIG; 1660def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), 1661 "vcvtps2pd\t{$src, $dst|$dst, $src}", 1662 [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>, 1663 PS, VEX, Sched<[WriteCvtPS2PD.Folded]>, VEX_WIG; 1664def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), 1665 "vcvtps2pd\t{$src, $dst|$dst, $src}", 1666 [(set VR256:$dst, (v4f64 (any_fpextend (v4f32 VR128:$src))))]>, 1667 PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, VEX_WIG; 1668def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), 1669 "vcvtps2pd\t{$src, $dst|$dst, $src}", 1670 [(set VR256:$dst, (v4f64 (extloadv4f32 addr:$src)))]>, 1671 PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, VEX_WIG; 1672} 1673 1674let Predicates = [UseSSE2], Uses = 
[MXCSR], mayRaiseFPException = 1 in { 1675def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1676 "cvtps2pd\t{$src, $dst|$dst, $src}", 1677 [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>, 1678 PS, Sched<[WriteCvtPS2PD]>; 1679def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), 1680 "cvtps2pd\t{$src, $dst|$dst, $src}", 1681 [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>, 1682 PS, Sched<[WriteCvtPS2PD.Folded]>; 1683} 1684 1685// Convert Packed DW Integers to Packed Double FP 1686let Predicates = [HasAVX, NoVLX] in { 1687let hasSideEffects = 0, mayLoad = 1 in 1688def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 1689 "vcvtdq2pd\t{$src, $dst|$dst, $src}", 1690 [(set VR128:$dst, 1691 (v2f64 (X86any_VSintToFP 1692 (bc_v4i32 1693 (v2i64 (scalar_to_vector 1694 (loadi64 addr:$src)))))))]>, 1695 VEX, Sched<[WriteCvtI2PDLd]>, VEX_WIG; 1696def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1697 "vcvtdq2pd\t{$src, $dst|$dst, $src}", 1698 [(set VR128:$dst, 1699 (v2f64 (X86any_VSintToFP (v4i32 VR128:$src))))]>, 1700 VEX, Sched<[WriteCvtI2PD]>, VEX_WIG; 1701def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), 1702 "vcvtdq2pd\t{$src, $dst|$dst, $src}", 1703 [(set VR256:$dst, 1704 (v4f64 (any_sint_to_fp (loadv4i32 addr:$src))))]>, 1705 VEX, VEX_L, Sched<[WriteCvtI2PDYLd]>, 1706 VEX_WIG; 1707def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), 1708 "vcvtdq2pd\t{$src, $dst|$dst, $src}", 1709 [(set VR256:$dst, 1710 (v4f64 (any_sint_to_fp (v4i32 VR128:$src))))]>, 1711 VEX, VEX_L, Sched<[WriteCvtI2PDY]>, VEX_WIG; 1712} 1713 1714let hasSideEffects = 0, mayLoad = 1 in 1715def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 1716 "cvtdq2pd\t{$src, $dst|$dst, $src}", 1717 [(set VR128:$dst, 1718 (v2f64 (X86any_VSintToFP 1719 (bc_v4i32 1720 (v2i64 (scalar_to_vector 1721 (loadi64 addr:$src)))))))]>, 1722 Sched<[WriteCvtI2PDLd]>; 1723def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1724 "cvtdq2pd\t{$src, $dst|$dst, $src}", 1725 [(set VR128:$dst, 1726 (v2f64 (X86any_VSintToFP (v4i32 VR128:$src))))]>, 1727 Sched<[WriteCvtI2PD]>; 1728 1729// AVX register conversion intrinsics 1730let Predicates = [HasAVX, NoVLX] in { 1731 def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), 1732 (VCVTDQ2PDrm addr:$src)>; 1733} // Predicates = [HasAVX, NoVLX] 1734 1735// SSE2 register conversion intrinsics 1736let Predicates = [UseSSE2] in { 1737 def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), 1738 (CVTDQ2PDrm addr:$src)>; 1739} // Predicates = [UseSSE2] 1740 1741// Convert packed double to packed single 1742// The assembler can recognize rr 256-bit instructions by seeing a ymm 1743// register, but the same isn't true when using memory operands instead. 1744// Provide other assembly rr and rm forms to address this explicitly. 
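// As an illustrative sketch of the ambiguity (AT&T syntax):
//   vcvtpd2ps %ymm1, %xmm0  ; ymm source register implies the 256-bit form
//   vcvtpd2ps (%rax), %xmm0 ; memory source could be 128-bit or 256-bit
// The {x}/{y} mnemonic suffixes and the vcvtpd2psx/vcvtpd2psy aliases below
// give each form an unambiguous spelling.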
1745let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { 1746// XMM only 1747def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1748 "cvtpd2ps\t{$src, $dst|$dst, $src}", 1749 [(set VR128:$dst, (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>, 1750 VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG; 1751def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1752 "cvtpd2ps{x}\t{$src, $dst|$dst, $src}", 1753 [(set VR128:$dst, (v4f32 (X86any_vfpround (loadv2f64 addr:$src))))]>, 1754 VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG; 1755 1756def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), 1757 "cvtpd2ps\t{$src, $dst|$dst, $src}", 1758 [(set VR128:$dst, (v4f32 (X86any_vfpround (v4f64 VR256:$src))))]>, 1759 VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG; 1760def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), 1761 "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", 1762 [(set VR128:$dst, (v4f32 (X86any_vfpround (loadv4f64 addr:$src))))]>, 1763 VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG; 1764} // Predicates = [HasAVX, NoVLX] 1765 1766def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}", 1767 (VCVTPD2PSrr VR128:$dst, VR128:$src), 0, "att">; 1768def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}", 1769 (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0, "att">; 1770 1771def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1772 "cvtpd2ps\t{$src, $dst|$dst, $src}", 1773 [(set VR128:$dst, (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>, 1774 Sched<[WriteCvtPD2PS]>, SIMD_EXC; 1775def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1776 "cvtpd2ps\t{$src, $dst|$dst, $src}", 1777 [(set VR128:$dst, (v4f32 (X86any_vfpround (memopv2f64 addr:$src))))]>, 1778 Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXC; 1779 1780//===----------------------------------------------------------------------===// 1781// SSE 1 & 2 - Compare Instructions 1782//===----------------------------------------------------------------------===// 1783 1784// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions 1785multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop, 1786 Operand memop, SDNode OpNode, ValueType VT, 1787 PatFrag ld_frag, string asm, 1788 X86FoldableSchedWrite sched, 1789 PatFrags mem_frags> { 1790 def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst), 1791 (ins VR128:$src1, VR128:$src2, u8imm:$cc), asm, 1792 [(set VR128:$dst, (OpNode (VT VR128:$src1), 1793 VR128:$src2, timm:$cc))]>, 1794 Sched<[sched]>, SIMD_EXC; 1795 let mayLoad = 1 in 1796 def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst), 1797 (ins VR128:$src1, memop:$src2, u8imm:$cc), asm, 1798 [(set VR128:$dst, (OpNode (VT VR128:$src1), 1799 (mem_frags addr:$src2), timm:$cc))]>, 1800 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 1801 1802 let isCodeGenOnly = 1 in { 1803 let isCommutable = 1 in 1804 def rr : SIi8<0xC2, MRMSrcReg, 1805 (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm, 1806 [(set RC:$dst, (OpNode RC:$src1, RC:$src2, timm:$cc))]>, 1807 Sched<[sched]>, SIMD_EXC; 1808 def rm : SIi8<0xC2, MRMSrcMem, 1809 (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm, 1810 [(set RC:$dst, (OpNode RC:$src1, 1811 (ld_frag addr:$src2), timm:$cc))]>, 1812 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 1813 } 1814} 1815 1816let ExeDomain = SSEPackedSingle in 1817defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32, 1818 "cmpss\t{$cc, $src2, $src1, $dst|$dst, 
$src1, $src2, $cc}", 1819 SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, 1820 XS, VEX_4V, VEX_LIG, VEX_WIG; 1821let ExeDomain = SSEPackedDouble in 1822defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64, 1823 "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 1824 SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, 1825 XD, VEX_4V, VEX_LIG, VEX_WIG; 1826 1827let Constraints = "$src1 = $dst" in { 1828 let ExeDomain = SSEPackedSingle in 1829 defm CMPSS : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32, 1830 "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", 1831 SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS; 1832 let ExeDomain = SSEPackedDouble in 1833 defm CMPSD : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64, 1834 "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}", 1835 SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, XD; 1836} 1837 1838// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS 1839multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDPatternOperator OpNode, 1840 ValueType vt, X86MemOperand x86memop, 1841 PatFrag ld_frag, string OpcodeStr, Domain d, 1842 X86FoldableSchedWrite sched = WriteFComX> { 1843 let ExeDomain = d in { 1844 def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), 1845 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 1846 [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>, 1847 Sched<[sched]>, SIMD_EXC; 1848 let mayLoad = 1 in 1849 def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), 1850 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 1851 [(set EFLAGS, (OpNode (vt RC:$src1), 1852 (ld_frag addr:$src2)))]>, 1853 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 1854} 1855} 1856 1857// sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp 1858multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode, 1859 ValueType vt, Operand memop, 1860 PatFrags mem_frags, string OpcodeStr, 1861 Domain d, 1862 X86FoldableSchedWrite sched = WriteFComX> { 1863let ExeDomain = d in { 1864 def rr_Int: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), 1865 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 1866 [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>, 1867 Sched<[sched]>, SIMD_EXC; 1868let mayLoad = 1 in 1869 def rm_Int: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2), 1870 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 1871 [(set EFLAGS, (OpNode (vt RC:$src1), 1872 (mem_frags addr:$src2)))]>, 1873 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 1874} 1875} 1876 1877let Defs = [EFLAGS] in { 1878 defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32, 1879 "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG; 1880 defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64, 1881 "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG; 1882 defm VCOMISS : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32, 1883 "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG; 1884 defm VCOMISD : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64, 1885 "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG; 1886 1887 let isCodeGenOnly = 1 in { 1888 defm VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, 1889 sse_load_f32, "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG; 1890 defm VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, 1891 sse_load_f64, "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG; 1892 1893 defm VCOMISS : 
sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, 1894 sse_load_f32, "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG; 1895 defm VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, 1896 sse_load_f64, "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG; 1897 } 1898 defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32, 1899 "ucomiss", SSEPackedSingle>, PS; 1900 defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64, 1901 "ucomisd", SSEPackedDouble>, PD; 1902 defm COMISS : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32, 1903 "comiss", SSEPackedSingle>, PS; 1904 defm COMISD : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64, 1905 "comisd", SSEPackedDouble>, PD; 1906 1907 let isCodeGenOnly = 1 in { 1908 defm UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, 1909 sse_load_f32, "ucomiss", SSEPackedSingle>, PS; 1910 defm UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, 1911 sse_load_f64, "ucomisd", SSEPackedDouble>, PD; 1912 1913 defm COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, 1914 sse_load_f32, "comiss", SSEPackedSingle>, PS; 1915 defm COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, 1916 sse_load_f64, "comisd", SSEPackedDouble>, PD; 1917 } 1918} // Defs = [EFLAGS] 1919 1920// sse12_cmp_packed - sse 1 & 2 compare packed instructions 1921multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, 1922 ValueType VT, string asm, 1923 X86FoldableSchedWrite sched, 1924 Domain d, PatFrag ld_frag> { 1925 let isCommutable = 1 in 1926 def rri : PIi8<0xC2, MRMSrcReg, 1927 (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm, 1928 [(set RC:$dst, (VT (X86any_cmpp RC:$src1, RC:$src2, timm:$cc)))], d>, 1929 Sched<[sched]>, SIMD_EXC; 1930 def rmi : PIi8<0xC2, MRMSrcMem, 1931 (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm, 1932 [(set RC:$dst, 1933 (VT (X86any_cmpp RC:$src1, (ld_frag addr:$src2), timm:$cc)))], d>, 1934 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 1935} 1936 1937defm VCMPPS : sse12_cmp_packed<VR128, f128mem, v4f32, 1938 "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 1939 SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG; 1940defm VCMPPD : sse12_cmp_packed<VR128, f128mem, v2f64, 1941 "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 1942 SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG; 1943defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, v8f32, 1944 "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 1945 SchedWriteFCmpSizes.PS.YMM, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L, VEX_WIG; 1946defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, v4f64, 1947 "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 1948 SchedWriteFCmpSizes.PD.YMM, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L, VEX_WIG; 1949let Constraints = "$src1 = $dst" in { 1950 defm CMPPS : sse12_cmp_packed<VR128, f128mem, v4f32, 1951 "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}", 1952 SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, memopv4f32>, PS; 1953 defm CMPPD : sse12_cmp_packed<VR128, f128mem, v2f64, 1954 "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}", 1955 SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD; 1956} 1957 1958def CommutableCMPCC : PatLeaf<(timm), [{ 1959 uint64_t Imm = N->getZExtValue() & 0x7; 1960 return (Imm == 0x00 || Imm == 0x03 || Imm == 0x04 || Imm == 0x07); 1961}]>; 1962 1963// Patterns to select compares with loads in 
first operand. 1964let Predicates = [HasAVX] in { 1965 def : Pat<(v4f64 (X86any_cmpp (loadv4f64 addr:$src2), VR256:$src1, 1966 CommutableCMPCC:$cc)), 1967 (VCMPPDYrmi VR256:$src1, addr:$src2, timm:$cc)>; 1968 1969 def : Pat<(v8f32 (X86any_cmpp (loadv8f32 addr:$src2), VR256:$src1, 1970 CommutableCMPCC:$cc)), 1971 (VCMPPSYrmi VR256:$src1, addr:$src2, timm:$cc)>; 1972 1973 def : Pat<(v2f64 (X86any_cmpp (loadv2f64 addr:$src2), VR128:$src1, 1974 CommutableCMPCC:$cc)), 1975 (VCMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>; 1976 1977 def : Pat<(v4f32 (X86any_cmpp (loadv4f32 addr:$src2), VR128:$src1, 1978 CommutableCMPCC:$cc)), 1979 (VCMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>; 1980 1981 def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1, 1982 CommutableCMPCC:$cc)), 1983 (VCMPSDrm FR64:$src1, addr:$src2, timm:$cc)>; 1984 1985 def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1, 1986 CommutableCMPCC:$cc)), 1987 (VCMPSSrm FR32:$src1, addr:$src2, timm:$cc)>; 1988} 1989 1990let Predicates = [UseSSE2] in { 1991 def : Pat<(v2f64 (X86any_cmpp (memopv2f64 addr:$src2), VR128:$src1, 1992 CommutableCMPCC:$cc)), 1993 (CMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>; 1994 1995 def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1, 1996 CommutableCMPCC:$cc)), 1997 (CMPSDrm FR64:$src1, addr:$src2, timm:$cc)>; 1998} 1999 2000let Predicates = [UseSSE1] in { 2001 def : Pat<(v4f32 (X86any_cmpp (memopv4f32 addr:$src2), VR128:$src1, 2002 CommutableCMPCC:$cc)), 2003 (CMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>; 2004 2005 def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1, 2006 CommutableCMPCC:$cc)), 2007 (CMPSSrm FR32:$src1, addr:$src2, timm:$cc)>; 2008} 2009 2010//===----------------------------------------------------------------------===// 2011// SSE 1 & 2 - Shuffle Instructions 2012//===----------------------------------------------------------------------===// 2013 2014/// sse12_shuffle - sse 1 & 2 fp shuffle instructions 2015multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop, 2016 ValueType vt, string asm, PatFrag mem_frag, 2017 X86FoldableSchedWrite sched, Domain d, 2018 bit IsCommutable = 0> { 2019 def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst), 2020 (ins RC:$src1, x86memop:$src2, u8imm:$src3), asm, 2021 [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2), 2022 (i8 timm:$src3))))], d>, 2023 Sched<[sched.Folded, sched.ReadAfterFold]>; 2024 let isCommutable = IsCommutable in 2025 def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), 2026 (ins RC:$src1, RC:$src2, u8imm:$src3), asm, 2027 [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, 2028 (i8 timm:$src3))))], d>, 2029 Sched<[sched]>; 2030} 2031 2032let Predicates = [HasAVX, NoVLX] in { 2033 defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32, 2034 "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 2035 loadv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, 2036 PS, VEX_4V, VEX_WIG; 2037 defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32, 2038 "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 2039 loadv8f32, SchedWriteFShuffle.YMM, SSEPackedSingle>, 2040 PS, VEX_4V, VEX_L, VEX_WIG; 2041 defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64, 2042 "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 2043 loadv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>, 2044 PD, VEX_4V, VEX_WIG; 2045 defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64, 2046 "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 2047 loadv4f64, SchedWriteFShuffle.YMM, SSEPackedDouble>, 2048 PD, VEX_4V, VEX_L, 
VEX_WIG; 2049} 2050let Constraints = "$src1 = $dst" in { 2051 defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32, 2052 "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", 2053 memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, PS; 2054 defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64, 2055 "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", 2056 memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD; 2057} 2058 2059//===----------------------------------------------------------------------===// 2060// SSE 1 & 2 - Unpack FP Instructions 2061//===----------------------------------------------------------------------===// 2062 2063/// sse12_unpack_interleave - sse 1 & 2 fp unpack and interleave 2064multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt, 2065 PatFrag mem_frag, RegisterClass RC, 2066 X86MemOperand x86memop, string asm, 2067 X86FoldableSchedWrite sched, Domain d, 2068 bit IsCommutable = 0> { 2069 let isCommutable = IsCommutable in 2070 def rr : PI<opc, MRMSrcReg, 2071 (outs RC:$dst), (ins RC:$src1, RC:$src2), 2072 asm, [(set RC:$dst, 2073 (vt (OpNode RC:$src1, RC:$src2)))], d>, 2074 Sched<[sched]>; 2075 def rm : PI<opc, MRMSrcMem, 2076 (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 2077 asm, [(set RC:$dst, 2078 (vt (OpNode RC:$src1, 2079 (mem_frag addr:$src2))))], d>, 2080 Sched<[sched.Folded, sched.ReadAfterFold]>; 2081} 2082 2083let Predicates = [HasAVX, NoVLX] in { 2084defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, load, 2085 VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2086 SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG; 2087defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, load, 2088 VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2089 SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD, VEX_4V, VEX_WIG; 2090defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, load, 2091 VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2092 SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG; 2093defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, load, 2094 VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2095 SchedWriteFShuffle.XMM, SSEPackedDouble>, PD, VEX_4V, VEX_WIG; 2096 2097defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, load, 2098 VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2099 SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG; 2100defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, load, 2101 VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2102 SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG; 2103defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, load, 2104 VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2105 SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG; 2106defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, load, 2107 VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2108 SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG; 2109}// Predicates = [HasAVX, NoVLX] 2110 2111let Constraints = "$src1 = $dst" in { 2112 defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memop, 2113 VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}", 2114 SchedWriteFShuffle.XMM, SSEPackedSingle>, PS; 2115 defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memop, 2116 VR128, 
f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}", 2117 SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD; 2118 defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memop, 2119 VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}", 2120 SchedWriteFShuffle.XMM, SSEPackedSingle>, PS; 2121 defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memop, 2122 VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}", 2123 SchedWriteFShuffle.XMM, SSEPackedDouble>, PD; 2124} // Constraints = "$src1 = $dst" 2125 2126let Predicates = [HasAVX1Only] in { 2127 def : Pat<(v8i32 (X86Unpckl VR256:$src1, (loadv8i32 addr:$src2))), 2128 (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; 2129 def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)), 2130 (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; 2131 def : Pat<(v8i32 (X86Unpckh VR256:$src1, (loadv8i32 addr:$src2))), 2132 (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; 2133 def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)), 2134 (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; 2135 2136 def : Pat<(v4i64 (X86Unpckl VR256:$src1, (loadv4i64 addr:$src2))), 2137 (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; 2138 def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)), 2139 (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; 2140 def : Pat<(v4i64 (X86Unpckh VR256:$src1, (loadv4i64 addr:$src2))), 2141 (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; 2142 def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)), 2143 (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; 2144} 2145 2146let Predicates = [UseSSE2] in { 2147 // Use MOVHPD if the load isn't aligned enough for UNPCKLPD. 2148 def : Pat<(v2f64 (X86Unpckl VR128:$src1, 2149 (v2f64 (simple_load addr:$src2)))), 2150 (MOVHPDrm VR128:$src1, addr:$src2)>; 2151} 2152 2153//===----------------------------------------------------------------------===// 2154// SSE 1 & 2 - Extract Floating-Point Sign Mask 2155//===----------------------------------------------------------------------===// 2156 2157/// sse12_extr_sign_mask - sse 1 & 2 fp sign mask extraction 2158multiclass sse12_extr_sign_mask<RegisterClass RC, ValueType vt, 2159 string asm, Domain d> { 2160 def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src), 2161 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 2162 [(set GR32orGR64:$dst, (X86movmsk (vt RC:$src)))], d>, 2163 Sched<[WriteFMOVMSK]>; 2164} 2165 2166let Predicates = [HasAVX] in { 2167 defm VMOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps", 2168 SSEPackedSingle>, PS, VEX, VEX_WIG; 2169 defm VMOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd", 2170 SSEPackedDouble>, PD, VEX, VEX_WIG; 2171 defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, v8f32, "movmskps", 2172 SSEPackedSingle>, PS, VEX, VEX_L, VEX_WIG; 2173 defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, v4f64, "movmskpd", 2174 SSEPackedDouble>, PD, VEX, VEX_L, VEX_WIG; 2175 2176 // Also support integer VTs to avoid an int->fp bitcast in the DAG. 2177 def : Pat<(X86movmsk (v4i32 VR128:$src)), 2178 (VMOVMSKPSrr VR128:$src)>; 2179 def : Pat<(X86movmsk (v2i64 VR128:$src)), 2180 (VMOVMSKPDrr VR128:$src)>; 2181 def : Pat<(X86movmsk (v8i32 VR256:$src)), 2182 (VMOVMSKPSYrr VR256:$src)>; 2183 def : Pat<(X86movmsk (v4i64 VR256:$src)), 2184 (VMOVMSKPDYrr VR256:$src)>; 2185} 2186 2187defm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps", 2188 SSEPackedSingle>, PS; 2189defm MOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd", 2190 SSEPackedDouble>, PD; 2191 2192let Predicates = [UseSSE2] in { 2193 // Also support integer VTs to avoid an int->fp bitcast in the DAG.
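// For example, (X86movmsk (v2i64 VR128:$src)) is selected directly to
// MOVMSKPDrr below, rather than first bitcasting the operand to v2f64.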
2194 def : Pat<(X86movmsk (v4i32 VR128:$src)), 2195 (MOVMSKPSrr VR128:$src)>; 2196 def : Pat<(X86movmsk (v2i64 VR128:$src)), 2197 (MOVMSKPDrr VR128:$src)>; 2198} 2199 2200//===---------------------------------------------------------------------===// 2201// SSE2 - Packed Integer Logical Instructions 2202//===---------------------------------------------------------------------===// 2203 2204let ExeDomain = SSEPackedInt in { // SSE integer instructions 2205 2206/// PDI_binop_rm - Simple SSE2 binary operator. 2207multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 2208 ValueType OpVT, RegisterClass RC, PatFrag memop_frag, 2209 X86MemOperand x86memop, X86FoldableSchedWrite sched, 2210 bit IsCommutable, bit Is2Addr> { 2211 let isCommutable = IsCommutable in 2212 def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), 2213 (ins RC:$src1, RC:$src2), 2214 !if(Is2Addr, 2215 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 2216 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 2217 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, 2218 Sched<[sched]>; 2219 def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), 2220 (ins RC:$src1, x86memop:$src2), 2221 !if(Is2Addr, 2222 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 2223 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 2224 [(set RC:$dst, (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, 2225 Sched<[sched.Folded, sched.ReadAfterFold]>; 2226} 2227} // ExeDomain = SSEPackedInt 2228 2229multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode, 2230 ValueType OpVT128, ValueType OpVT256, 2231 X86SchedWriteWidths sched, bit IsCommutable, 2232 Predicate prd> { 2233let Predicates = [HasAVX, prd] in 2234 defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128, 2235 VR128, load, i128mem, sched.XMM, 2236 IsCommutable, 0>, VEX_4V, VEX_WIG; 2237 2238let Constraints = "$src1 = $dst" in 2239 defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128, 2240 memop, i128mem, sched.XMM, IsCommutable, 1>; 2241 2242let Predicates = [HasAVX2, prd] in 2243 defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, 2244 OpVT256, VR256, load, i256mem, sched.YMM, 2245 IsCommutable, 0>, VEX_4V, VEX_L, VEX_WIG; 2246} 2247 2248// These are ordered here for pattern ordering requirements with the fp versions 2249 2250defm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64, 2251 SchedWriteVecLogic, 1, NoVLX>; 2252defm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64, 2253 SchedWriteVecLogic, 1, NoVLX>; 2254defm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64, 2255 SchedWriteVecLogic, 1, NoVLX>; 2256defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64, 2257 SchedWriteVecLogic, 0, NoVLX>; 2258 2259//===----------------------------------------------------------------------===// 2260// SSE 1 & 2 - Logical Instructions 2261//===----------------------------------------------------------------------===// 2262 2263/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops 2264/// 2265/// There are no patterns here because isel prefers integer versions for SSE2 2266/// and later. There are SSE1 v4f32 patterns later. 
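/// As an illustrative note: with SSE2 available, a packed f32 logic op is
/// lowered to an integer 'and'/'or'/'xor' and selected as PAND/POR/PXOR
/// above; only SSE1-only targets reach the X86fand -> ANDPS style patterns
/// near the end of this section.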
2267multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, 2268 X86SchedWriteWidths sched> { 2269 let Predicates = [HasAVX, NoVLX] in { 2270 defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle, 2271 !strconcat(OpcodeStr, "ps"), f256mem, sched.YMM, 2272 [], [], 0>, PS, VEX_4V, VEX_L, VEX_WIG; 2273 2274 defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble, 2275 !strconcat(OpcodeStr, "pd"), f256mem, sched.YMM, 2276 [], [], 0>, PD, VEX_4V, VEX_L, VEX_WIG; 2277 2278 defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, 2279 !strconcat(OpcodeStr, "ps"), f128mem, sched.XMM, 2280 [], [], 0>, PS, VEX_4V, VEX_WIG; 2281 2282 defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, 2283 !strconcat(OpcodeStr, "pd"), f128mem, sched.XMM, 2284 [], [], 0>, PD, VEX_4V, VEX_WIG; 2285 } 2286 2287 let Constraints = "$src1 = $dst" in { 2288 defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, 2289 !strconcat(OpcodeStr, "ps"), f128mem, sched.XMM, 2290 [], []>, PS; 2291 2292 defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, 2293 !strconcat(OpcodeStr, "pd"), f128mem, sched.XMM, 2294 [], []>, PD; 2295 } 2296} 2297 2298defm AND : sse12_fp_packed_logical<0x54, "and", SchedWriteFLogic>; 2299defm OR : sse12_fp_packed_logical<0x56, "or", SchedWriteFLogic>; 2300defm XOR : sse12_fp_packed_logical<0x57, "xor", SchedWriteFLogic>; 2301let isCommutable = 0 in 2302 defm ANDN : sse12_fp_packed_logical<0x55, "andn", SchedWriteFLogic>; 2303 2304let Predicates = [HasAVX2, NoVLX] in { 2305 def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)), 2306 (VPANDYrr VR256:$src1, VR256:$src2)>; 2307 def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)), 2308 (VPANDYrr VR256:$src1, VR256:$src2)>; 2309 def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)), 2310 (VPANDYrr VR256:$src1, VR256:$src2)>; 2311 2312 def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)), 2313 (VPORYrr VR256:$src1, VR256:$src2)>; 2314 def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)), 2315 (VPORYrr VR256:$src1, VR256:$src2)>; 2316 def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)), 2317 (VPORYrr VR256:$src1, VR256:$src2)>; 2318 2319 def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)), 2320 (VPXORYrr VR256:$src1, VR256:$src2)>; 2321 def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)), 2322 (VPXORYrr VR256:$src1, VR256:$src2)>; 2323 def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)), 2324 (VPXORYrr VR256:$src1, VR256:$src2)>; 2325 2326 def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)), 2327 (VPANDNYrr VR256:$src1, VR256:$src2)>; 2328 def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)), 2329 (VPANDNYrr VR256:$src1, VR256:$src2)>; 2330 def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)), 2331 (VPANDNYrr VR256:$src1, VR256:$src2)>; 2332 2333 def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)), 2334 (VPANDYrm VR256:$src1, addr:$src2)>; 2335 def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)), 2336 (VPANDYrm VR256:$src1, addr:$src2)>; 2337 def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)), 2338 (VPANDYrm VR256:$src1, addr:$src2)>; 2339 2340 def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)), 2341 (VPORYrm VR256:$src1, addr:$src2)>; 2342 def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)), 2343 (VPORYrm VR256:$src1, addr:$src2)>; 2344 def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)), 2345 (VPORYrm VR256:$src1, addr:$src2)>; 2346 2347 def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)), 2348 (VPXORYrm VR256:$src1, addr:$src2)>; 2349 def : Pat<(xor VR256:$src1, (loadv16i16 
addr:$src2)), 2350 (VPXORYrm VR256:$src1, addr:$src2)>; 2351 def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)), 2352 (VPXORYrm VR256:$src1, addr:$src2)>; 2353 2354 def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)), 2355 (VPANDNYrm VR256:$src1, addr:$src2)>; 2356 def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)), 2357 (VPANDNYrm VR256:$src1, addr:$src2)>; 2358 def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)), 2359 (VPANDNYrm VR256:$src1, addr:$src2)>; 2360} 2361 2362// If only AVX1 is supported, we need to handle integer operations with 2363// floating point instructions since the integer versions aren't available. 2364let Predicates = [HasAVX1Only] in { 2365 def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)), 2366 (VANDPSYrr VR256:$src1, VR256:$src2)>; 2367 def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)), 2368 (VANDPSYrr VR256:$src1, VR256:$src2)>; 2369 def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)), 2370 (VANDPSYrr VR256:$src1, VR256:$src2)>; 2371 def : Pat<(v4i64 (and VR256:$src1, VR256:$src2)), 2372 (VANDPSYrr VR256:$src1, VR256:$src2)>; 2373 2374 def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)), 2375 (VORPSYrr VR256:$src1, VR256:$src2)>; 2376 def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)), 2377 (VORPSYrr VR256:$src1, VR256:$src2)>; 2378 def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)), 2379 (VORPSYrr VR256:$src1, VR256:$src2)>; 2380 def : Pat<(v4i64 (or VR256:$src1, VR256:$src2)), 2381 (VORPSYrr VR256:$src1, VR256:$src2)>; 2382 2383 def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)), 2384 (VXORPSYrr VR256:$src1, VR256:$src2)>; 2385 def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)), 2386 (VXORPSYrr VR256:$src1, VR256:$src2)>; 2387 def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)), 2388 (VXORPSYrr VR256:$src1, VR256:$src2)>; 2389 def : Pat<(v4i64 (xor VR256:$src1, VR256:$src2)), 2390 (VXORPSYrr VR256:$src1, VR256:$src2)>; 2391 2392 def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)), 2393 (VANDNPSYrr VR256:$src1, VR256:$src2)>; 2394 def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)), 2395 (VANDNPSYrr VR256:$src1, VR256:$src2)>; 2396 def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)), 2397 (VANDNPSYrr VR256:$src1, VR256:$src2)>; 2398 def : Pat<(v4i64 (X86andnp VR256:$src1, VR256:$src2)), 2399 (VANDNPSYrr VR256:$src1, VR256:$src2)>; 2400 2401 def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)), 2402 (VANDPSYrm VR256:$src1, addr:$src2)>; 2403 def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)), 2404 (VANDPSYrm VR256:$src1, addr:$src2)>; 2405 def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)), 2406 (VANDPSYrm VR256:$src1, addr:$src2)>; 2407 def : Pat<(and VR256:$src1, (loadv4i64 addr:$src2)), 2408 (VANDPSYrm VR256:$src1, addr:$src2)>; 2409 2410 def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)), 2411 (VORPSYrm VR256:$src1, addr:$src2)>; 2412 def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)), 2413 (VORPSYrm VR256:$src1, addr:$src2)>; 2414 def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)), 2415 (VORPSYrm VR256:$src1, addr:$src2)>; 2416 def : Pat<(or VR256:$src1, (loadv4i64 addr:$src2)), 2417 (VORPSYrm VR256:$src1, addr:$src2)>; 2418 2419 def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)), 2420 (VXORPSYrm VR256:$src1, addr:$src2)>; 2421 def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)), 2422 (VXORPSYrm VR256:$src1, addr:$src2)>; 2423 def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)), 2424 (VXORPSYrm VR256:$src1, addr:$src2)>; 2425 def : Pat<(xor VR256:$src1, (loadv4i64 addr:$src2)), 2426 (VXORPSYrm VR256:$src1, addr:$src2)>; 2427 2428 def : 
Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)), 2429 (VANDNPSYrm VR256:$src1, addr:$src2)>; 2430 def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)), 2431 (VANDNPSYrm VR256:$src1, addr:$src2)>; 2432 def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)), 2433 (VANDNPSYrm VR256:$src1, addr:$src2)>; 2434 def : Pat<(X86andnp VR256:$src1, (loadv4i64 addr:$src2)), 2435 (VANDNPSYrm VR256:$src1, addr:$src2)>; 2436} 2437 2438let Predicates = [HasAVX, NoVLX] in { 2439 def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)), 2440 (VPANDrr VR128:$src1, VR128:$src2)>; 2441 def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)), 2442 (VPANDrr VR128:$src1, VR128:$src2)>; 2443 def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)), 2444 (VPANDrr VR128:$src1, VR128:$src2)>; 2445 2446 def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)), 2447 (VPORrr VR128:$src1, VR128:$src2)>; 2448 def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)), 2449 (VPORrr VR128:$src1, VR128:$src2)>; 2450 def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)), 2451 (VPORrr VR128:$src1, VR128:$src2)>; 2452 2453 def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)), 2454 (VPXORrr VR128:$src1, VR128:$src2)>; 2455 def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)), 2456 (VPXORrr VR128:$src1, VR128:$src2)>; 2457 def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)), 2458 (VPXORrr VR128:$src1, VR128:$src2)>; 2459 2460 def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)), 2461 (VPANDNrr VR128:$src1, VR128:$src2)>; 2462 def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)), 2463 (VPANDNrr VR128:$src1, VR128:$src2)>; 2464 def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)), 2465 (VPANDNrr VR128:$src1, VR128:$src2)>; 2466 2467 def : Pat<(and VR128:$src1, (loadv16i8 addr:$src2)), 2468 (VPANDrm VR128:$src1, addr:$src2)>; 2469 def : Pat<(and VR128:$src1, (loadv8i16 addr:$src2)), 2470 (VPANDrm VR128:$src1, addr:$src2)>; 2471 def : Pat<(and VR128:$src1, (loadv4i32 addr:$src2)), 2472 (VPANDrm VR128:$src1, addr:$src2)>; 2473 2474 def : Pat<(or VR128:$src1, (loadv16i8 addr:$src2)), 2475 (VPORrm VR128:$src1, addr:$src2)>; 2476 def : Pat<(or VR128:$src1, (loadv8i16 addr:$src2)), 2477 (VPORrm VR128:$src1, addr:$src2)>; 2478 def : Pat<(or VR128:$src1, (loadv4i32 addr:$src2)), 2479 (VPORrm VR128:$src1, addr:$src2)>; 2480 2481 def : Pat<(xor VR128:$src1, (loadv16i8 addr:$src2)), 2482 (VPXORrm VR128:$src1, addr:$src2)>; 2483 def : Pat<(xor VR128:$src1, (loadv8i16 addr:$src2)), 2484 (VPXORrm VR128:$src1, addr:$src2)>; 2485 def : Pat<(xor VR128:$src1, (loadv4i32 addr:$src2)), 2486 (VPXORrm VR128:$src1, addr:$src2)>; 2487 2488 def : Pat<(X86andnp VR128:$src1, (loadv16i8 addr:$src2)), 2489 (VPANDNrm VR128:$src1, addr:$src2)>; 2490 def : Pat<(X86andnp VR128:$src1, (loadv8i16 addr:$src2)), 2491 (VPANDNrm VR128:$src1, addr:$src2)>; 2492 def : Pat<(X86andnp VR128:$src1, (loadv4i32 addr:$src2)), 2493 (VPANDNrm VR128:$src1, addr:$src2)>; 2494} 2495 2496let Predicates = [UseSSE2] in { 2497 def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)), 2498 (PANDrr VR128:$src1, VR128:$src2)>; 2499 def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)), 2500 (PANDrr VR128:$src1, VR128:$src2)>; 2501 def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)), 2502 (PANDrr VR128:$src1, VR128:$src2)>; 2503 2504 def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)), 2505 (PORrr VR128:$src1, VR128:$src2)>; 2506 def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)), 2507 (PORrr VR128:$src1, VR128:$src2)>; 2508 def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)), 2509 (PORrr VR128:$src1, VR128:$src2)>; 2510 2511 def : Pat<(v16i8 (xor VR128:$src1, 
VR128:$src2)), 2512 (PXORrr VR128:$src1, VR128:$src2)>; 2513 def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)), 2514 (PXORrr VR128:$src1, VR128:$src2)>; 2515 def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)), 2516 (PXORrr VR128:$src1, VR128:$src2)>; 2517 2518 def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)), 2519 (PANDNrr VR128:$src1, VR128:$src2)>; 2520 def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)), 2521 (PANDNrr VR128:$src1, VR128:$src2)>; 2522 def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)), 2523 (PANDNrr VR128:$src1, VR128:$src2)>; 2524 2525 def : Pat<(and VR128:$src1, (memopv16i8 addr:$src2)), 2526 (PANDrm VR128:$src1, addr:$src2)>; 2527 def : Pat<(and VR128:$src1, (memopv8i16 addr:$src2)), 2528 (PANDrm VR128:$src1, addr:$src2)>; 2529 def : Pat<(and VR128:$src1, (memopv4i32 addr:$src2)), 2530 (PANDrm VR128:$src1, addr:$src2)>; 2531 2532 def : Pat<(or VR128:$src1, (memopv16i8 addr:$src2)), 2533 (PORrm VR128:$src1, addr:$src2)>; 2534 def : Pat<(or VR128:$src1, (memopv8i16 addr:$src2)), 2535 (PORrm VR128:$src1, addr:$src2)>; 2536 def : Pat<(or VR128:$src1, (memopv4i32 addr:$src2)), 2537 (PORrm VR128:$src1, addr:$src2)>; 2538 2539 def : Pat<(xor VR128:$src1, (memopv16i8 addr:$src2)), 2540 (PXORrm VR128:$src1, addr:$src2)>; 2541 def : Pat<(xor VR128:$src1, (memopv8i16 addr:$src2)), 2542 (PXORrm VR128:$src1, addr:$src2)>; 2543 def : Pat<(xor VR128:$src1, (memopv4i32 addr:$src2)), 2544 (PXORrm VR128:$src1, addr:$src2)>; 2545 2546 def : Pat<(X86andnp VR128:$src1, (memopv16i8 addr:$src2)), 2547 (PANDNrm VR128:$src1, addr:$src2)>; 2548 def : Pat<(X86andnp VR128:$src1, (memopv8i16 addr:$src2)), 2549 (PANDNrm VR128:$src1, addr:$src2)>; 2550 def : Pat<(X86andnp VR128:$src1, (memopv4i32 addr:$src2)), 2551 (PANDNrm VR128:$src1, addr:$src2)>; 2552} 2553 2554// Patterns for packed operations when we don't have integer type available. 2555def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)), 2556 (ANDPSrr VR128:$src1, VR128:$src2)>; 2557def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)), 2558 (ORPSrr VR128:$src1, VR128:$src2)>; 2559def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)), 2560 (XORPSrr VR128:$src1, VR128:$src2)>; 2561def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)), 2562 (ANDNPSrr VR128:$src1, VR128:$src2)>; 2563 2564def : Pat<(X86fand VR128:$src1, (memopv4f32 addr:$src2)), 2565 (ANDPSrm VR128:$src1, addr:$src2)>; 2566def : Pat<(X86for VR128:$src1, (memopv4f32 addr:$src2)), 2567 (ORPSrm VR128:$src1, addr:$src2)>; 2568def : Pat<(X86fxor VR128:$src1, (memopv4f32 addr:$src2)), 2569 (XORPSrm VR128:$src1, addr:$src2)>; 2570def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)), 2571 (ANDNPSrm VR128:$src1, addr:$src2)>; 2572 2573//===----------------------------------------------------------------------===// 2574// SSE 1 & 2 - Arithmetic Instructions 2575//===----------------------------------------------------------------------===// 2576 2577/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and 2578/// vector forms. 2579/// 2580/// In addition, we also have a special variant of the scalar form here to 2581/// represent the associated intrinsic operation. This form is unlike the 2582/// plain scalar form, in that it takes an entire vector (instead of a scalar) 2583/// and leaves the top elements unmodified (therefore these cannot be commuted). 2584/// 2585/// These three forms can each be reg+reg or reg+mem. 
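/// As an illustrative example, the ADD instantiation below yields
/// ADDSSrr/ADDSSrm (the FR32 scalar form), ADDSSrr_Int/ADDSSrm_Int (the
/// VR128 intrinsic form, which preserves the upper elements), and
/// ADDPSrr/ADDPSrm (the packed form).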
2586/// 2587 2588/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those 2589/// classes below 2590multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, 2591 SDPatternOperator OpNode, X86SchedWriteSizes sched> { 2592let Uses = [MXCSR], mayRaiseFPException = 1 in { 2593 let Predicates = [HasAVX, NoVLX] in { 2594 defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, 2595 VR128, v4f32, f128mem, loadv4f32, 2596 SSEPackedSingle, sched.PS.XMM, 0>, PS, VEX_4V, VEX_WIG; 2597 defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, 2598 VR128, v2f64, f128mem, loadv2f64, 2599 SSEPackedDouble, sched.PD.XMM, 0>, PD, VEX_4V, VEX_WIG; 2600 2601 defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), 2602 OpNode, VR256, v8f32, f256mem, loadv8f32, 2603 SSEPackedSingle, sched.PS.YMM, 0>, PS, VEX_4V, VEX_L, VEX_WIG; 2604 defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), 2605 OpNode, VR256, v4f64, f256mem, loadv4f64, 2606 SSEPackedDouble, sched.PD.YMM, 0>, PD, VEX_4V, VEX_L, VEX_WIG; 2607 } 2608 2609 let Constraints = "$src1 = $dst" in { 2610 defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128, 2611 v4f32, f128mem, memopv4f32, SSEPackedSingle, 2612 sched.PS.XMM>, PS; 2613 defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128, 2614 v2f64, f128mem, memopv2f64, SSEPackedDouble, 2615 sched.PD.XMM>, PD; 2616 } 2617} 2618} 2619 2620multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 2621 X86SchedWriteSizes sched> { 2622let Uses = [MXCSR], mayRaiseFPException = 1 in { 2623 defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), 2624 OpNode, FR32, f32mem, SSEPackedSingle, sched.PS.Scl, 0>, 2625 XS, VEX_4V, VEX_LIG, VEX_WIG; 2626 defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), 2627 OpNode, FR64, f64mem, SSEPackedDouble, sched.PD.Scl, 0>, 2628 XD, VEX_4V, VEX_LIG, VEX_WIG; 2629 2630 let Constraints = "$src1 = $dst" in { 2631 defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), 2632 OpNode, FR32, f32mem, SSEPackedSingle, 2633 sched.PS.Scl>, XS; 2634 defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), 2635 OpNode, FR64, f64mem, SSEPackedDouble, 2636 sched.PD.Scl>, XD; 2637 } 2638} 2639} 2640 2641multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, 2642 SDPatternOperator OpNode, 2643 X86SchedWriteSizes sched> { 2644let Uses = [MXCSR], mayRaiseFPException = 1 in { 2645 defm V#NAME#SS : sse12_fp_scalar_int<opc, OpNode, VR128, v4f32, 2646 !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32, 2647 SSEPackedSingle, sched.PS.Scl, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG; 2648 defm V#NAME#SD : sse12_fp_scalar_int<opc, OpNode, VR128, v2f64, 2649 !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64, 2650 SSEPackedDouble, sched.PD.Scl, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG; 2651 2652 let Constraints = "$src1 = $dst" in { 2653 defm SS : sse12_fp_scalar_int<opc, OpNode, VR128, v4f32, 2654 !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32, 2655 SSEPackedSingle, sched.PS.Scl>, XS; 2656 defm SD : sse12_fp_scalar_int<opc, OpNode, VR128, v2f64, 2657 !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64, 2658 SSEPackedDouble, sched.PD.Scl>, XD; 2659 } 2660} 2661} 2662 2663// Binary Arithmetic instructions 2664defm ADD : basic_sse12_fp_binop_p<0x58, "add", any_fadd, SchedWriteFAddSizes>, 2665 basic_sse12_fp_binop_s<0x58, "add", any_fadd, SchedWriteFAddSizes>, 2666 basic_sse12_fp_binop_s_int<0x58, "add", null_frag, 
                                      SchedWriteFAddSizes>;
defm MUL : basic_sse12_fp_binop_p<0x59, "mul", any_fmul, SchedWriteFMulSizes>,
           basic_sse12_fp_binop_s<0x59, "mul", any_fmul, SchedWriteFMulSizes>,
           basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMulSizes>;
let isCommutable = 0 in {
  defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", any_fsub, SchedWriteFAddSizes>,
             basic_sse12_fp_binop_s<0x5C, "sub", any_fsub, SchedWriteFAddSizes>,
             basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAddSizes>;
  defm DIV : basic_sse12_fp_binop_p<0x5E, "div", any_fdiv, SchedWriteFDivSizes>,
             basic_sse12_fp_binop_s<0x5E, "div", any_fdiv, SchedWriteFDivSizes>,
             basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDivSizes>;
  defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SchedWriteFCmpSizes>;
  defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SchedWriteFCmpSizes>;
}

let isCodeGenOnly = 1 in {
  defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>;
  defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SchedWriteFCmpSizes>,
             basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SchedWriteFCmpSizes>;
}

// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// Previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
//   }
//
// Previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0

// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
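// For illustration only (a hand-substituted sketch, not generated output):
// with the parameters of the "ADDSS" instantiation further below plugged in,
// the register-register pattern produced by the scalar_math_patterns
// multiclass that follows is roughly:
//
//   def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
//                      (v4f32 (scalar_to_vector
//                        (any_fadd (f32 (extractelt (v4f32 VR128:$dst),
//                                                   (iPTR 0))),
//                                  FR32:$src))))),
//             (ADDSSrr_Int v4f32:$dst,
//                          (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;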
multiclass scalar_math_patterns<SDPatternOperator Op, string OpcPrefix, SDNode Move,
                                ValueType VT, ValueType EltTy,
                                RegisterClass RC, PatFrag ld_frag,
                                Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    // extracted scalar math op with insert via movss/movsd
    def : Pat<(VT (Move (VT VR128:$dst),
                        (VT (scalar_to_vector
                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                                 RC:$src))))),
              (!cast<Instruction>(OpcPrefix#rr_Int) VT:$dst,
               (VT (COPY_TO_REGCLASS RC:$src, VR128)))>;
    def : Pat<(VT (Move (VT VR128:$dst),
                        (VT (scalar_to_vector
                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                                 (ld_frag addr:$src)))))),
              (!cast<Instruction>(OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
  }

  // Repeat for AVX versions of the instructions.
  let Predicates = [UseAVX] in {
    // extracted scalar math op with insert via movss/movsd
    def : Pat<(VT (Move (VT VR128:$dst),
                        (VT (scalar_to_vector
                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                                 RC:$src))))),
              (!cast<Instruction>("V"#OpcPrefix#rr_Int) VT:$dst,
               (VT (COPY_TO_REGCLASS RC:$src, VR128)))>;
    def : Pat<(VT (Move (VT VR128:$dst),
                        (VT (scalar_to_vector
                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
                                 (ld_frag addr:$src)))))),
              (!cast<Instruction>("V"#OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
  }
}

defm : scalar_math_patterns<any_fadd, "ADDSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
defm : scalar_math_patterns<any_fsub, "SUBSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
defm : scalar_math_patterns<any_fmul, "MULSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
defm : scalar_math_patterns<any_fdiv, "DIVSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;

defm : scalar_math_patterns<any_fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
defm : scalar_math_patterns<any_fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
defm : scalar_math_patterns<any_fmul, "MULSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
defm : scalar_math_patterns<any_fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;

/// Unop Arithmetic
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a
/// scalar) and leaves the top elements undefined.
///
/// And we have a special variant for the full-vector intrinsic form.

/// sse_fp_unop_s - SSE1 unops in scalar form
/// For the non-AVX defs, we need $src1 to be tied to $dst because
/// the HW instructions are 2 operand / destructive.
2791multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC, 2792 X86MemOperand x86memop, Operand intmemop, 2793 SDPatternOperator OpNode, Domain d, 2794 X86FoldableSchedWrite sched, Predicate target> { 2795 let isCodeGenOnly = 1, hasSideEffects = 0 in { 2796 def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1), 2797 !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"), 2798 [(set RC:$dst, (OpNode RC:$src1))], d>, Sched<[sched]>, 2799 Requires<[target]>; 2800 let mayLoad = 1 in 2801 def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1), 2802 !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"), 2803 [(set RC:$dst, (OpNode (load addr:$src1)))], d>, 2804 Sched<[sched.Folded]>, 2805 Requires<[target, OptForSize]>; 2806 } 2807 2808 let hasSideEffects = 0, Constraints = "$src1 = $dst", ExeDomain = d in { 2809 def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 2810 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>, 2811 Sched<[sched]>; 2812 let mayLoad = 1 in 2813 def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, intmemop:$src2), 2814 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>, 2815 Sched<[sched.Folded, sched.ReadAfterFold]>; 2816 } 2817 2818} 2819 2820multiclass sse_fp_unop_s_intr<ValueType vt, PatFrags mem_frags, 2821 Intrinsic Intr, Predicate target> { 2822 let Predicates = [target] in { 2823 // These are unary operations, but they are modeled as having 2 source operands 2824 // because the high elements of the destination are unchanged in SSE. 2825 def : Pat<(Intr VR128:$src), 2826 (!cast<Instruction>(NAME#r_Int) VR128:$src, VR128:$src)>; 2827 } 2828 // We don't want to fold scalar loads into these instructions unless 2829 // optimizing for size. This is because the folded instruction will have a 2830 // partial register update, while the unfolded sequence will not, e.g. 2831 // movss mem, %xmm0 2832 // rcpss %xmm0, %xmm0 2833 // which has a clobber before the rcp, vs. 
2834 // rcpss mem, %xmm0 2835 let Predicates = [target, OptForSize] in { 2836 def : Pat<(Intr (mem_frags addr:$src2)), 2837 (!cast<Instruction>(NAME#m_Int) 2838 (vt (IMPLICIT_DEF)), addr:$src2)>; 2839 } 2840} 2841 2842multiclass avx_fp_unop_s_intr<ValueType vt, PatFrags mem_frags, 2843 Intrinsic Intr, Predicate target> { 2844 let Predicates = [target] in { 2845 def : Pat<(Intr VR128:$src), 2846 (!cast<Instruction>(NAME#r_Int) VR128:$src, 2847 VR128:$src)>; 2848 } 2849 let Predicates = [target, OptForSize] in { 2850 def : Pat<(Intr (mem_frags addr:$src2)), 2851 (!cast<Instruction>(NAME#m_Int) 2852 (vt (IMPLICIT_DEF)), addr:$src2)>; 2853 } 2854} 2855 2856multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC, 2857 ValueType ScalarVT, X86MemOperand x86memop, 2858 Operand intmemop, SDPatternOperator OpNode, Domain d, 2859 X86FoldableSchedWrite sched, Predicate target> { 2860 let isCodeGenOnly = 1, hasSideEffects = 0 in { 2861 def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), 2862 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2863 [], d>, Sched<[sched]>; 2864 let mayLoad = 1 in 2865 def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 2866 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2867 [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>; 2868 } 2869 let hasSideEffects = 0, ExeDomain = d in { 2870 def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), 2871 (ins VR128:$src1, VR128:$src2), 2872 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2873 []>, Sched<[sched]>; 2874 let mayLoad = 1 in 2875 def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), 2876 (ins VR128:$src1, intmemop:$src2), 2877 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2878 []>, Sched<[sched.Folded, sched.ReadAfterFold]>; 2879 } 2880 2881 // We don't want to fold scalar loads into these instructions unless 2882 // optimizing for size. This is because the folded instruction will have a 2883 // partial register update, while the unfolded sequence will not, e.g. 2884 // vmovss mem, %xmm0 2885 // vrcpss %xmm0, %xmm0, %xmm0 2886 // which has a clobber before the rcp, vs. 2887 // vrcpss mem, %xmm0, %xmm0 2888 // TODO: In theory, we could fold the load, and avoid the stall caused by 2889 // the partial register store, either in BreakFalseDeps or with smarter RA. 2890 let Predicates = [target] in { 2891 def : Pat<(OpNode RC:$src), (!cast<Instruction>(NAME#r) 2892 (ScalarVT (IMPLICIT_DEF)), RC:$src)>; 2893 } 2894 let Predicates = [target, OptForSize] in { 2895 def : Pat<(ScalarVT (OpNode (load addr:$src))), 2896 (!cast<Instruction>(NAME#m) (ScalarVT (IMPLICIT_DEF)), 2897 addr:$src)>; 2898 } 2899} 2900 2901/// sse1_fp_unop_p - SSE1 unops in packed form. 
2902multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 2903 X86SchedWriteWidths sched, list<Predicate> prds> { 2904let Predicates = prds in { 2905 def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 2906 !strconcat("v", OpcodeStr, 2907 "ps\t{$src, $dst|$dst, $src}"), 2908 [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>, 2909 VEX, Sched<[sched.XMM]>, VEX_WIG; 2910 def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 2911 !strconcat("v", OpcodeStr, 2912 "ps\t{$src, $dst|$dst, $src}"), 2913 [(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))]>, 2914 VEX, Sched<[sched.XMM.Folded]>, VEX_WIG; 2915 def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 2916 !strconcat("v", OpcodeStr, 2917 "ps\t{$src, $dst|$dst, $src}"), 2918 [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>, 2919 VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG; 2920 def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), 2921 !strconcat("v", OpcodeStr, 2922 "ps\t{$src, $dst|$dst, $src}"), 2923 [(set VR256:$dst, (OpNode (loadv8f32 addr:$src)))]>, 2924 VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG; 2925} 2926 2927 def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 2928 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), 2929 [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>, 2930 Sched<[sched.XMM]>; 2931 def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 2932 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), 2933 [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>, 2934 Sched<[sched.XMM.Folded]>; 2935} 2936 2937/// sse2_fp_unop_p - SSE2 unops in vector forms. 2938multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr, 2939 SDPatternOperator OpNode, X86SchedWriteWidths sched> { 2940let Predicates = [HasAVX, NoVLX] in { 2941 def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 2942 !strconcat("v", OpcodeStr, 2943 "pd\t{$src, $dst|$dst, $src}"), 2944 [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>, 2945 VEX, Sched<[sched.XMM]>, VEX_WIG; 2946 def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 2947 !strconcat("v", OpcodeStr, 2948 "pd\t{$src, $dst|$dst, $src}"), 2949 [(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))]>, 2950 VEX, Sched<[sched.XMM.Folded]>, VEX_WIG; 2951 def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 2952 !strconcat("v", OpcodeStr, 2953 "pd\t{$src, $dst|$dst, $src}"), 2954 [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>, 2955 VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG; 2956 def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), 2957 !strconcat("v", OpcodeStr, 2958 "pd\t{$src, $dst|$dst, $src}"), 2959 [(set VR256:$dst, (OpNode (loadv4f64 addr:$src)))]>, 2960 VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG; 2961} 2962 2963 def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 2964 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), 2965 [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>, 2966 Sched<[sched.XMM]>; 2967 def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 2968 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), 2969 [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>, 2970 Sched<[sched.XMM.Folded]>; 2971} 2972 2973multiclass sse1_fp_unop_s_intr<string OpcodeStr, Predicate AVXTarget> { 2974 defm SS : sse_fp_unop_s_intr<v4f32, sse_load_f32, 2975 !cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss), 2976 UseSSE1>, XS; 2977 defm V#NAME#SS : 
avx_fp_unop_s_intr<v4f32, sse_load_f32, 2978 !cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss), 2979 AVXTarget>, 2980 XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable; 2981} 2982 2983multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 2984 X86SchedWriteWidths sched, Predicate AVXTarget> { 2985 defm SS : sse_fp_unop_s<opc, OpcodeStr#ss, FR32, f32mem, 2986 ssmem, OpNode, SSEPackedSingle, sched.Scl, UseSSE1>, XS; 2987 defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr#ss, FR32, f32, 2988 f32mem, ssmem, OpNode, SSEPackedSingle, sched.Scl, AVXTarget>, 2989 XS, VEX_4V, VEX_LIG, VEX_WIG; 2990} 2991 2992multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 2993 X86SchedWriteWidths sched, Predicate AVXTarget> { 2994 defm SD : sse_fp_unop_s<opc, OpcodeStr#sd, FR64, f64mem, 2995 sdmem, OpNode, SSEPackedDouble, sched.Scl, UseSSE2>, XD; 2996 defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr#sd, FR64, f64, 2997 f64mem, sdmem, OpNode, SSEPackedDouble, sched.Scl, AVXTarget>, 2998 XD, VEX_4V, VEX_LIG, VEX_WIG; 2999} 3000 3001// Square root. 3002defm SQRT : sse1_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, UseAVX>, 3003 sse1_fp_unop_p<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>, 3004 sse2_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt64, UseAVX>, 3005 sse2_fp_unop_p<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt64>, SIMD_EXC; 3006 3007// Reciprocal approximations. Note that these typically require refinement 3008// in order to obtain suitable precision. 3009defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>, 3010 sse1_fp_unop_s_intr<"rsqrt", HasAVX>, 3011 sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, [HasAVX]>; 3012defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>, 3013 sse1_fp_unop_s_intr<"rcp", HasAVX>, 3014 sse1_fp_unop_p<0x53, "rcp", X86frcp, SchedWriteFRcp, [HasAVX]>; 3015 3016// There is no f64 version of the reciprocal approximation instructions. 3017 3018multiclass scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix, SDNode Move, 3019 ValueType VT, Predicate BasePredicate> { 3020 let Predicates = [BasePredicate] in { 3021 def : Pat<(VT (Move VT:$dst, (scalar_to_vector 3022 (OpNode (extractelt VT:$src, 0))))), 3023 (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src)>; 3024 } 3025 3026 // Repeat for AVX versions of the instructions. 3027 let Predicates = [UseAVX] in { 3028 def : Pat<(VT (Move VT:$dst, (scalar_to_vector 3029 (OpNode (extractelt VT:$src, 0))))), 3030 (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>; 3031 } 3032} 3033 3034defm : scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32, UseSSE1>; 3035defm : scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64, UseSSE2>; 3036 3037multiclass scalar_unary_math_intr_patterns<Intrinsic Intr, string OpcPrefix, 3038 SDNode Move, ValueType VT, 3039 Predicate BasePredicate> { 3040 let Predicates = [BasePredicate] in { 3041 def : Pat<(VT (Move VT:$dst, (Intr VT:$src))), 3042 (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src)>; 3043 } 3044 3045 // Repeat for AVX versions of the instructions. 
3046 let Predicates = [HasAVX] in { 3047 def : Pat<(VT (Move VT:$dst, (Intr VT:$src))), 3048 (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>; 3049 } 3050} 3051 3052defm : scalar_unary_math_intr_patterns<int_x86_sse_rcp_ss, "RCPSS", X86Movss, 3053 v4f32, UseSSE1>; 3054defm : scalar_unary_math_intr_patterns<int_x86_sse_rsqrt_ss, "RSQRTSS", X86Movss, 3055 v4f32, UseSSE1>; 3056 3057 3058//===----------------------------------------------------------------------===// 3059// SSE 1 & 2 - Non-temporal stores 3060//===----------------------------------------------------------------------===// 3061 3062let AddedComplexity = 400 in { // Prefer non-temporal versions 3063let Predicates = [HasAVX, NoVLX] in { 3064let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in { 3065def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), 3066 (ins f128mem:$dst, VR128:$src), 3067 "movntps\t{$src, $dst|$dst, $src}", 3068 [(alignednontemporalstore (v4f32 VR128:$src), 3069 addr:$dst)]>, VEX, VEX_WIG; 3070def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), 3071 (ins f128mem:$dst, VR128:$src), 3072 "movntpd\t{$src, $dst|$dst, $src}", 3073 [(alignednontemporalstore (v2f64 VR128:$src), 3074 addr:$dst)]>, VEX, VEX_WIG; 3075} // SchedRW 3076 3077let SchedRW = [SchedWriteFMoveLSNT.YMM.MR] in { 3078def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), 3079 (ins f256mem:$dst, VR256:$src), 3080 "movntps\t{$src, $dst|$dst, $src}", 3081 [(alignednontemporalstore (v8f32 VR256:$src), 3082 addr:$dst)]>, VEX, VEX_L, VEX_WIG; 3083def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), 3084 (ins f256mem:$dst, VR256:$src), 3085 "movntpd\t{$src, $dst|$dst, $src}", 3086 [(alignednontemporalstore (v4f64 VR256:$src), 3087 addr:$dst)]>, VEX, VEX_L, VEX_WIG; 3088} // SchedRW 3089 3090let ExeDomain = SSEPackedInt in { 3091def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), 3092 (ins i128mem:$dst, VR128:$src), 3093 "movntdq\t{$src, $dst|$dst, $src}", 3094 [(alignednontemporalstore (v2i64 VR128:$src), 3095 addr:$dst)]>, VEX, VEX_WIG, 3096 Sched<[SchedWriteVecMoveLSNT.XMM.MR]>; 3097def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), 3098 (ins i256mem:$dst, VR256:$src), 3099 "movntdq\t{$src, $dst|$dst, $src}", 3100 [(alignednontemporalstore (v4i64 VR256:$src), 3101 addr:$dst)]>, VEX, VEX_L, VEX_WIG, 3102 Sched<[SchedWriteVecMoveLSNT.YMM.MR]>; 3103} // ExeDomain 3104} // Predicates 3105 3106let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in { 3107def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 3108 "movntps\t{$src, $dst|$dst, $src}", 3109 [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; 3110def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 3111 "movntpd\t{$src, $dst|$dst, $src}", 3112 [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>; 3113} // SchedRW 3114 3115let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLSNT.XMM.MR] in 3116def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 3117 "movntdq\t{$src, $dst|$dst, $src}", 3118 [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>; 3119 3120let SchedRW = [WriteStoreNT] in { 3121// There is no AVX form for instructions below this point 3122def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), 3123 "movnti{l}\t{$src, $dst|$dst, $src}", 3124 [(nontemporalstore (i32 GR32:$src), addr:$dst)]>, 3125 PS, Requires<[HasSSE2]>; 3126def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), 3127 "movnti{q}\t{$src, $dst|$dst, $src}", 3128 [(nontemporalstore (i64 GR64:$src), addr:$dst)]>, 3129 
                   PS, Requires<[HasSSE2]>;
} // SchedRW = [WriteStoreNT]

let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(alignednontemporalstore (v8i32 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256:$src), addr:$dst),
            (VMOVNTDQYmr addr:$dst, VR256:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
            (MOVNTDQmr addr:$dst, VR128:$src)>;
}

} // AddedComplexity

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Prefetch and memory fence
//===----------------------------------------------------------------------===//

// Prefetch intrinsic.
let Predicates = [HasSSEPrefetch], SchedRW = [WriteLoad] in {
def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src),
    "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>, TB;
def PREFETCHT1 : I<0x18, MRM2m, (outs), (ins i8mem:$src),
    "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>, TB;
def PREFETCHT2 : I<0x18, MRM3m, (outs), (ins i8mem:$src),
    "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>, TB;
def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src),
    "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>, TB;
}

// FIXME: How should the flush instruction be modeled?
let SchedRW = [WriteLoad] in {
// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
                "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
              PS, Requires<[HasSSE2]>;
}

let SchedRW = [WriteNop] in {
// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
def PAUSE : I<0x90, RawFrm, (outs), (ins),
              "pause", [(int_x86_sse2_pause)]>, OBXS;
}

let SchedRW = [WriteFence] in {
// Load, store, and memory fence
// TODO: As with mfence, we may want to ease the availability of sfence/lfence
// to include any 64-bit target.
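// For illustration only (a hedged sketch, not part of the definitions): a
// common pairing of the non-temporal stores above with the sfence defined
// below, written with the corresponding compiler intrinsics:
//
//   #include <emmintrin.h>
//   void publish(int *slot, int value, volatile int *ready) {
//     _mm_stream_si32(slot, value); // MOVNTI: write-combining NT store
//     _mm_sfence();                 // SFENCE: order the NT store before
//                                   // the flag store below
//     *ready = 1;
//   }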
def SFENCE : I<0xAE, MRM7X, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
             PS, Requires<[HasSSE1]>;
def LFENCE : I<0xAE, MRM5X, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>,
             PS, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM6X, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>,
             PS, Requires<[HasMFence]>;
} // SchedRW

def : Pat<(X86MFence), (MFENCE)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Load/Store MXCSR register
//===----------------------------------------------------------------------===//

let mayLoad=1, hasSideEffects=1 in
def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
                    "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
               VEX, Sched<[WriteLDMXCSR]>, VEX_WIG;
let mayStore=1, hasSideEffects=1 in
def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                    "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
               VEX, Sched<[WriteSTMXCSR]>, VEX_WIG;

let mayLoad=1, hasSideEffects=1 in
def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src),
                "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
              PS, Sched<[WriteLDMXCSR]>;
let mayStore=1, hasSideEffects=1 in
def STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst),
               "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
              PS, Sched<[WriteSTMXCSR]>;

//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in { // SSE integer instructions

let hasSideEffects = 0 in {
def VMOVDQArr  : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>,
                 Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, VEX_WIG;
def VMOVDQUrr  : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "movdqu\t{$src, $dst|$dst, $src}", []>,
                 Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, VEX_WIG;
def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>,
                 Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, VEX_WIG;
def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      "movdqu\t{$src, $dst|$dst, $src}", []>,
                 Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, VEX_WIG;
}

// For Disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def VMOVDQArr_REV  : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                          "movdqa\t{$src, $dst|$dst, $src}", []>,
                     Sched<[SchedWriteVecMoveLS.XMM.RR]>,
                     VEX, VEX_WIG, FoldGenData<"VMOVDQArr">;
def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                          "movdqa\t{$src, $dst|$dst, $src}", []>,
                     Sched<[SchedWriteVecMoveLS.YMM.RR]>,
                     VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQAYrr">;
def VMOVDQUrr_REV  : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                          "movdqu\t{$src, $dst|$dst, $src}", []>,
                     Sched<[SchedWriteVecMoveLS.XMM.RR]>,
                     VEX, VEX_WIG, FoldGenData<"VMOVDQUrr">;
def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                          "movdqu\t{$src, $dst|$dst, $src}", []>,
                     Sched<[SchedWriteVecMoveLS.YMM.RR]>,
                     VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQUYrr">;
}

let canFoldAsLoad = 1, mayLoad = 1,
isReMaterializable = 1, 3268 hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in { 3269def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 3270 "movdqa\t{$src, $dst|$dst, $src}", 3271 [(set VR128:$dst, (alignedloadv2i64 addr:$src))]>, 3272 Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG; 3273def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), 3274 "movdqa\t{$src, $dst|$dst, $src}", []>, 3275 Sched<[SchedWriteVecMoveLS.YMM.RM]>, 3276 VEX, VEX_L, VEX_WIG; 3277def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 3278 "vmovdqu\t{$src, $dst|$dst, $src}", 3279 [(set VR128:$dst, (loadv2i64 addr:$src))]>, 3280 Sched<[SchedWriteVecMoveLS.XMM.RM]>, 3281 XS, VEX, VEX_WIG; 3282def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), 3283 "vmovdqu\t{$src, $dst|$dst, $src}", []>, 3284 Sched<[SchedWriteVecMoveLS.YMM.RM]>, 3285 XS, VEX, VEX_L, VEX_WIG; 3286} 3287 3288let mayStore = 1, hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in { 3289def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), 3290 (ins i128mem:$dst, VR128:$src), 3291 "movdqa\t{$src, $dst|$dst, $src}", 3292 [(alignedstore (v2i64 VR128:$src), addr:$dst)]>, 3293 Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_WIG; 3294def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs), 3295 (ins i256mem:$dst, VR256:$src), 3296 "movdqa\t{$src, $dst|$dst, $src}", []>, 3297 Sched<[SchedWriteVecMoveLS.YMM.MR]>, VEX, VEX_L, VEX_WIG; 3298def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), 3299 "vmovdqu\t{$src, $dst|$dst, $src}", 3300 [(store (v2i64 VR128:$src), addr:$dst)]>, 3301 Sched<[SchedWriteVecMoveLS.XMM.MR]>, XS, VEX, VEX_WIG; 3302def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), 3303 "vmovdqu\t{$src, $dst|$dst, $src}",[]>, 3304 Sched<[SchedWriteVecMoveLS.YMM.MR]>, XS, VEX, VEX_L, VEX_WIG; 3305} 3306 3307let SchedRW = [SchedWriteVecMoveLS.XMM.RR] in { 3308let hasSideEffects = 0 in { 3309def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 3310 "movdqa\t{$src, $dst|$dst, $src}", []>; 3311 3312def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 3313 "movdqu\t{$src, $dst|$dst, $src}", []>, 3314 XS, Requires<[UseSSE2]>; 3315} 3316 3317// For Disassembler 3318let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { 3319def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), 3320 "movdqa\t{$src, $dst|$dst, $src}", []>, 3321 FoldGenData<"MOVDQArr">; 3322 3323def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), 3324 "movdqu\t{$src, $dst|$dst, $src}", []>, 3325 XS, Requires<[UseSSE2]>, FoldGenData<"MOVDQUrr">; 3326} 3327} // SchedRW 3328 3329let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, 3330 hasSideEffects = 0, SchedRW = [SchedWriteVecMoveLS.XMM.RM] in { 3331def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 3332 "movdqa\t{$src, $dst|$dst, $src}", 3333 [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>; 3334def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 3335 "movdqu\t{$src, $dst|$dst, $src}", 3336 [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>, 3337 XS, Requires<[UseSSE2]>; 3338} 3339 3340let mayStore = 1, hasSideEffects = 0, 3341 SchedRW = [SchedWriteVecMoveLS.XMM.MR] in { 3342def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), 3343 "movdqa\t{$src, $dst|$dst, $src}", 3344 [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>; 3345def MOVDQUmr : I<0x7F, 
MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), 3346 "movdqu\t{$src, $dst|$dst, $src}", 3347 [/*(store (v2i64 VR128:$src), addr:$dst)*/]>, 3348 XS, Requires<[UseSSE2]>; 3349} 3350 3351} // ExeDomain = SSEPackedInt 3352 3353// Reversed version with ".s" suffix for GAS compatibility. 3354def : InstAlias<"vmovdqa.s\t{$src, $dst|$dst, $src}", 3355 (VMOVDQArr_REV VR128:$dst, VR128:$src), 0>; 3356def : InstAlias<"vmovdqa.s\t{$src, $dst|$dst, $src}", 3357 (VMOVDQAYrr_REV VR256:$dst, VR256:$src), 0>; 3358def : InstAlias<"vmovdqu.s\t{$src, $dst|$dst, $src}", 3359 (VMOVDQUrr_REV VR128:$dst, VR128:$src), 0>; 3360def : InstAlias<"vmovdqu.s\t{$src, $dst|$dst, $src}", 3361 (VMOVDQUYrr_REV VR256:$dst, VR256:$src), 0>; 3362 3363// Reversed version with ".s" suffix for GAS compatibility. 3364def : InstAlias<"movdqa.s\t{$src, $dst|$dst, $src}", 3365 (MOVDQArr_REV VR128:$dst, VR128:$src), 0>; 3366def : InstAlias<"movdqu.s\t{$src, $dst|$dst, $src}", 3367 (MOVDQUrr_REV VR128:$dst, VR128:$src), 0>; 3368 3369let Predicates = [HasAVX, NoVLX] in { 3370 // Additional patterns for other integer sizes. 3371 def : Pat<(alignedloadv4i32 addr:$src), 3372 (VMOVDQArm addr:$src)>; 3373 def : Pat<(alignedloadv8i16 addr:$src), 3374 (VMOVDQArm addr:$src)>; 3375 def : Pat<(alignedloadv16i8 addr:$src), 3376 (VMOVDQArm addr:$src)>; 3377 def : Pat<(loadv4i32 addr:$src), 3378 (VMOVDQUrm addr:$src)>; 3379 def : Pat<(loadv8i16 addr:$src), 3380 (VMOVDQUrm addr:$src)>; 3381 def : Pat<(loadv16i8 addr:$src), 3382 (VMOVDQUrm addr:$src)>; 3383 3384 def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), 3385 (VMOVDQAmr addr:$dst, VR128:$src)>; 3386 def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), 3387 (VMOVDQAmr addr:$dst, VR128:$src)>; 3388 def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), 3389 (VMOVDQAmr addr:$dst, VR128:$src)>; 3390 def : Pat<(store (v4i32 VR128:$src), addr:$dst), 3391 (VMOVDQUmr addr:$dst, VR128:$src)>; 3392 def : Pat<(store (v8i16 VR128:$src), addr:$dst), 3393 (VMOVDQUmr addr:$dst, VR128:$src)>; 3394 def : Pat<(store (v16i8 VR128:$src), addr:$dst), 3395 (VMOVDQUmr addr:$dst, VR128:$src)>; 3396} 3397 3398//===---------------------------------------------------------------------===// 3399// SSE2 - Packed Integer Arithmetic Instructions 3400//===---------------------------------------------------------------------===// 3401 3402let ExeDomain = SSEPackedInt in { // SSE integer instructions 3403 3404/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types 3405multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, 3406 ValueType DstVT, ValueType SrcVT, RegisterClass RC, 3407 PatFrag memop_frag, X86MemOperand x86memop, 3408 X86FoldableSchedWrite sched, bit Is2Addr = 1> { 3409 let isCommutable = 1 in 3410 def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), 3411 (ins RC:$src1, RC:$src2), 3412 !if(Is2Addr, 3413 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3414 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3415 [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>, 3416 Sched<[sched]>; 3417 def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), 3418 (ins RC:$src1, x86memop:$src2), 3419 !if(Is2Addr, 3420 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3421 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3422 [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), 3423 (memop_frag addr:$src2))))]>, 3424 Sched<[sched.Folded, sched.ReadAfterFold]>; 3425} 3426} // ExeDomain = SSEPackedInt 3427 3428defm PADDB : PDI_binop_all<0xFC, 
"paddb", add, v16i8, v32i8, 3429 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3430defm PADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16, 3431 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3432defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32, 3433 SchedWriteVecALU, 1, NoVLX>; 3434defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64, 3435 SchedWriteVecALU, 1, NoVLX>; 3436defm PADDSB : PDI_binop_all<0xEC, "paddsb", saddsat, v16i8, v32i8, 3437 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3438defm PADDSW : PDI_binop_all<0xED, "paddsw", saddsat, v8i16, v16i16, 3439 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3440defm PADDUSB : PDI_binop_all<0xDC, "paddusb", uaddsat, v16i8, v32i8, 3441 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3442defm PADDUSW : PDI_binop_all<0xDD, "paddusw", uaddsat, v8i16, v16i16, 3443 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3444defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16, 3445 SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>; 3446defm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16, 3447 SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>; 3448defm PMULHW : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16, 3449 SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>; 3450defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8, 3451 SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; 3452defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16, 3453 SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; 3454defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32, 3455 SchedWriteVecALU, 0, NoVLX>; 3456defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64, 3457 SchedWriteVecALU, 0, NoVLX>; 3458defm PSUBSB : PDI_binop_all<0xE8, "psubsb", ssubsat, v16i8, v32i8, 3459 SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; 3460defm PSUBSW : PDI_binop_all<0xE9, "psubsw", ssubsat, v8i16, v16i16, 3461 SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; 3462defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", usubsat, v16i8, v32i8, 3463 SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; 3464defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", usubsat, v8i16, v16i16, 3465 SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; 3466defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8, 3467 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3468defm PMINSW : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16, 3469 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3470defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8, 3471 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3472defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16, 3473 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3474defm PAVGB : PDI_binop_all<0xE0, "pavgb", X86avg, v16i8, v32i8, 3475 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3476defm PAVGW : PDI_binop_all<0xE3, "pavgw", X86avg, v8i16, v16i16, 3477 SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; 3478defm PMULUDQ : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64, 3479 SchedWriteVecIMul, 1, NoVLX>; 3480 3481let Predicates = [HasAVX, NoVLX_Or_NoBWI] in 3482defm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16, VR128, 3483 load, i128mem, SchedWriteVecIMul.XMM, 0>, 3484 VEX_4V, VEX_WIG; 3485 3486let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in 3487defm VPMADDWDY : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v8i32, v16i16, 3488 VR256, load, i256mem, SchedWriteVecIMul.YMM, 3489 0>, VEX_4V, VEX_L, VEX_WIG; 3490let Constraints = "$src1 = $dst" in 3491defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128, 3492 memop, i128mem, SchedWriteVecIMul.XMM>; 3493 3494let Predicates = [HasAVX, NoVLX_Or_NoBWI] in 3495defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, 
v2i64, v16i8, VR128, 3496 load, i128mem, SchedWritePSADBW.XMM, 0>, 3497 VEX_4V, VEX_WIG; 3498let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in 3499defm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8, VR256, 3500 load, i256mem, SchedWritePSADBW.YMM, 0>, 3501 VEX_4V, VEX_L, VEX_WIG; 3502let Constraints = "$src1 = $dst" in 3503defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128, 3504 memop, i128mem, SchedWritePSADBW.XMM>; 3505 3506//===---------------------------------------------------------------------===// 3507// SSE2 - Packed Integer Logical Instructions 3508//===---------------------------------------------------------------------===// 3509 3510multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm, 3511 string OpcodeStr, SDNode OpNode, 3512 SDNode OpNode2, RegisterClass RC, 3513 X86FoldableSchedWrite sched, 3514 X86FoldableSchedWrite schedImm, 3515 ValueType DstVT, ValueType SrcVT, 3516 PatFrag ld_frag, bit Is2Addr = 1> { 3517 // src2 is always 128-bit 3518 def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), 3519 (ins RC:$src1, VR128:$src2), 3520 !if(Is2Addr, 3521 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3522 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3523 [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))]>, 3524 Sched<[sched]>; 3525 def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), 3526 (ins RC:$src1, i128mem:$src2), 3527 !if(Is2Addr, 3528 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3529 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3530 [(set RC:$dst, (DstVT (OpNode RC:$src1, 3531 (SrcVT (ld_frag addr:$src2)))))]>, 3532 Sched<[sched.Folded, sched.ReadAfterFold]>; 3533 def ri : PDIi8<opc2, ImmForm, (outs RC:$dst), 3534 (ins RC:$src1, u8imm:$src2), 3535 !if(Is2Addr, 3536 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3537 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3538 [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 timm:$src2))))]>, 3539 Sched<[schedImm]>; 3540} 3541 3542multiclass PDI_binop_rmi_all<bits<8> opc, bits<8> opc2, Format ImmForm, 3543 string OpcodeStr, SDNode OpNode, 3544 SDNode OpNode2, ValueType DstVT128, 3545 ValueType DstVT256, ValueType SrcVT, 3546 X86SchedWriteWidths sched, 3547 X86SchedWriteWidths schedImm, Predicate prd> { 3548let Predicates = [HasAVX, prd] in 3549 defm V#NAME : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr), 3550 OpNode, OpNode2, VR128, sched.XMM, schedImm.XMM, 3551 DstVT128, SrcVT, load, 0>, VEX_4V, VEX_WIG; 3552let Predicates = [HasAVX2, prd] in 3553 defm V#NAME#Y : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr), 3554 OpNode, OpNode2, VR256, sched.YMM, schedImm.YMM, 3555 DstVT256, SrcVT, load, 0>, VEX_4V, VEX_L, 3556 VEX_WIG; 3557let Constraints = "$src1 = $dst" in 3558 defm NAME : PDI_binop_rmi<opc, opc2, ImmForm, OpcodeStr, OpNode, OpNode2, 3559 VR128, sched.XMM, schedImm.XMM, DstVT128, SrcVT, 3560 memop>; 3561} 3562 3563multiclass PDI_binop_ri<bits<8> opc, Format ImmForm, string OpcodeStr, 3564 SDNode OpNode, RegisterClass RC, ValueType VT, 3565 X86FoldableSchedWrite sched, bit Is2Addr = 1> { 3566 def ri : PDIi8<opc, ImmForm, (outs RC:$dst), (ins RC:$src1, u8imm:$src2), 3567 !if(Is2Addr, 3568 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3569 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3570 [(set RC:$dst, (VT (OpNode RC:$src1, (i8 timm:$src2))))]>, 3571 Sched<[sched]>; 3572} 3573 3574multiclass PDI_binop_ri_all<bits<8> opc, Format 
ImmForm, string OpcodeStr, 3575 SDNode OpNode, X86SchedWriteWidths sched> { 3576let Predicates = [HasAVX, NoVLX_Or_NoBWI] in 3577 defm V#NAME : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode, 3578 VR128, v16i8, sched.XMM, 0>, VEX_4V, VEX_WIG; 3579let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in 3580 defm V#NAME#Y : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode, 3581 VR256, v32i8, sched.YMM, 0>, 3582 VEX_4V, VEX_L, VEX_WIG; 3583let Constraints = "$src1 = $dst" in 3584 defm NAME : PDI_binop_ri<opc, ImmForm, OpcodeStr, OpNode, VR128, v16i8, 3585 sched.XMM>; 3586} 3587 3588let ExeDomain = SSEPackedInt in { 3589 defm PSLLW : PDI_binop_rmi_all<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli, 3590 v8i16, v16i16, v8i16, SchedWriteVecShift, 3591 SchedWriteVecShiftImm, NoVLX_Or_NoBWI>; 3592 defm PSLLD : PDI_binop_rmi_all<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli, 3593 v4i32, v8i32, v4i32, SchedWriteVecShift, 3594 SchedWriteVecShiftImm, NoVLX>; 3595 defm PSLLQ : PDI_binop_rmi_all<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli, 3596 v2i64, v4i64, v2i64, SchedWriteVecShift, 3597 SchedWriteVecShiftImm, NoVLX>; 3598 3599 defm PSRLW : PDI_binop_rmi_all<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli, 3600 v8i16, v16i16, v8i16, SchedWriteVecShift, 3601 SchedWriteVecShiftImm, NoVLX_Or_NoBWI>; 3602 defm PSRLD : PDI_binop_rmi_all<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli, 3603 v4i32, v8i32, v4i32, SchedWriteVecShift, 3604 SchedWriteVecShiftImm, NoVLX>; 3605 defm PSRLQ : PDI_binop_rmi_all<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli, 3606 v2i64, v4i64, v2i64, SchedWriteVecShift, 3607 SchedWriteVecShiftImm, NoVLX>; 3608 3609 defm PSRAW : PDI_binop_rmi_all<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai, 3610 v8i16, v16i16, v8i16, SchedWriteVecShift, 3611 SchedWriteVecShiftImm, NoVLX_Or_NoBWI>; 3612 defm PSRAD : PDI_binop_rmi_all<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai, 3613 v4i32, v8i32, v4i32, SchedWriteVecShift, 3614 SchedWriteVecShiftImm, NoVLX>; 3615 3616 defm PSLLDQ : PDI_binop_ri_all<0x73, MRM7r, "pslldq", X86vshldq, 3617 SchedWriteShuffle>; 3618 defm PSRLDQ : PDI_binop_ri_all<0x73, MRM3r, "psrldq", X86vshrdq, 3619 SchedWriteShuffle>; 3620} // ExeDomain = SSEPackedInt 3621 3622//===---------------------------------------------------------------------===// 3623// SSE2 - Packed Integer Comparison Instructions 3624//===---------------------------------------------------------------------===// 3625 3626defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8, 3627 SchedWriteVecALU, 1, TruePredicate>; 3628defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16, 3629 SchedWriteVecALU, 1, TruePredicate>; 3630defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32, 3631 SchedWriteVecALU, 1, TruePredicate>; 3632defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8, 3633 SchedWriteVecALU, 0, TruePredicate>; 3634defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16, 3635 SchedWriteVecALU, 0, TruePredicate>; 3636defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32, 3637 SchedWriteVecALU, 0, TruePredicate>; 3638 3639//===---------------------------------------------------------------------===// 3640// SSE2 - Packed Integer Shuffle Instructions 3641//===---------------------------------------------------------------------===// 3642 3643let ExeDomain = SSEPackedInt in { 3644multiclass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256, 3645 SDNode OpNode, X86SchedWriteWidths 
sched, 3646 Predicate prd> { 3647let Predicates = [HasAVX, prd] in { 3648 def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst), 3649 (ins VR128:$src1, u8imm:$src2), 3650 !strconcat("v", OpcodeStr, 3651 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3652 [(set VR128:$dst, 3653 (vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>, 3654 VEX, Sched<[sched.XMM]>, VEX_WIG; 3655 def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), 3656 (ins i128mem:$src1, u8imm:$src2), 3657 !strconcat("v", OpcodeStr, 3658 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3659 [(set VR128:$dst, 3660 (vt128 (OpNode (load addr:$src1), 3661 (i8 timm:$src2))))]>, VEX, 3662 Sched<[sched.XMM.Folded]>, VEX_WIG; 3663} 3664 3665let Predicates = [HasAVX2, prd] in { 3666 def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst), 3667 (ins VR256:$src1, u8imm:$src2), 3668 !strconcat("v", OpcodeStr, 3669 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3670 [(set VR256:$dst, 3671 (vt256 (OpNode VR256:$src1, (i8 timm:$src2))))]>, 3672 VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG; 3673 def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), 3674 (ins i256mem:$src1, u8imm:$src2), 3675 !strconcat("v", OpcodeStr, 3676 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3677 [(set VR256:$dst, 3678 (vt256 (OpNode (load addr:$src1), 3679 (i8 timm:$src2))))]>, VEX, VEX_L, 3680 Sched<[sched.YMM.Folded]>, VEX_WIG; 3681} 3682 3683let Predicates = [UseSSE2] in { 3684 def ri : Ii8<0x70, MRMSrcReg, 3685 (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2), 3686 !strconcat(OpcodeStr, 3687 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3688 [(set VR128:$dst, 3689 (vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>, 3690 Sched<[sched.XMM]>; 3691 def mi : Ii8<0x70, MRMSrcMem, 3692 (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), 3693 !strconcat(OpcodeStr, 3694 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3695 [(set VR128:$dst, 3696 (vt128 (OpNode (memop addr:$src1), 3697 (i8 timm:$src2))))]>, 3698 Sched<[sched.XMM.Folded]>; 3699} 3700} 3701} // ExeDomain = SSEPackedInt 3702 3703defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd, 3704 SchedWriteShuffle, NoVLX>, PD; 3705defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw, 3706 SchedWriteShuffle, NoVLX_Or_NoBWI>, XS; 3707defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw, 3708 SchedWriteShuffle, NoVLX_Or_NoBWI>, XD; 3709 3710//===---------------------------------------------------------------------===// 3711// Packed Integer Pack Instructions (SSE & AVX) 3712//===---------------------------------------------------------------------===// 3713 3714let ExeDomain = SSEPackedInt in { 3715multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT, 3716 ValueType ArgVT, SDNode OpNode, RegisterClass RC, 3717 X86MemOperand x86memop, X86FoldableSchedWrite sched, 3718 PatFrag ld_frag, bit Is2Addr = 1> { 3719 def rr : PDI<opc, MRMSrcReg, 3720 (outs RC:$dst), (ins RC:$src1, RC:$src2), 3721 !if(Is2Addr, 3722 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3723 !strconcat(OpcodeStr, 3724 "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3725 [(set RC:$dst, 3726 (OutVT (OpNode (ArgVT RC:$src1), RC:$src2)))]>, 3727 Sched<[sched]>; 3728 def rm : PDI<opc, MRMSrcMem, 3729 (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 3730 !if(Is2Addr, 3731 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3732 !strconcat(OpcodeStr, 3733 "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3734 [(set RC:$dst, 3735 (OutVT (OpNode (ArgVT RC:$src1), 3736 (ld_frag addr:$src2))))]>, 3737 Sched<[sched.Folded, 
sched.ReadAfterFold]>; 3738} 3739 3740multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT, 3741 ValueType ArgVT, SDNode OpNode, RegisterClass RC, 3742 X86MemOperand x86memop, X86FoldableSchedWrite sched, 3743 PatFrag ld_frag, bit Is2Addr = 1> { 3744 def rr : SS48I<opc, MRMSrcReg, 3745 (outs RC:$dst), (ins RC:$src1, RC:$src2), 3746 !if(Is2Addr, 3747 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3748 !strconcat(OpcodeStr, 3749 "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3750 [(set RC:$dst, 3751 (OutVT (OpNode (ArgVT RC:$src1), RC:$src2)))]>, 3752 Sched<[sched]>; 3753 def rm : SS48I<opc, MRMSrcMem, 3754 (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 3755 !if(Is2Addr, 3756 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3757 !strconcat(OpcodeStr, 3758 "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3759 [(set RC:$dst, 3760 (OutVT (OpNode (ArgVT RC:$src1), 3761 (ld_frag addr:$src2))))]>, 3762 Sched<[sched.Folded, sched.ReadAfterFold]>; 3763} 3764 3765let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { 3766 defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, VR128, 3767 i128mem, SchedWriteShuffle.XMM, load, 0>, 3768 VEX_4V, VEX_WIG; 3769 defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, VR128, 3770 i128mem, SchedWriteShuffle.XMM, load, 0>, 3771 VEX_4V, VEX_WIG; 3772 3773 defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus, VR128, 3774 i128mem, SchedWriteShuffle.XMM, load, 0>, 3775 VEX_4V, VEX_WIG; 3776 defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128, 3777 i128mem, SchedWriteShuffle.XMM, load, 0>, 3778 VEX_4V, VEX_WIG; 3779} 3780 3781let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { 3782 defm VPACKSSWBY : sse2_pack<0x63, "vpacksswb", v32i8, v16i16, X86Packss, VR256, 3783 i256mem, SchedWriteShuffle.YMM, load, 0>, 3784 VEX_4V, VEX_L, VEX_WIG; 3785 defm VPACKSSDWY : sse2_pack<0x6B, "vpackssdw", v16i16, v8i32, X86Packss, VR256, 3786 i256mem, SchedWriteShuffle.YMM, load, 0>, 3787 VEX_4V, VEX_L, VEX_WIG; 3788 3789 defm VPACKUSWBY : sse2_pack<0x67, "vpackuswb", v32i8, v16i16, X86Packus, VR256, 3790 i256mem, SchedWriteShuffle.YMM, load, 0>, 3791 VEX_4V, VEX_L, VEX_WIG; 3792 defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, VR256, 3793 i256mem, SchedWriteShuffle.YMM, load, 0>, 3794 VEX_4V, VEX_L, VEX_WIG; 3795} 3796 3797let Constraints = "$src1 = $dst" in { 3798 defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss, VR128, 3799 i128mem, SchedWriteShuffle.XMM, memop>; 3800 defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss, VR128, 3801 i128mem, SchedWriteShuffle.XMM, memop>; 3802 3803 defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus, VR128, 3804 i128mem, SchedWriteShuffle.XMM, memop>; 3805 3806 defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus, VR128, 3807 i128mem, SchedWriteShuffle.XMM, memop>; 3808} 3809} // ExeDomain = SSEPackedInt 3810 3811//===---------------------------------------------------------------------===// 3812// SSE2 - Packed Integer Unpack Instructions 3813//===---------------------------------------------------------------------===// 3814 3815let ExeDomain = SSEPackedInt in { 3816multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, 3817 SDNode OpNode, RegisterClass RC, X86MemOperand x86memop, 3818 X86FoldableSchedWrite sched, PatFrag ld_frag, 3819 bit Is2Addr = 1> { 3820 def rr : PDI<opc, MRMSrcReg, 3821 (outs RC:$dst), (ins RC:$src1, RC:$src2), 3822 !if(Is2Addr, 3823 
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), 3824 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3825 [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>, 3826 Sched<[sched]>; 3827 def rm : PDI<opc, MRMSrcMem, 3828 (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 3829 !if(Is2Addr, 3830 !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), 3831 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3832 [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>, 3833 Sched<[sched.Folded, sched.ReadAfterFold]>; 3834} 3835 3836let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { 3837 defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, VR128, 3838 i128mem, SchedWriteShuffle.XMM, load, 0>, 3839 VEX_4V, VEX_WIG; 3840 defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, VR128, 3841 i128mem, SchedWriteShuffle.XMM, load, 0>, 3842 VEX_4V, VEX_WIG; 3843 defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, VR128, 3844 i128mem, SchedWriteShuffle.XMM, load, 0>, 3845 VEX_4V, VEX_WIG; 3846 defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, VR128, 3847 i128mem, SchedWriteShuffle.XMM, load, 0>, 3848 VEX_4V, VEX_WIG; 3849} 3850 3851let Predicates = [HasAVX, NoVLX] in { 3852 defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, VR128, 3853 i128mem, SchedWriteShuffle.XMM, load, 0>, 3854 VEX_4V, VEX_WIG; 3855 defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, VR128, 3856 i128mem, SchedWriteShuffle.XMM, load, 0>, 3857 VEX_4V, VEX_WIG; 3858 defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, VR128, 3859 i128mem, SchedWriteShuffle.XMM, load, 0>, 3860 VEX_4V, VEX_WIG; 3861 defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, VR128, 3862 i128mem, SchedWriteShuffle.XMM, load, 0>, 3863 VEX_4V, VEX_WIG; 3864} 3865 3866let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { 3867 defm VPUNPCKLBWY : sse2_unpack<0x60, "vpunpcklbw", v32i8, X86Unpckl, VR256, 3868 i256mem, SchedWriteShuffle.YMM, load, 0>, 3869 VEX_4V, VEX_L, VEX_WIG; 3870 defm VPUNPCKLWDY : sse2_unpack<0x61, "vpunpcklwd", v16i16, X86Unpckl, VR256, 3871 i256mem, SchedWriteShuffle.YMM, load, 0>, 3872 VEX_4V, VEX_L, VEX_WIG; 3873 defm VPUNPCKHBWY : sse2_unpack<0x68, "vpunpckhbw", v32i8, X86Unpckh, VR256, 3874 i256mem, SchedWriteShuffle.YMM, load, 0>, 3875 VEX_4V, VEX_L, VEX_WIG; 3876 defm VPUNPCKHWDY : sse2_unpack<0x69, "vpunpckhwd", v16i16, X86Unpckh, VR256, 3877 i256mem, SchedWriteShuffle.YMM, load, 0>, 3878 VEX_4V, VEX_L, VEX_WIG; 3879} 3880 3881let Predicates = [HasAVX2, NoVLX] in { 3882 defm VPUNPCKLDQY : sse2_unpack<0x62, "vpunpckldq", v8i32, X86Unpckl, VR256, 3883 i256mem, SchedWriteShuffle.YMM, load, 0>, 3884 VEX_4V, VEX_L, VEX_WIG; 3885 defm VPUNPCKLQDQY : sse2_unpack<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, VR256, 3886 i256mem, SchedWriteShuffle.YMM, load, 0>, 3887 VEX_4V, VEX_L, VEX_WIG; 3888 defm VPUNPCKHDQY : sse2_unpack<0x6A, "vpunpckhdq", v8i32, X86Unpckh, VR256, 3889 i256mem, SchedWriteShuffle.YMM, load, 0>, 3890 VEX_4V, VEX_L, VEX_WIG; 3891 defm VPUNPCKHQDQY : sse2_unpack<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, VR256, 3892 i256mem, SchedWriteShuffle.YMM, load, 0>, 3893 VEX_4V, VEX_L, VEX_WIG; 3894} 3895 3896let Constraints = "$src1 = $dst" in { 3897 defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, VR128, 3898 i128mem, SchedWriteShuffle.XMM, memop>; 3899 defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, VR128, 3900 i128mem, SchedWriteShuffle.XMM, memop>; 3901 
defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, VR128, 3902 i128mem, SchedWriteShuffle.XMM, memop>; 3903 defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, VR128, 3904 i128mem, SchedWriteShuffle.XMM, memop>; 3905 3906 defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, VR128, 3907 i128mem, SchedWriteShuffle.XMM, memop>; 3908 defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, VR128, 3909 i128mem, SchedWriteShuffle.XMM, memop>; 3910 defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, VR128, 3911 i128mem, SchedWriteShuffle.XMM, memop>; 3912 defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, VR128, 3913 i128mem, SchedWriteShuffle.XMM, memop>; 3914} 3915} // ExeDomain = SSEPackedInt 3916 3917//===---------------------------------------------------------------------===// 3918// SSE2 - Packed Integer Extract and Insert 3919//===---------------------------------------------------------------------===// 3920 3921let ExeDomain = SSEPackedInt in { 3922multiclass sse2_pinsrw<bit Is2Addr = 1> { 3923 def rr : Ii8<0xC4, MRMSrcReg, 3924 (outs VR128:$dst), (ins VR128:$src1, 3925 GR32orGR64:$src2, u8imm:$src3), 3926 !if(Is2Addr, 3927 "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", 3928 "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 3929 [(set VR128:$dst, 3930 (X86pinsrw VR128:$src1, GR32orGR64:$src2, timm:$src3))]>, 3931 Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>; 3932 def rm : Ii8<0xC4, MRMSrcMem, 3933 (outs VR128:$dst), (ins VR128:$src1, 3934 i16mem:$src2, u8imm:$src3), 3935 !if(Is2Addr, 3936 "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", 3937 "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 3938 [(set VR128:$dst, 3939 (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), 3940 timm:$src3))]>, 3941 Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; 3942} 3943 3944// Extract 3945let Predicates = [HasAVX, NoBWI] in 3946def VPEXTRWrr : Ii8<0xC5, MRMSrcReg, 3947 (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2), 3948 "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", 3949 [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), 3950 timm:$src2))]>, 3951 PD, VEX, VEX_WIG, Sched<[WriteVecExtract]>; 3952def PEXTRWrr : PDIi8<0xC5, MRMSrcReg, 3953 (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2), 3954 "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", 3955 [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), 3956 timm:$src2))]>, 3957 Sched<[WriteVecExtract]>; 3958 3959// Insert 3960let Predicates = [HasAVX, NoBWI] in 3961defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V, VEX_WIG; 3962 3963let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in 3964defm PINSRW : sse2_pinsrw, PD; 3965 3966} // ExeDomain = SSEPackedInt 3967 3968//===---------------------------------------------------------------------===// 3969// SSE2 - Packed Mask Creation 3970//===---------------------------------------------------------------------===// 3971 3972let ExeDomain = SSEPackedInt in { 3973 3974def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), 3975 (ins VR128:$src), 3976 "pmovmskb\t{$src, $dst|$dst, $src}", 3977 [(set GR32orGR64:$dst, (X86movmsk (v16i8 VR128:$src)))]>, 3978 Sched<[WriteVecMOVMSK]>, VEX, VEX_WIG; 3979 3980let Predicates = [HasAVX2] in { 3981def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), 3982 (ins VR256:$src), 3983 "pmovmskb\t{$src, $dst|$dst, $src}", 3984 [(set GR32orGR64:$dst, (X86movmsk (v32i8 VR256:$src)))]>, 3985 
Sched<[WriteVecMOVMSKY]>, VEX, VEX_L, VEX_WIG; 3986} 3987 3988def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src), 3989 "pmovmskb\t{$src, $dst|$dst, $src}", 3990 [(set GR32orGR64:$dst, (X86movmsk (v16i8 VR128:$src)))]>, 3991 Sched<[WriteVecMOVMSK]>; 3992 3993} // ExeDomain = SSEPackedInt 3994 3995//===---------------------------------------------------------------------===// 3996// SSE2 - Conditional Store 3997//===---------------------------------------------------------------------===// 3998 3999let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in { 4000let Uses = [EDI], Predicates = [HasAVX,Not64BitMode] in 4001def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), 4002 (ins VR128:$src, VR128:$mask), 4003 "maskmovdqu\t{$mask, $src|$src, $mask}", 4004 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>, 4005 VEX, VEX_WIG; 4006let Uses = [RDI], Predicates = [HasAVX,In64BitMode] in 4007def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs), 4008 (ins VR128:$src, VR128:$mask), 4009 "maskmovdqu\t{$mask, $src|$src, $mask}", 4010 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, 4011 VEX, VEX_WIG, AdSize64; 4012let Uses = [EDI], Predicates = [HasAVX,In64BitMode] in 4013def VMASKMOVDQUX32 : VPDI<0xF7, MRMSrcReg, (outs), 4014 (ins VR128:$src, VR128:$mask), "", 4015 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>, 4016 VEX, VEX_WIG, AdSize32 { 4017 let AsmString = "addr32 vmaskmovdqu\t{$mask, $src|$src, $mask}"; 4018 let AsmVariantName = "NonParsable"; 4019} 4020 4021let Uses = [EDI], Predicates = [UseSSE2,Not64BitMode] in 4022def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), 4023 "maskmovdqu\t{$mask, $src|$src, $mask}", 4024 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>; 4025let Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in 4026def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), 4027 "maskmovdqu\t{$mask, $src|$src, $mask}", 4028 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, 4029 AdSize64; 4030let Uses = [EDI], Predicates = [UseSSE2,In64BitMode] in 4031def MASKMOVDQUX32 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), 4032 "addr32 maskmovdqu\t{$mask, $src|$src, $mask}", 4033 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>, 4034 AdSize32 { 4035 let AsmVariantName = "NonParsable"; 4036} 4037 4038} // ExeDomain = SSEPackedInt 4039 4040//===---------------------------------------------------------------------===// 4041// SSE2 - Move Doubleword/Quadword 4042//===---------------------------------------------------------------------===// 4043 4044//===---------------------------------------------------------------------===// 4045// Move Int Doubleword to Packed Double Int 4046// 4047let ExeDomain = SSEPackedInt in { 4048def VMOVDI2PDIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), 4049 "movd\t{$src, $dst|$dst, $src}", 4050 [(set VR128:$dst, 4051 (v4i32 (scalar_to_vector GR32:$src)))]>, 4052 VEX, Sched<[WriteVecMoveFromGpr]>; 4053def VMOVDI2PDIrm : VS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), 4054 "movd\t{$src, $dst|$dst, $src}", 4055 [(set VR128:$dst, 4056 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>, 4057 VEX, Sched<[WriteVecLoad]>; 4058def VMOV64toPQIrr : VRS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), 4059 "movq\t{$src, $dst|$dst, $src}", 4060 [(set VR128:$dst, 4061 (v2i64 (scalar_to_vector GR64:$src)))]>, 4062 VEX, Sched<[WriteVecMoveFromGpr]>; 4063let isCodeGenOnly = 1, 
ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in 4064def VMOV64toPQIrm : VRS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 4065 "movq\t{$src, $dst|$dst, $src}", []>, 4066 VEX, Sched<[WriteVecLoad]>; 4067let isCodeGenOnly = 1 in 4068def VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), 4069 "movq\t{$src, $dst|$dst, $src}", 4070 [(set FR64:$dst, (bitconvert GR64:$src))]>, 4071 VEX, Sched<[WriteVecMoveFromGpr]>; 4072 4073def MOVDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), 4074 "movd\t{$src, $dst|$dst, $src}", 4075 [(set VR128:$dst, 4076 (v4i32 (scalar_to_vector GR32:$src)))]>, 4077 Sched<[WriteVecMoveFromGpr]>; 4078def MOVDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), 4079 "movd\t{$src, $dst|$dst, $src}", 4080 [(set VR128:$dst, 4081 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>, 4082 Sched<[WriteVecLoad]>; 4083def MOV64toPQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), 4084 "movq\t{$src, $dst|$dst, $src}", 4085 [(set VR128:$dst, 4086 (v2i64 (scalar_to_vector GR64:$src)))]>, 4087 Sched<[WriteVecMoveFromGpr]>; 4088let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in 4089def MOV64toPQIrm : RS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 4090 "movq\t{$src, $dst|$dst, $src}", []>, 4091 Sched<[WriteVecLoad]>; 4092let isCodeGenOnly = 1 in 4093def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), 4094 "movq\t{$src, $dst|$dst, $src}", 4095 [(set FR64:$dst, (bitconvert GR64:$src))]>, 4096 Sched<[WriteVecMoveFromGpr]>; 4097} // ExeDomain = SSEPackedInt 4098 4099//===---------------------------------------------------------------------===// 4100// Move Int Doubleword to Single Scalar 4101// 4102let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { 4103 def VMOVDI2SSrr : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), 4104 "movd\t{$src, $dst|$dst, $src}", 4105 [(set FR32:$dst, (bitconvert GR32:$src))]>, 4106 VEX, Sched<[WriteVecMoveFromGpr]>; 4107 4108 def MOVDI2SSrr : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), 4109 "movd\t{$src, $dst|$dst, $src}", 4110 [(set FR32:$dst, (bitconvert GR32:$src))]>, 4111 Sched<[WriteVecMoveFromGpr]>; 4112 4113} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 4114 4115//===---------------------------------------------------------------------===// 4116// Move Packed Doubleword Int to Packed Double Int 4117// 4118let ExeDomain = SSEPackedInt in { 4119def VMOVPDI2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), 4120 "movd\t{$src, $dst|$dst, $src}", 4121 [(set GR32:$dst, (extractelt (v4i32 VR128:$src), 4122 (iPTR 0)))]>, VEX, 4123 Sched<[WriteVecMoveToGpr]>; 4124def VMOVPDI2DImr : VS2I<0x7E, MRMDestMem, (outs), 4125 (ins i32mem:$dst, VR128:$src), 4126 "movd\t{$src, $dst|$dst, $src}", 4127 [(store (i32 (extractelt (v4i32 VR128:$src), 4128 (iPTR 0))), addr:$dst)]>, 4129 VEX, Sched<[WriteVecStore]>; 4130def MOVPDI2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), 4131 "movd\t{$src, $dst|$dst, $src}", 4132 [(set GR32:$dst, (extractelt (v4i32 VR128:$src), 4133 (iPTR 0)))]>, 4134 Sched<[WriteVecMoveToGpr]>; 4135def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), 4136 "movd\t{$src, $dst|$dst, $src}", 4137 [(store (i32 (extractelt (v4i32 VR128:$src), 4138 (iPTR 0))), addr:$dst)]>, 4139 Sched<[WriteVecStore]>; 4140} // ExeDomain = SSEPackedInt 4141 4142//===---------------------------------------------------------------------===// 4143// Move 
Packed Doubleword Int first element to Doubleword Int 4144// 4145let ExeDomain = SSEPackedInt in { 4146let SchedRW = [WriteVecMoveToGpr] in { 4147def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), 4148 "movq\t{$src, $dst|$dst, $src}", 4149 [(set GR64:$dst, (extractelt (v2i64 VR128:$src), 4150 (iPTR 0)))]>, 4151 VEX; 4152 4153def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), 4154 "movq\t{$src, $dst|$dst, $src}", 4155 [(set GR64:$dst, (extractelt (v2i64 VR128:$src), 4156 (iPTR 0)))]>; 4157} //SchedRW 4158 4159let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in 4160def VMOVPQIto64mr : VRS2I<0x7E, MRMDestMem, (outs), 4161 (ins i64mem:$dst, VR128:$src), 4162 "movq\t{$src, $dst|$dst, $src}", []>, 4163 VEX, Sched<[WriteVecStore]>; 4164let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in 4165def MOVPQIto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), 4166 "movq\t{$src, $dst|$dst, $src}", []>, 4167 Sched<[WriteVecStore]>; 4168} // ExeDomain = SSEPackedInt 4169 4170//===---------------------------------------------------------------------===// 4171// Bitcast FR64 <-> GR64 4172// 4173let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { 4174 def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), 4175 "movq\t{$src, $dst|$dst, $src}", 4176 [(set GR64:$dst, (bitconvert FR64:$src))]>, 4177 VEX, Sched<[WriteVecMoveToGpr]>; 4178 4179 def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), 4180 "movq\t{$src, $dst|$dst, $src}", 4181 [(set GR64:$dst, (bitconvert FR64:$src))]>, 4182 Sched<[WriteVecMoveToGpr]>; 4183} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 4184 4185//===---------------------------------------------------------------------===// 4186// Move Scalar Single to Double Int 4187// 4188let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { 4189 def VMOVSS2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), 4190 "movd\t{$src, $dst|$dst, $src}", 4191 [(set GR32:$dst, (bitconvert FR32:$src))]>, 4192 VEX, Sched<[WriteVecMoveToGpr]>; 4193 def MOVSS2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), 4194 "movd\t{$src, $dst|$dst, $src}", 4195 [(set GR32:$dst, (bitconvert FR32:$src))]>, 4196 Sched<[WriteVecMoveToGpr]>; 4197} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 4198 4199let Predicates = [UseAVX] in { 4200 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))), 4201 (VMOVDI2PDIrr GR32:$src)>; 4202 4203 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))), 4204 (VMOV64toPQIrr GR64:$src)>; 4205 4206 // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part. 4207 // These instructions also write zeros in the high part of a 256-bit register. 4208 def : Pat<(v4i32 (X86vzload32 addr:$src)), 4209 (VMOVDI2PDIrm addr:$src)>; 4210 def : Pat<(v8i32 (X86vzload32 addr:$src)), 4211 (SUBREG_TO_REG (i64 0), (v4i32 (VMOVDI2PDIrm addr:$src)), sub_xmm)>; 4212} 4213 4214let Predicates = [UseSSE2] in { 4215 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))), 4216 (MOVDI2PDIrr GR32:$src)>; 4217 4218 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))), 4219 (MOV64toPQIrr GR64:$src)>; 4220 def : Pat<(v4i32 (X86vzload32 addr:$src)), 4221 (MOVDI2PDIrm addr:$src)>; 4222} 4223 4224// Before the MC layer of LLVM existed, clang emitted "movd" assembly instead of 4225// "movq" due to MacOS parsing limitation. 
In order to parse old assembly, we add
4226// these aliases.
4227def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
4228 (MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
4229def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
4230 (MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
4231// Allow "vmovd" but print "vmovq" since we don't need compatibility for AVX.
4232def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
4233 (VMOV64toPQIrr VR128:$dst, GR64:$src), 0>;
4234def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
4235 (VMOVPQIto64rr GR64:$dst, VR128:$src), 0>;
4236
4237//===---------------------------------------------------------------------===//
4238// SSE2 - Move Quadword
4239//===---------------------------------------------------------------------===//
4240
4241//===---------------------------------------------------------------------===//
4242// Move Quadword Int to Packed Quadword Int
4243//
4244
4245let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLoad] in {
4246def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
4247 "vmovq\t{$src, $dst|$dst, $src}",
4248 [(set VR128:$dst,
4249 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
4250 VEX, Requires<[UseAVX]>, VEX_WIG;
4251def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
4252 "movq\t{$src, $dst|$dst, $src}",
4253 [(set VR128:$dst,
4254 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
4255 XS, Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
4256} // ExeDomain, SchedRW
4257
4258//===---------------------------------------------------------------------===//
4259// Move Packed Quadword Int to Quadword Int
4260//
4261let ExeDomain = SSEPackedInt, SchedRW = [WriteVecStore] in {
4262def VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
4263 "movq\t{$src, $dst|$dst, $src}",
4264 [(store (i64 (extractelt (v2i64 VR128:$src),
4265 (iPTR 0))), addr:$dst)]>,
4266 VEX, VEX_WIG;
4267def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
4268 "movq\t{$src, $dst|$dst, $src}",
4269 [(store (i64 (extractelt (v2i64 VR128:$src),
4270 (iPTR 0))), addr:$dst)]>;
4271} // ExeDomain, SchedRW
4272
4273// For disassembler only
4274let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
4275 SchedRW = [SchedWriteVecLogic.XMM] in {
4276def VMOVPQI2QIrr : VS2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
4277 "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_WIG;
4278def MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
4279 "movq\t{$src, $dst|$dst, $src}", []>;
4280}
4281
4282def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
4283 (VMOVPQI2QIrr VR128:$dst, VR128:$src), 0>;
4284def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
4285 (MOVPQI2QIrr VR128:$dst, VR128:$src), 0>;
4286
4287let Predicates = [UseAVX] in {
4288 def : Pat<(v2i64 (X86vzload64 addr:$src)),
4289 (VMOVQI2PQIrm addr:$src)>;
4290 def : Pat<(v4i64 (X86vzload64 addr:$src)),
4291 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIrm addr:$src)), sub_xmm)>;
4292
4293 def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
4294 (VMOVPQI2QImr addr:$dst, VR128:$src)>;
4295}
4296
4297let Predicates = [UseSSE2] in {
4298 def : Pat<(v2i64 (X86vzload64 addr:$src)), (MOVQI2PQIrm addr:$src)>;
4299
4300 def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
4301 (MOVPQI2QImr addr:$dst, VR128:$src)>;
4302}
4303
4304//===---------------------------------------------------------------------===//
4305// Move from XMM to XMM and clear the upper 64 bits. Note: the IA32 document
4306// has a bug here; movq xmm1, xmm2 does clear the high bits.
4307//
4308let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4309def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4310 "vmovq\t{$src, $dst|$dst, $src}",
4311 [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
4312 XS, VEX, Requires<[UseAVX]>, VEX_WIG;
4313def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4314 "movq\t{$src, $dst|$dst, $src}",
4315 [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
4316 XS, Requires<[UseSSE2]>;
4317} // ExeDomain, SchedRW
4318
4319let Predicates = [UseAVX] in {
4320 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
4321 (VMOVZPQILo2PQIrr VR128:$src)>;
4322}
4323let Predicates = [UseSSE2] in {
4324 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
4325 (MOVZPQILo2PQIrr VR128:$src)>;
4326}
4327
4328let Predicates = [UseAVX] in {
4329 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
4330 (SUBREG_TO_REG (i32 0),
4331 (v2f64 (VMOVZPQILo2PQIrr
4332 (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)))),
4333 sub_xmm)>;
4334 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
4335 (SUBREG_TO_REG (i32 0),
4336 (v2i64 (VMOVZPQILo2PQIrr
4337 (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)))),
4338 sub_xmm)>;
4339}
4340
4341//===---------------------------------------------------------------------===//
4342// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
4343//===---------------------------------------------------------------------===//
4344
4345multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
4346 ValueType vt, RegisterClass RC, PatFrag mem_frag,
4347 X86MemOperand x86memop, X86FoldableSchedWrite sched> {
4348def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4349 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4350 [(set RC:$dst, (vt (OpNode RC:$src)))]>,
4351 Sched<[sched]>;
4352def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
4353 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4354 [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>,
4355 Sched<[sched.Folded]>;
4356}
4357
4358let Predicates = [HasAVX, NoVLX] in {
4359 defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
4360 v4f32, VR128, loadv4f32, f128mem,
4361 SchedWriteFShuffle.XMM>, VEX, VEX_WIG;
4362 defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
4363 v4f32, VR128, loadv4f32, f128mem,
4364 SchedWriteFShuffle.XMM>, VEX, VEX_WIG;
4365 defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
4366 v8f32, VR256, loadv8f32, f256mem,
4367 SchedWriteFShuffle.YMM>, VEX, VEX_L, VEX_WIG;
4368 defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
4369 v8f32, VR256, loadv8f32, f256mem,
4370 SchedWriteFShuffle.YMM>, VEX, VEX_L, VEX_WIG;
4371}
4372defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
4373 memopv4f32, f128mem, SchedWriteFShuffle.XMM>;
4374defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
4375 memopv4f32, f128mem, SchedWriteFShuffle.XMM>;
4376
4377let Predicates = [HasAVX, NoVLX] in {
4378 def : Pat<(v4i32 (X86Movshdup VR128:$src)),
4379 (VMOVSHDUPrr VR128:$src)>;
4380 def : Pat<(v4i32 (X86Movshdup (load addr:$src))),
4381 (VMOVSHDUPrm addr:$src)>;
4382 def : Pat<(v4i32 (X86Movsldup VR128:$src)),
4383 (VMOVSLDUPrr VR128:$src)>;
4384 def : Pat<(v4i32 (X86Movsldup (load addr:$src))),
4385 (VMOVSLDUPrm addr:$src)>;
4386 def : Pat<(v8i32 (X86Movshdup
VR256:$src)), 4387 (VMOVSHDUPYrr VR256:$src)>; 4388 def : Pat<(v8i32 (X86Movshdup (load addr:$src))), 4389 (VMOVSHDUPYrm addr:$src)>; 4390 def : Pat<(v8i32 (X86Movsldup VR256:$src)), 4391 (VMOVSLDUPYrr VR256:$src)>; 4392 def : Pat<(v8i32 (X86Movsldup (load addr:$src))), 4393 (VMOVSLDUPYrm addr:$src)>; 4394} 4395 4396let Predicates = [UseSSE3] in { 4397 def : Pat<(v4i32 (X86Movshdup VR128:$src)), 4398 (MOVSHDUPrr VR128:$src)>; 4399 def : Pat<(v4i32 (X86Movshdup (memop addr:$src))), 4400 (MOVSHDUPrm addr:$src)>; 4401 def : Pat<(v4i32 (X86Movsldup VR128:$src)), 4402 (MOVSLDUPrr VR128:$src)>; 4403 def : Pat<(v4i32 (X86Movsldup (memop addr:$src))), 4404 (MOVSLDUPrm addr:$src)>; 4405} 4406 4407//===---------------------------------------------------------------------===// 4408// SSE3 - Replicate Double FP - MOVDDUP 4409//===---------------------------------------------------------------------===// 4410 4411multiclass sse3_replicate_dfp<string OpcodeStr, X86SchedWriteWidths sched> { 4412def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 4413 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4414 [(set VR128:$dst, (v2f64 (X86Movddup VR128:$src)))]>, 4415 Sched<[sched.XMM]>; 4416def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), 4417 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4418 [(set VR128:$dst, 4419 (v2f64 (X86Movddup 4420 (scalar_to_vector (loadf64 addr:$src)))))]>, 4421 Sched<[sched.XMM.Folded]>; 4422} 4423 4424// FIXME: Merge with above classes when there are patterns for the ymm version 4425multiclass sse3_replicate_dfp_y<string OpcodeStr, X86SchedWriteWidths sched> { 4426def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 4427 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4428 [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>, 4429 Sched<[sched.YMM]>; 4430def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), 4431 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4432 [(set VR256:$dst, 4433 (v4f64 (X86Movddup (loadv4f64 addr:$src))))]>, 4434 Sched<[sched.YMM.Folded]>; 4435} 4436 4437let Predicates = [HasAVX, NoVLX] in { 4438 defm VMOVDDUP : sse3_replicate_dfp<"vmovddup", SchedWriteFShuffle>, 4439 VEX, VEX_WIG; 4440 defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup", SchedWriteFShuffle>, 4441 VEX, VEX_L, VEX_WIG; 4442} 4443 4444defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>; 4445 4446 4447let Predicates = [HasAVX, NoVLX] in { 4448 def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))), 4449 (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; 4450} 4451 4452let Predicates = [UseSSE3] in { 4453 def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))), 4454 (MOVDDUPrm addr:$src)>; 4455} 4456 4457//===---------------------------------------------------------------------===// 4458// SSE3 - Move Unaligned Integer 4459//===---------------------------------------------------------------------===// 4460 4461let Predicates = [HasAVX] in { 4462 def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 4463 "vlddqu\t{$src, $dst|$dst, $src}", 4464 [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, 4465 Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG; 4466 def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), 4467 "vlddqu\t{$src, $dst|$dst, $src}", 4468 [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>, 4469 Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, VEX_L, VEX_WIG; 4470} // Predicates 4471 4472def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins 
i128mem:$src), 4473 "lddqu\t{$src, $dst|$dst, $src}", 4474 [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, 4475 Sched<[SchedWriteVecMoveLS.XMM.RM]>; 4476 4477//===---------------------------------------------------------------------===// 4478// SSE3 - Arithmetic 4479//===---------------------------------------------------------------------===// 4480 4481multiclass sse3_addsub<string OpcodeStr, ValueType vt, RegisterClass RC, 4482 X86MemOperand x86memop, X86FoldableSchedWrite sched, 4483 PatFrag ld_frag, bit Is2Addr = 1> { 4484let Uses = [MXCSR], mayRaiseFPException = 1 in { 4485 def rr : I<0xD0, MRMSrcReg, 4486 (outs RC:$dst), (ins RC:$src1, RC:$src2), 4487 !if(Is2Addr, 4488 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4489 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4490 [(set RC:$dst, (vt (X86Addsub RC:$src1, RC:$src2)))]>, 4491 Sched<[sched]>; 4492 def rm : I<0xD0, MRMSrcMem, 4493 (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 4494 !if(Is2Addr, 4495 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4496 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4497 [(set RC:$dst, (vt (X86Addsub RC:$src1, (ld_frag addr:$src2))))]>, 4498 Sched<[sched.Folded, sched.ReadAfterFold]>; 4499} 4500} 4501 4502let Predicates = [HasAVX] in { 4503 let ExeDomain = SSEPackedSingle in { 4504 defm VADDSUBPS : sse3_addsub<"vaddsubps", v4f32, VR128, f128mem, 4505 SchedWriteFAddSizes.PS.XMM, loadv4f32, 0>, 4506 XD, VEX_4V, VEX_WIG; 4507 defm VADDSUBPSY : sse3_addsub<"vaddsubps", v8f32, VR256, f256mem, 4508 SchedWriteFAddSizes.PS.YMM, loadv8f32, 0>, 4509 XD, VEX_4V, VEX_L, VEX_WIG; 4510 } 4511 let ExeDomain = SSEPackedDouble in { 4512 defm VADDSUBPD : sse3_addsub<"vaddsubpd", v2f64, VR128, f128mem, 4513 SchedWriteFAddSizes.PD.XMM, loadv2f64, 0>, 4514 PD, VEX_4V, VEX_WIG; 4515 defm VADDSUBPDY : sse3_addsub<"vaddsubpd", v4f64, VR256, f256mem, 4516 SchedWriteFAddSizes.PD.YMM, loadv4f64, 0>, 4517 PD, VEX_4V, VEX_L, VEX_WIG; 4518 } 4519} 4520let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in { 4521 let ExeDomain = SSEPackedSingle in 4522 defm ADDSUBPS : sse3_addsub<"addsubps", v4f32, VR128, f128mem, 4523 SchedWriteFAddSizes.PS.XMM, memopv4f32>, XD; 4524 let ExeDomain = SSEPackedDouble in 4525 defm ADDSUBPD : sse3_addsub<"addsubpd", v2f64, VR128, f128mem, 4526 SchedWriteFAddSizes.PD.XMM, memopv2f64>, PD; 4527} 4528 4529//===---------------------------------------------------------------------===// 4530// SSE3 Instructions 4531//===---------------------------------------------------------------------===// 4532 4533// Horizontal ops 4534multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, 4535 X86MemOperand x86memop, SDNode OpNode, 4536 X86FoldableSchedWrite sched, PatFrag ld_frag, 4537 bit Is2Addr = 1> { 4538let Uses = [MXCSR], mayRaiseFPException = 1 in { 4539 def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), 4540 !if(Is2Addr, 4541 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4542 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4543 [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>, 4544 Sched<[sched]>; 4545 4546 def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 4547 !if(Is2Addr, 4548 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4549 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4550 [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>, 4551 Sched<[sched.Folded, sched.ReadAfterFold]>; 4552} 4553} 
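// As an illustration (per the Intel SDM) of the horizontal-op semantics
// modeled by X86fhadd/X86fhsub: "haddps %xmm1, %xmm0" computes
//   xmm0 = { xmm0[0]+xmm0[1], xmm0[2]+xmm0[3], xmm1[0]+xmm1[1], xmm1[2]+xmm1[3] },
// and the hsub forms subtract within each pair instead.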
4554multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
4555 X86MemOperand x86memop, SDNode OpNode,
4556 X86FoldableSchedWrite sched, PatFrag ld_frag,
4557 bit Is2Addr = 1> {
4558let Uses = [MXCSR], mayRaiseFPException = 1 in {
4559 def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
4560 !if(Is2Addr,
4561 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4562 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4563 [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>,
4564 Sched<[sched]>;
4565
4566 def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
4567 !if(Is2Addr,
4568 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4569 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4570 [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
4571 Sched<[sched.Folded, sched.ReadAfterFold]>;
4572}
4573}
4574
4575let Predicates = [HasAVX] in {
4576 let ExeDomain = SSEPackedSingle in {
4577 defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
4578 X86fhadd, WriteFHAdd, loadv4f32, 0>, VEX_4V, VEX_WIG;
4579 defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
4580 X86fhsub, WriteFHAdd, loadv4f32, 0>, VEX_4V, VEX_WIG;
4581 defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
4582 X86fhadd, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
4583 defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
4584 X86fhsub, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
4585 }
4586 let ExeDomain = SSEPackedDouble in {
4587 defm VHADDPD : S3_Int<0x7C, "vhaddpd", v2f64, VR128, f128mem,
4588 X86fhadd, WriteFHAdd, loadv2f64, 0>, VEX_4V, VEX_WIG;
4589 defm VHSUBPD : S3_Int<0x7D, "vhsubpd", v2f64, VR128, f128mem,
4590 X86fhsub, WriteFHAdd, loadv2f64, 0>, VEX_4V, VEX_WIG;
4591 defm VHADDPDY : S3_Int<0x7C, "vhaddpd", v4f64, VR256, f256mem,
4592 X86fhadd, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
4593 defm VHSUBPDY : S3_Int<0x7D, "vhsubpd", v4f64, VR256, f256mem,
4594 X86fhsub, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
4595 }
4596}
4597
4598let Constraints = "$src1 = $dst" in {
4599 let ExeDomain = SSEPackedSingle in {
4600 defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd,
4601 WriteFHAdd, memopv4f32>;
4602 defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub,
4603 WriteFHAdd, memopv4f32>;
4604 }
4605 let ExeDomain = SSEPackedDouble in {
4606 defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd,
4607 WriteFHAdd, memopv2f64>;
4608 defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub,
4609 WriteFHAdd, memopv2f64>;
4610 }
4611}
4612
4613//===---------------------------------------------------------------------===//
4614// SSSE3 - Packed Absolute Instructions
4615//===---------------------------------------------------------------------===//
4616
4617/// SS3I_unop_rm - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
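/// For instance, with the defms below, a node such as
///   (v16i8 (abs VR128:$src))          selects PABSBrr, and
///   (v16i8 (abs (memop addr:$src)))   selects PABSBrm (folded load).
/// (Illustrative mapping; the exact instruction names come from the defm
/// instantiations.)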
4618multiclass SS3I_unop_rm<bits<8> opc, string OpcodeStr, ValueType vt,
4619 SDNode OpNode, X86SchedWriteWidths sched, PatFrag ld_frag> {
4620 def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
4621 (ins VR128:$src),
4622 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4623 [(set VR128:$dst, (vt (OpNode VR128:$src)))]>,
4624 Sched<[sched.XMM]>;
4625
4626 def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
4627 (ins i128mem:$src),
4628 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4629 [(set VR128:$dst,
4630 (vt (OpNode (ld_frag addr:$src))))]>,
4631 Sched<[sched.XMM.Folded]>;
4632}
4633
4634/// SS3I_unop_rm_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
4635multiclass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt,
4636 SDNode OpNode, X86SchedWriteWidths sched> {
4637 def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
4638 (ins VR256:$src),
4639 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4640 [(set VR256:$dst, (vt (OpNode VR256:$src)))]>,
4641 Sched<[sched.YMM]>;
4642
4643 def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
4644 (ins i256mem:$src),
4645 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4646 [(set VR256:$dst,
4647 (vt (OpNode (load addr:$src))))]>,
4648 Sched<[sched.YMM.Folded]>;
4649}
4650
4651let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
4652 defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, SchedWriteVecALU,
4653 load>, VEX, VEX_WIG;
4654 defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, SchedWriteVecALU,
4655 load>, VEX, VEX_WIG;
4656}
4657let Predicates = [HasAVX, NoVLX] in {
4658 defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, SchedWriteVecALU,
4659 load>, VEX, VEX_WIG;
4660}
4661let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
4662 defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs, SchedWriteVecALU>,
4663 VEX, VEX_L, VEX_WIG;
4664 defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs, SchedWriteVecALU>,
4665 VEX, VEX_L, VEX_WIG;
4666}
4667let Predicates = [HasAVX2, NoVLX] in {
4668 defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs, SchedWriteVecALU>,
4669 VEX, VEX_L, VEX_WIG;
4670}
4671
4672defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, SchedWriteVecALU,
4673 memop>;
4674defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, SchedWriteVecALU,
4675 memop>;
4676defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, SchedWriteVecALU,
4677 memop>;
4678
4679//===---------------------------------------------------------------------===//
4680// SSSE3 - Packed Binary Operator Instructions
4681//===---------------------------------------------------------------------===//
4682
4683/// SS3I_binop_rm - Simple SSSE3 bin op
4684multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4685 ValueType DstVT, ValueType OpVT, RegisterClass RC,
4686 PatFrag memop_frag, X86MemOperand x86memop,
4687 X86FoldableSchedWrite sched, bit Is2Addr = 1> {
4688 let isCommutable = 1 in
4689 def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst),
4690 (ins RC:$src1, RC:$src2),
4691 !if(Is2Addr,
4692 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4693 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4694 [(set RC:$dst, (DstVT (OpNode (OpVT RC:$src1), RC:$src2)))]>,
4695 Sched<[sched]>;
4696 def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
4697 (ins RC:$src1, x86memop:$src2),
4698 !if(Is2Addr,
4699 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4700 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4701 [(set RC:$dst,
4702 (DstVT (OpNode (OpVT RC:$src1), (memop_frag
addr:$src2))))]>, 4703 Sched<[sched.Folded, sched.ReadAfterFold]>; 4704} 4705 4706/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. 4707multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, 4708 Intrinsic IntId128, X86FoldableSchedWrite sched, 4709 PatFrag ld_frag, bit Is2Addr = 1> { 4710 let isCommutable = 1 in 4711 def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst), 4712 (ins VR128:$src1, VR128:$src2), 4713 !if(Is2Addr, 4714 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4715 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4716 [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, 4717 Sched<[sched]>; 4718 def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst), 4719 (ins VR128:$src1, i128mem:$src2), 4720 !if(Is2Addr, 4721 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4722 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4723 [(set VR128:$dst, 4724 (IntId128 VR128:$src1, (ld_frag addr:$src2)))]>, 4725 Sched<[sched.Folded, sched.ReadAfterFold]>; 4726} 4727 4728multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr, 4729 Intrinsic IntId256, 4730 X86FoldableSchedWrite sched> { 4731 let isCommutable = 1 in 4732 def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst), 4733 (ins VR256:$src1, VR256:$src2), 4734 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4735 [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>, 4736 Sched<[sched]>; 4737 def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst), 4738 (ins VR256:$src1, i256mem:$src2), 4739 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4740 [(set VR256:$dst, 4741 (IntId256 VR256:$src1, (load addr:$src2)))]>, 4742 Sched<[sched.Folded, sched.ReadAfterFold]>; 4743} 4744 4745let ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in { 4746let isCommutable = 0 in { 4747 defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8, 4748 VR128, load, i128mem, 4749 SchedWriteVarShuffle.XMM, 0>, VEX_4V, VEX_WIG; 4750 defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16, 4751 v16i8, VR128, load, i128mem, 4752 SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG; 4753} 4754defm VPMULHRSW : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, v8i16, 4755 VR128, load, i128mem, 4756 SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG; 4757} 4758 4759let ImmT = NoImm, Predicates = [HasAVX] in { 4760let isCommutable = 0 in { 4761 defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128, 4762 load, i128mem, 4763 SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG; 4764 defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128, 4765 load, i128mem, 4766 SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG; 4767 defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128, 4768 load, i128mem, 4769 SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG; 4770 defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128, 4771 load, i128mem, 4772 SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG; 4773 defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", 4774 int_x86_ssse3_psign_b_128, 4775 SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG; 4776 defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", 4777 int_x86_ssse3_psign_w_128, 4778 SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG; 4779 defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", 4780 int_x86_ssse3_psign_d_128, 4781 SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG; 4782 defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", 4783 int_x86_ssse3_phadd_sw_128, 4784 
SchedWritePHAdd.XMM, load, 0>, VEX_4V, VEX_WIG; 4785 defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", 4786 int_x86_ssse3_phsub_sw_128, 4787 SchedWritePHAdd.XMM, load, 0>, VEX_4V, VEX_WIG; 4788} 4789} 4790 4791let ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { 4792let isCommutable = 0 in { 4793 defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8, 4794 VR256, load, i256mem, 4795 SchedWriteVarShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; 4796 defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16, 4797 v32i8, VR256, load, i256mem, 4798 SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; 4799} 4800defm VPMULHRSWY : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16, 4801 VR256, load, i256mem, 4802 SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; 4803} 4804 4805let ImmT = NoImm, Predicates = [HasAVX2] in { 4806let isCommutable = 0 in { 4807 defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16, 4808 VR256, load, i256mem, 4809 SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; 4810 defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256, 4811 load, i256mem, 4812 SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; 4813 defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16, 4814 VR256, load, i256mem, 4815 SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; 4816 defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256, 4817 load, i256mem, 4818 SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; 4819 defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b, 4820 SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG; 4821 defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w, 4822 SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG; 4823 defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", int_x86_avx2_psign_d, 4824 SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG; 4825 defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw", 4826 int_x86_avx2_phadd_sw, 4827 SchedWritePHAdd.YMM>, VEX_4V, VEX_L, VEX_WIG; 4828 defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", 4829 int_x86_avx2_phsub_sw, 4830 SchedWritePHAdd.YMM>, VEX_4V, VEX_L, VEX_WIG; 4831} 4832} 4833 4834// None of these have i8 immediate fields. 
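// (For context: these ops live in the 0F 38 opcode map, which carries no
// immediate byte; palignr below does take an imm8 and is encoded in the
// 0F 3A map, which is why it is defined via its own multiclass.)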
4835let ImmT = NoImm, Constraints = "$src1 = $dst" in {
4836let isCommutable = 0 in {
4837 defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, v8i16, VR128,
4838 memop, i128mem, SchedWritePHAdd.XMM>;
4839 defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, v4i32, VR128,
4840 memop, i128mem, SchedWritePHAdd.XMM>;
4841 defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, v8i16, VR128,
4842 memop, i128mem, SchedWritePHAdd.XMM>;
4843 defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, v4i32, VR128,
4844 memop, i128mem, SchedWritePHAdd.XMM>;
4845 defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", int_x86_ssse3_psign_b_128,
4846 SchedWriteVecALU.XMM, memop>;
4847 defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", int_x86_ssse3_psign_w_128,
4848 SchedWriteVecALU.XMM, memop>;
4849 defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", int_x86_ssse3_psign_d_128,
4850 SchedWriteVecALU.XMM, memop>;
4851 defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, v16i8, VR128,
4852 memop, i128mem, SchedWriteVarShuffle.XMM>;
4853 defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
4854 int_x86_ssse3_phadd_sw_128,
4855 SchedWritePHAdd.XMM, memop>;
4856 defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
4857 int_x86_ssse3_phsub_sw_128,
4858 SchedWritePHAdd.XMM, memop>;
4859 defm PMADDUBSW : SS3I_binop_rm<0x04, "pmaddubsw", X86vpmaddubsw, v8i16,
4860 v16i8, VR128, memop, i128mem,
4861 SchedWriteVecIMul.XMM>;
4862}
4863defm PMULHRSW : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, v8i16,
4864 VR128, memop, i128mem, SchedWriteVecIMul.XMM>;
4865}
4866
4867//===---------------------------------------------------------------------===//
4868// SSSE3 - Packed Align Instruction Patterns
4869//===---------------------------------------------------------------------===//
4870
4871multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
4872 PatFrag memop_frag, X86MemOperand x86memop,
4873 X86FoldableSchedWrite sched, bit Is2Addr = 1> {
4874 let hasSideEffects = 0 in {
4875 def rri : SS3AI<0x0F, MRMSrcReg, (outs RC:$dst),
4876 (ins RC:$src1, RC:$src2, u8imm:$src3),
4877 !if(Is2Addr,
4878 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
4879 !strconcat(asm,
4880 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
4881 [(set RC:$dst, (VT (X86PAlignr RC:$src1, RC:$src2, (i8 timm:$src3))))]>,
4882 Sched<[sched]>;
4883 let mayLoad = 1 in
4884 def rmi : SS3AI<0x0F, MRMSrcMem, (outs RC:$dst),
4885 (ins RC:$src1, x86memop:$src2, u8imm:$src3),
4886 !if(Is2Addr,
4887 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
4888 !strconcat(asm,
4889 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
4890 [(set RC:$dst, (VT (X86PAlignr RC:$src1,
4891 (memop_frag addr:$src2),
4892 (i8 timm:$src3))))]>,
4893 Sched<[sched.Folded, sched.ReadAfterFold]>;
4894 }
4895}
4896
4897let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
4898 defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, load, i128mem,
4899 SchedWriteShuffle.XMM, 0>, VEX_4V, VEX_WIG;
4900let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
4901 defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, load, i256mem,
4902 SchedWriteShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
4903let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
4904 defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memop, i128mem,
4905 SchedWriteShuffle.XMM>;
4906
4907//===---------------------------------------------------------------------===//
4908// SSE3 - Thread synchronization
4909//===---------------------------------------------------------------------===// 4910 4911let SchedRW = [WriteSystem] in { 4912let Uses = [EAX, ECX, EDX] in 4913def MONITOR32rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, 4914 TB, Requires<[HasSSE3, Not64BitMode]>; 4915let Uses = [RAX, ECX, EDX] in 4916def MONITOR64rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, 4917 TB, Requires<[HasSSE3, In64BitMode]>; 4918 4919let Uses = [ECX, EAX] in 4920def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", 4921 [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>; 4922} // SchedRW 4923 4924def : InstAlias<"mwait\t{%eax, %ecx|ecx, eax}", (MWAITrr)>, Requires<[Not64BitMode]>; 4925def : InstAlias<"mwait\t{%rax, %rcx|rcx, rax}", (MWAITrr)>, Requires<[In64BitMode]>; 4926 4927def : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITOR32rrr)>, 4928 Requires<[Not64BitMode]>; 4929def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITOR64rrr)>, 4930 Requires<[In64BitMode]>; 4931 4932//===----------------------------------------------------------------------===// 4933// SSE4.1 - Packed Move with Sign/Zero Extend 4934// NOTE: Any Extend is promoted to Zero Extend in X86ISelDAGToDAG.cpp 4935//===----------------------------------------------------------------------===// 4936 4937multiclass SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp, 4938 RegisterClass OutRC, RegisterClass InRC, 4939 X86FoldableSchedWrite sched> { 4940 def rr : SS48I<opc, MRMSrcReg, (outs OutRC:$dst), (ins InRC:$src), 4941 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 4942 Sched<[sched]>; 4943 4944 def rm : SS48I<opc, MRMSrcMem, (outs OutRC:$dst), (ins MemOp:$src), 4945 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 4946 Sched<[sched.Folded]>; 4947} 4948 4949multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr, 4950 X86MemOperand MemOp, X86MemOperand MemYOp, 4951 Predicate prd> { 4952 defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128, 4953 SchedWriteShuffle.XMM>; 4954 let Predicates = [HasAVX, prd] in 4955 defm V#NAME : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp, 4956 VR128, VR128, SchedWriteShuffle.XMM>, 4957 VEX, VEX_WIG; 4958 let Predicates = [HasAVX2, prd] in 4959 defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp, 4960 VR256, VR128, WriteVPMOV256>, 4961 VEX, VEX_L, VEX_WIG; 4962} 4963 4964multiclass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp, 4965 X86MemOperand MemYOp, Predicate prd> { 4966 defm PMOVSX#NAME : SS41I_pmovx_rm_all<opc, !strconcat("pmovsx", OpcodeStr), 4967 MemOp, MemYOp, prd>; 4968 defm PMOVZX#NAME : SS41I_pmovx_rm_all<!add(opc, 0x10), 4969 !strconcat("pmovzx", OpcodeStr), 4970 MemOp, MemYOp, prd>; 4971} 4972 4973defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem, NoVLX_Or_NoBWI>; 4974defm WD : SS41I_pmovx_rm<0x23, "wd", i64mem, i128mem, NoVLX>; 4975defm DQ : SS41I_pmovx_rm<0x25, "dq", i64mem, i128mem, NoVLX>; 4976 4977defm BD : SS41I_pmovx_rm<0x21, "bd", i32mem, i64mem, NoVLX>; 4978defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem, NoVLX>; 4979 4980defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem, NoVLX>; 4981 4982// AVX2 Patterns 4983multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, 4984 SDNode ExtOp, SDNode InVecOp> { 4985 // Register-Register patterns 4986 let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { 4987 def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))), 4988 (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>; 4989 } 4990 let Predicates = 
[HasAVX2, NoVLX] in { 4991 def : Pat<(v8i32 (InVecOp (v16i8 VR128:$src))), 4992 (!cast<I>(OpcPrefix#BDYrr) VR128:$src)>; 4993 def : Pat<(v4i64 (InVecOp (v16i8 VR128:$src))), 4994 (!cast<I>(OpcPrefix#BQYrr) VR128:$src)>; 4995 4996 def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))), 4997 (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>; 4998 def : Pat<(v4i64 (InVecOp (v8i16 VR128:$src))), 4999 (!cast<I>(OpcPrefix#WQYrr) VR128:$src)>; 5000 5001 def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))), 5002 (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>; 5003 } 5004 5005 // Simple Register-Memory patterns 5006 let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { 5007 def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)), 5008 (!cast<I>(OpcPrefix#BWYrm) addr:$src)>; 5009 5010 def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))), 5011 (!cast<I>(OpcPrefix#BWYrm) addr:$src)>; 5012 } 5013 5014 let Predicates = [HasAVX2, NoVLX] in { 5015 def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)), 5016 (!cast<I>(OpcPrefix#BDYrm) addr:$src)>; 5017 def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)), 5018 (!cast<I>(OpcPrefix#BQYrm) addr:$src)>; 5019 5020 def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)), 5021 (!cast<I>(OpcPrefix#WDYrm) addr:$src)>; 5022 def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)), 5023 (!cast<I>(OpcPrefix#WQYrm) addr:$src)>; 5024 5025 def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)), 5026 (!cast<I>(OpcPrefix#DQYrm) addr:$src)>; 5027 } 5028 5029 // AVX2 Register-Memory patterns 5030 let Predicates = [HasAVX2, NoVLX] in { 5031 def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))), 5032 (!cast<I>(OpcPrefix#WDYrm) addr:$src)>; 5033 5034 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), 5035 (!cast<I>(OpcPrefix#BDYrm) addr:$src)>; 5036 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), 5037 (!cast<I>(OpcPrefix#BDYrm) addr:$src)>; 5038 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))), 5039 (!cast<I>(OpcPrefix#BDYrm) addr:$src)>; 5040 5041 def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))), 5042 (!cast<I>(OpcPrefix#DQYrm) addr:$src)>; 5043 5044 def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), 5045 (!cast<I>(OpcPrefix#BQYrm) addr:$src)>; 5046 def : Pat<(v4i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload32 addr:$src))))), 5047 (!cast<I>(OpcPrefix#BQYrm) addr:$src)>; 5048 5049 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), 5050 (!cast<I>(OpcPrefix#WQYrm) addr:$src)>; 5051 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), 5052 (!cast<I>(OpcPrefix#WQYrm) addr:$src)>; 5053 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))), 5054 (!cast<I>(OpcPrefix#WQYrm) addr:$src)>; 5055 } 5056} 5057 5058defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", sext, sext_invec>; 5059defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", zext, zext_invec>; 5060 5061// SSE4.1/AVX patterns. 
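// For example, with the VPMOVSX instantiation below,
//   (v8i16 (sext_invec (v16i8 VR128:$src)))  selects VPMOVSXBWrr,
// and the rm patterns fold extending loads, e.g.
//   (v4i32 (sextloadvi8 addr:$src))          selects VPMOVSXBDrm.
// (Illustrative; the zero-extend instantiation maps the same way onto the
// VPMOVZX* opcodes.)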
5062multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy, 5063 SDNode ExtOp> { 5064 let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { 5065 def : Pat<(v8i16 (ExtOp (v16i8 VR128:$src))), 5066 (!cast<I>(OpcPrefix#BWrr) VR128:$src)>; 5067 } 5068 let Predicates = [HasAVX, NoVLX] in { 5069 def : Pat<(v4i32 (ExtOp (v16i8 VR128:$src))), 5070 (!cast<I>(OpcPrefix#BDrr) VR128:$src)>; 5071 def : Pat<(v2i64 (ExtOp (v16i8 VR128:$src))), 5072 (!cast<I>(OpcPrefix#BQrr) VR128:$src)>; 5073 5074 def : Pat<(v4i32 (ExtOp (v8i16 VR128:$src))), 5075 (!cast<I>(OpcPrefix#WDrr) VR128:$src)>; 5076 def : Pat<(v2i64 (ExtOp (v8i16 VR128:$src))), 5077 (!cast<I>(OpcPrefix#WQrr) VR128:$src)>; 5078 5079 def : Pat<(v2i64 (ExtOp (v4i32 VR128:$src))), 5080 (!cast<I>(OpcPrefix#DQrr) VR128:$src)>; 5081 } 5082 let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { 5083 def : Pat<(v8i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)), 5084 (!cast<I>(OpcPrefix#BWrm) addr:$src)>; 5085 } 5086 let Predicates = [HasAVX, NoVLX] in { 5087 def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)), 5088 (!cast<I>(OpcPrefix#BDrm) addr:$src)>; 5089 def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)), 5090 (!cast<I>(OpcPrefix#BQrm) addr:$src)>; 5091 5092 def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)), 5093 (!cast<I>(OpcPrefix#WDrm) addr:$src)>; 5094 def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)), 5095 (!cast<I>(OpcPrefix#WQrm) addr:$src)>; 5096 5097 def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)), 5098 (!cast<I>(OpcPrefix#DQrm) addr:$src)>; 5099 } 5100 let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { 5101 def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), 5102 (!cast<I>(OpcPrefix#BWrm) addr:$src)>; 5103 def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), 5104 (!cast<I>(OpcPrefix#BWrm) addr:$src)>; 5105 def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))), 5106 (!cast<I>(OpcPrefix#BWrm) addr:$src)>; 5107 def : Pat<(v8i16 (ExtOp (loadv16i8 addr:$src))), 5108 (!cast<I>(OpcPrefix#BWrm) addr:$src)>; 5109 } 5110 let Predicates = [HasAVX, NoVLX] in { 5111 def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), 5112 (!cast<I>(OpcPrefix#BDrm) addr:$src)>; 5113 def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))), 5114 (!cast<I>(OpcPrefix#BDrm) addr:$src)>; 5115 def : Pat<(v4i32 (ExtOp (loadv16i8 addr:$src))), 5116 (!cast<I>(OpcPrefix#BDrm) addr:$src)>; 5117 5118 def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))), 5119 (!cast<I>(OpcPrefix#BQrm) addr:$src)>; 5120 def : Pat<(v2i64 (ExtOp (loadv16i8 addr:$src))), 5121 (!cast<I>(OpcPrefix#BQrm) addr:$src)>; 5122 5123 def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), 5124 (!cast<I>(OpcPrefix#WDrm) addr:$src)>; 5125 def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), 5126 (!cast<I>(OpcPrefix#WDrm) addr:$src)>; 5127 def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))), 5128 (!cast<I>(OpcPrefix#WDrm) addr:$src)>; 5129 def : Pat<(v4i32 (ExtOp (loadv8i16 addr:$src))), 5130 (!cast<I>(OpcPrefix#WDrm) addr:$src)>; 5131 5132 def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), 5133 (!cast<I>(OpcPrefix#WQrm) addr:$src)>; 5134 def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))), 5135 (!cast<I>(OpcPrefix#WQrm) addr:$src)>; 5136 def : Pat<(v2i64 (ExtOp 
(loadv8i16 addr:$src))),
5137 (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
5138
5139 def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
5140 (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
5141 def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
5142 (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
5143 def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
5144 (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
5145 def : Pat<(v2i64 (ExtOp (loadv4i32 addr:$src))),
5146 (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
5147 }
5148}
5149
5150defm : SS41I_pmovx_patterns<"VPMOVSX", "s", sext_invec>;
5151defm : SS41I_pmovx_patterns<"VPMOVZX", "z", zext_invec>;
5152
5153let Predicates = [UseSSE41] in {
5154 defm : SS41I_pmovx_patterns<"PMOVSX", "s", sext_invec>;
5155 defm : SS41I_pmovx_patterns<"PMOVZX", "z", zext_invec>;
5156}
5157
5158//===----------------------------------------------------------------------===//
5159// SSE4.1 - Extract Instructions
5160//===----------------------------------------------------------------------===//
5161
5162/// SS41I_extract8 - SSE 4.1 extract 8 bits to a 32-bit reg or 8-bit mem
5163multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
5164 def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
5165 (ins VR128:$src1, u8imm:$src2),
5166 !strconcat(OpcodeStr,
5167 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5168 [(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1),
5169 timm:$src2))]>,
5170 Sched<[WriteVecExtract]>;
5171 let hasSideEffects = 0, mayStore = 1 in
5172 def mr : SS4AIi8<opc, MRMDestMem, (outs),
5173 (ins i8mem:$dst, VR128:$src1, u8imm:$src2),
5174 !strconcat(OpcodeStr,
5175 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5176 [(store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), timm:$src2))),
5177 addr:$dst)]>, Sched<[WriteVecExtractSt]>;
5178}
5179
5180let Predicates = [HasAVX, NoBWI] in
5181 defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX, VEX_WIG;
5182
5183defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
5184
5185
5186/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
5187multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
5188 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
5189 def rr_REV : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
5190 (ins VR128:$src1, u8imm:$src2),
5191 !strconcat(OpcodeStr,
5192 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
5193 Sched<[WriteVecExtract]>, FoldGenData<NAME#rr>;
5194
5195 let hasSideEffects = 0, mayStore = 1 in
5196 def mr : SS4AIi8<opc, MRMDestMem, (outs),
5197 (ins i16mem:$dst, VR128:$src1, u8imm:$src2),
5198 !strconcat(OpcodeStr,
5199 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5200 [(store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), timm:$src2))),
5201 addr:$dst)]>, Sched<[WriteVecExtractSt]>;
5202}
5203
5204let Predicates = [HasAVX, NoBWI] in
5205 defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX, VEX_WIG;
5206
5207defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
5208
5209
5210/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
5211multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
5212 def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
5213 (ins VR128:$src1, u8imm:$src2),
5214 !strconcat(OpcodeStr,
5215 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5216 [(set GR32:$dst,
5217 (extractelt (v4i32 VR128:$src1), imm:$src2))]>,
5218 Sched<[WriteVecExtract]>;
5219 def mr : SS4AIi8<opc, MRMDestMem, (outs),
5220 (ins i32mem:$dst, VR128:$src1, u8imm:$src2),
5221 !strconcat(OpcodeStr,
5222 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5223 [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
5224 addr:$dst)]>, Sched<[WriteVecExtractSt]>;
5225}
5226
5227let Predicates = [HasAVX, NoDQI] in
5228 defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;
5229
5230defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
5231
5232/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
5233multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
5234 def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
5235 (ins VR128:$src1, u8imm:$src2),
5236 !strconcat(OpcodeStr,
5237 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5238 [(set GR64:$dst,
5239 (extractelt (v2i64 VR128:$src1), imm:$src2))]>,
5240 Sched<[WriteVecExtract]>;
5241 def mr : SS4AIi8<opc, MRMDestMem, (outs),
5242 (ins i64mem:$dst, VR128:$src1, u8imm:$src2),
5243 !strconcat(OpcodeStr,
5244 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5245 [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
5246 addr:$dst)]>, Sched<[WriteVecExtractSt]>;
5247}
5248
5249let Predicates = [HasAVX, NoDQI] in
5250 defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
5251
5252defm PEXTRQ : SS41I_extract64<0x16, "pextrq">, REX_W;
5253
5254/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
5255/// destination
5256multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
5257 def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
5258 (ins VR128:$src1, u8imm:$src2),
5259 !strconcat(OpcodeStr,
5260 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5261 [(set GR32orGR64:$dst,
5262 (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
5263 Sched<[WriteVecExtract]>;
5264 def mr : SS4AIi8<opc, MRMDestMem, (outs),
5265 (ins f32mem:$dst, VR128:$src1, u8imm:$src2),
5266 !strconcat(OpcodeStr,
5267 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5268 [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
5269 addr:$dst)]>, Sched<[WriteVecExtractSt]>;
5270}
5271
5272let ExeDomain = SSEPackedSingle in {
5273 let Predicates = [UseAVX] in
5274 defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX, VEX_WIG;
5275 defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
5276}
5277
5278//===----------------------------------------------------------------------===//
5279// SSE4.1 - Insert Instructions
5280//===----------------------------------------------------------------------===//
5281
5282multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
5283 def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
5284 (ins VR128:$src1, GR32orGR64:$src2, u8imm:$src3),
5285 !if(Is2Addr,
5286 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5287 !strconcat(asm,
5288 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5289 [(set VR128:$dst,
5290 (X86pinsrb VR128:$src1, GR32orGR64:$src2, timm:$src3))]>,
5291 Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
5292 def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
5293 (ins VR128:$src1, i8mem:$src2, u8imm:$src3),
5294 !if(Is2Addr,
5295 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5296 !strconcat(asm,
5297 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5298 [(set VR128:$dst,
5299 (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), timm:$src3))]>,
5300 Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
5301}
5302
5303let Predicates = [HasAVX, NoBWI] in
5304 defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V, VEX_WIG;
5305let Constraints = "$src1 = $dst" in
5306 defm PINSRB : SS41I_insert8<0x20, "pinsrb">;

/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rr_REV : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
                   Sched<[WriteVecExtract]>, FoldGenData<NAME#rr>;

  let hasSideEffects = 0, mayStore = 1 in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i16mem:$dst, VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), timm:$src2))),
                         addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}

let Predicates = [HasAVX, NoBWI] in
  defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX, VEX_WIG;

defm PEXTRW : SS41I_extract16<0x15, "pextrw">;


/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
                 (ins VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR32:$dst,
                   (extractelt (v4i32 VR128:$src1), imm:$src2))]>,
                 Sched<[WriteVecExtract]>;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i32mem:$dst, VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
                         addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}

let Predicates = [HasAVX, NoDQI] in
  defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;

defm PEXTRD : SS41I_extract32<0x16, "pextrd">;

/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
                 (ins VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR64:$dst,
                   (extractelt (v2i64 VR128:$src1), imm:$src2))]>,
                 Sched<[WriteVecExtract]>;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i64mem:$dst, VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
                         addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}

let Predicates = [HasAVX, NoDQI] in
  defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;

defm PEXTRQ : SS41I_extract64<0x16, "pextrq">, REX_W;

/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
/// destination
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                 (ins VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR32orGR64:$dst,
                    (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
                 Sched<[WriteVecExtract]>;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins f32mem:$dst, VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
                         addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}

let ExeDomain = SSEPackedSingle in {
  let Predicates = [UseAVX] in
    defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX, VEX_WIG;
  defm EXTRACTPS   : SS41I_extractf32<0x17, "extractps">;
}
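
// Note: extractps writes the selected f32 lane as a plain 32-bit value, so
// the register form (e.g. "extractps $2, %xmm0, %eax") moves a float lane
// directly to a GPR and the memory form stores it with no intermediate GPR.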

//===----------------------------------------------------------------------===//
// SSE4.1 - Insert Instructions
//===----------------------------------------------------------------------===//

multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, GR32orGR64:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86pinsrb VR128:$src1, GR32orGR64:$src2, timm:$src3))]>,
      Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i8mem:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), timm:$src3))]>,
      Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

let Predicates = [HasAVX, NoBWI] in
  defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V, VEX_WIG;
let Constraints = "$src1 = $dst" in
  defm PINSRB : SS41I_insert8<0x20, "pinsrb">;

multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, GR32:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
      Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i32mem:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), imm:$src3)))]>,
      Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

let Predicates = [HasAVX, NoDQI] in
  defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
  defm PINSRD : SS41I_insert32<0x22, "pinsrd">;

multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, GR64:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
      Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i64mem:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), imm:$src3)))]>,
      Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

let Predicates = [HasAVX, NoDQI] in
  defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
let Constraints = "$src1 = $dst" in
  defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;

// insertps has a few different modes. The first two below are optimized
// inserts that won't zero arbitrary elements in the destination vector; the
// next one matches the intrinsic and may zero arbitrary elements in the
// target vector.
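//
// As a reminder of the immediate layout used by the register form below:
// imm[7:6] selects the source element, imm[5:4] selects the destination
// element, and each set bit of imm[3:0] zeroes a destination element. So
// "insertps $0x10, %xmm1, %xmm0" copies element 0 of xmm1 into element 1 of
// xmm0 with no zeroing (0x10 = 00 01 0000). For the memory form the source
// element is the loaded scalar.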
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, VR128:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86insertps VR128:$src1, VR128:$src2, timm:$src3))]>,
      Sched<[SchedWriteFShuffle.XMM]>;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, f32mem:$src2, u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86insertps VR128:$src1,
                     (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                     timm:$src3))]>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

let ExeDomain = SSEPackedSingle in {
  let Predicates = [UseAVX] in
    defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>,
                     VEX_4V, VEX_WIG;
  let Constraints = "$src1 = $dst" in
    defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Round Instructions
//===----------------------------------------------------------------------===//

multiclass sse41_fp_unop_p<bits<8> opc, string OpcodeStr,
                           X86MemOperand x86memop, RegisterClass RC,
                           ValueType VT, PatFrag mem_frag, SDPatternOperator OpNode,
                           X86FoldableSchedWrite sched> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Vector intrinsic operation, reg
  def r : SS4AIi8<opc, MRMSrcReg,
                  (outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set RC:$dst, (VT (OpNode RC:$src1, timm:$src2)))]>,
                  Sched<[sched]>;

  // Vector intrinsic operation, mem
  def m : SS4AIi8<opc, MRMSrcMem,
                  (outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set RC:$dst,
                        (VT (OpNode (mem_frag addr:$src1), timm:$src2)))]>,
                  Sched<[sched.Folded]>;
}
}

multiclass avx_fp_unop_rm<bits<8> opcss, bits<8> opcsd,
                          string OpcodeStr, X86FoldableSchedWrite sched> {
let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in {
  def SSr : SS4AIi8<opcss, MRMSrcReg,
        (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32u8imm:$src3),
        !strconcat(OpcodeStr,
            "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        []>, Sched<[sched]>;

  let mayLoad = 1 in
  def SSm : SS4AIi8<opcss, MRMSrcMem,
        (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, i32u8imm:$src3),
        !strconcat(OpcodeStr,
            "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, hasSideEffects = 0

let ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in {
  def SDr : SS4AIi8<opcsd, MRMSrcReg,
        (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32u8imm:$src3),
        !strconcat(OpcodeStr,
            "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        []>, Sched<[sched]>;

  let mayLoad = 1 in
  def SDm : SS4AIi8<opcsd, MRMSrcMem,
        (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, i32u8imm:$src3),
        !strconcat(OpcodeStr,
            "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, hasSideEffects = 0
}

multiclass sse41_fp_unop_s<bits<8> opcss, bits<8> opcsd,
                           string OpcodeStr, X86FoldableSchedWrite sched> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in {
  def SSr : SS4AIi8<opcss, MRMSrcReg,
                    (outs FR32:$dst), (ins FR32:$src1, i32u8imm:$src2),
                    !strconcat(OpcodeStr,
                               "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, Sched<[sched]>;

  let mayLoad = 1 in
  def SSm : SS4AIi8<opcss, MRMSrcMem,
                    (outs FR32:$dst), (ins f32mem:$src1, i32u8imm:$src2),
                    !strconcat(OpcodeStr,
                               "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, hasSideEffects = 0

let ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in {
  def SDr : SS4AIi8<opcsd, MRMSrcReg,
                    (outs FR64:$dst), (ins FR64:$src1, i32u8imm:$src2),
                    !strconcat(OpcodeStr,
                               "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, Sched<[sched]>;

  let mayLoad = 1 in
  def SDm : SS4AIi8<opcsd, MRMSrcMem,
                    (outs FR64:$dst), (ins f64mem:$src1, i32u8imm:$src2),
                    !strconcat(OpcodeStr,
                               "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, hasSideEffects = 0
}
}

multiclass sse41_fp_binop_s<bits<8> opcss, bits<8> opcsd,
                            string OpcodeStr, X86FoldableSchedWrite sched,
                            ValueType VT32, ValueType VT64,
                            SDNode OpNode, bit Is2Addr = 1> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
let ExeDomain = SSEPackedSingle in {
  def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set VR128:$dst, (VT32 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>,
        Sched<[sched]>;

  def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
        (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set VR128:$dst,
             (OpNode VR128:$src1, (sse_load_f32 addr:$src2), timm:$src3))]>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle

let ExeDomain = SSEPackedDouble in {
  def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set VR128:$dst, (VT64 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>,
        Sched<[sched]>;

  def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
        (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set VR128:$dst,
              (OpNode VR128:$src1, (sse_load_f64 addr:$src2), timm:$src3))]>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble
}
}
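
// For reference when reading the round definitions below: the 8-bit immediate
// encodes the rounding control. imm[1:0] selects the mode (00 nearest-even,
// 01 down, 10 up, 11 truncate), setting imm[2] uses MXCSR.RC instead, and
// imm[3] suppresses the precision exception. E.g. "roundss $11, %xmm1, %xmm0"
// truncates with the precision exception suppressed (11 = 0b1011).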

// FP round - roundss, roundps, roundsd, roundpd
let Predicates = [HasAVX, NoVLX] in {
  let ExeDomain = SSEPackedSingle, Uses = [MXCSR], mayRaiseFPException = 1 in {
    // Intrinsic form
    defm VROUNDPS  : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
                                     loadv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>,
                                     VEX, VEX_WIG;
    defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32,
                                     loadv8f32, X86any_VRndScale, SchedWriteFRnd.YMM>,
                                     VEX, VEX_L, VEX_WIG;
  }

  let ExeDomain = SSEPackedDouble, Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm VROUNDPD  : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
                                     loadv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>,
                                     VEX, VEX_WIG;
    defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64,
                                     loadv4f64, X86any_VRndScale, SchedWriteFRnd.YMM>,
                                     VEX, VEX_L, VEX_WIG;
  }
}
let Predicates = [UseAVX] in {
  defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl,
                                 v4f32, v2f64, X86RndScales, 0>,
                                 VEX_4V, VEX_LIG, VEX_WIG, SIMD_EXC;
  defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>,
                               VEX_4V, VEX_LIG, VEX_WIG, SIMD_EXC;
}

let Predicates = [UseAVX] in {
  def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2),
            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>;
  def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2),
            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, timm:$src2)>;
}

let Predicates = [UseAVX, OptForSize] in {
  def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2),
            (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
  def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2),
            (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
}

let ExeDomain = SSEPackedSingle in
defm ROUNDPS : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32,
                               memopv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>;
let ExeDomain = SSEPackedDouble in
defm ROUNDPD : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64,
                               memopv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>;

defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl>;

let Constraints = "$src1 = $dst" in
defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl,
                              v4f32, v2f64, X86RndScales>;

let Predicates = [UseSSE41] in {
  def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2),
            (ROUNDSSr FR32:$src1, timm:$src2)>;
  def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2),
            (ROUNDSDr FR64:$src1, timm:$src2)>;
}

let Predicates = [UseSSE41, OptForSize] in {
  def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2),
            (ROUNDSSm addr:$src1, timm:$src2)>;
  def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2),
            (ROUNDSDm addr:$src1, timm:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Bit Test
//===----------------------------------------------------------------------===//

// The ptest instruction: we lower to this in X86ISelLowering, primarily from
// the Intel intrinsic that corresponds to it.
let Defs = [EFLAGS], Predicates = [HasAVX] in {
def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
               "vptest\t{$src2, $src1|$src1, $src2}",
               [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
               Sched<[SchedWriteVecTest.XMM]>, VEX, VEX_WIG;
def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
               "vptest\t{$src2, $src1|$src1, $src2}",
               [(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
               Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>,
               VEX, VEX_WIG;

def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
                Sched<[SchedWriteVecTest.YMM]>, VEX, VEX_L, VEX_WIG;
def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
                Sched<[SchedWriteVecTest.YMM.Folded, SchedWriteVecTest.YMM.ReadAfterFold]>,
                VEX, VEX_L, VEX_WIG;
}

let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
              "ptest\t{$src2, $src1|$src1, $src2}",
              [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
              Sched<[SchedWriteVecTest.XMM]>;
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
              "ptest\t{$src2, $src1|$src1, $src2}",
              [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
              Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>;
}

// The bit test instructions below are AVX-only.
multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop, PatFrag mem_frag, ValueType vt,
                       X86FoldableSchedWrite sched> {
  def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
            [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>,
            Sched<[sched]>, VEX;
  def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
            [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
            Sched<[sched.Folded, sched.ReadAfterFold]>, VEX;
}

let Defs = [EFLAGS], Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
defm VTESTPS  : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32, v4f32,
                            SchedWriteFTest.XMM>;
defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32, v8f32,
                            SchedWriteFTest.YMM>, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VTESTPD  : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64, v2f64,
                            SchedWriteFTest.XMM>;
defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64,
                            SchedWriteFTest.YMM>, VEX_L;
}
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Misc Instructions
//===----------------------------------------------------------------------===//

let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
  def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                     "popcnt{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>,
                     Sched<[WritePOPCNT]>, OpSize16, XS;
  def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                     "popcnt{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (ctpop (loadi16 addr:$src))),
                      (implicit EFLAGS)]>,
                     Sched<[WritePOPCNT.Folded]>, OpSize16, XS;

  def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                     "popcnt{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>,
                     Sched<[WritePOPCNT]>, OpSize32, XS;

  def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                     "popcnt{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (ctpop (loadi32 addr:$src))),
                      (implicit EFLAGS)]>,
                     Sched<[WritePOPCNT.Folded]>, OpSize32, XS;

  def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                      "popcnt{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>,
                      Sched<[WritePOPCNT]>, XS;
  def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                      "popcnt{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (ctpop (loadi64 addr:$src))),
                       (implicit EFLAGS)]>,
                      Sched<[WritePOPCNT.Folded]>, XS;
}
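
// Usage sketch: "popcnt %rdi, %rax" writes the number of set bits in rdi to
// rax. The (implicit EFLAGS) above models that popcnt also writes the flags
// (OF/SF/AF/CF/PF are cleared; ZF is set when the source is zero).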

// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
                                 SDNode OpNode, PatFrag ld_frag,
                                 X86FoldableSchedWrite Sched> {
  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst, (v8i16 (OpNode (v8i16 VR128:$src))))]>,
                 Sched<[Sched]>;
  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
                 (ins i128mem:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst,
                   (v8i16 (OpNode (ld_frag addr:$src))))]>,
                 Sched<[Sched.Folded]>;
}

// PHMIN has the same profile as PSAD, thus we use the same scheduling
// model, although the naming is misleading.
let Predicates = [HasAVX] in
defm VPHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "vphminposuw",
                                         X86phminpos, load,
                                         WritePHMINPOS>, VEX, VEX_WIG;
defm PHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "phminposuw",
                                        X86phminpos, memop,
                                        WritePHMINPOS>;

/// SS48I_binop_rm - Simple SSE41 binary operator.
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                          X86MemOperand x86memop, X86FoldableSchedWrite sched,
                          bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
       Sched<[sched]>;
  def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst,
         (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}

let Predicates = [HasAVX, NoVLX] in {
  defm VPMINSD  : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128,
                                 load, i128mem, SchedWriteVecALU.XMM, 0>,
                                 VEX_4V, VEX_WIG;
  defm VPMINUD  : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128,
                                 load, i128mem, SchedWriteVecALU.XMM, 0>,
                                 VEX_4V, VEX_WIG;
  defm VPMAXSD  : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128,
                                 load, i128mem, SchedWriteVecALU.XMM, 0>,
                                 VEX_4V, VEX_WIG;
  defm VPMAXUD  : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128,
                                 load, i128mem, SchedWriteVecALU.XMM, 0>,
                                 VEX_4V, VEX_WIG;
  defm VPMULDQ  : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v2i64, VR128,
                                 load, i128mem, SchedWriteVecIMul.XMM, 0>,
                                 VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
  defm VPMINSB  : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128,
                                 load, i128mem, SchedWriteVecALU.XMM, 0>,
                                 VEX_4V, VEX_WIG;
  defm VPMINUW  : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128,
                                 load, i128mem, SchedWriteVecALU.XMM, 0>,
                                 VEX_4V, VEX_WIG;
  defm VPMAXSB  : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128,
                                 load, i128mem, SchedWriteVecALU.XMM, 0>,
                                 VEX_4V, VEX_WIG;
  defm VPMAXUW  : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128,
                                 load, i128mem, SchedWriteVecALU.XMM, 0>,
                                 VEX_4V, VEX_WIG;
}

let Predicates = [HasAVX2, NoVLX] in {
  defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256,
                                 load, i256mem, SchedWriteVecALU.YMM, 0>,
                                 VEX_4V, VEX_L, VEX_WIG;
  defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256,
                                 load, i256mem, SchedWriteVecALU.YMM, 0>,
                                 VEX_4V, VEX_L, VEX_WIG;
  defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256,
                                 load, i256mem, SchedWriteVecALU.YMM, 0>,
                                 VEX_4V, VEX_L, VEX_WIG;
  defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256,
                                 load, i256mem, SchedWriteVecALU.YMM, 0>,
                                 VEX_4V, VEX_L, VEX_WIG;
  defm VPMULDQY : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v4i64, VR256,
                                 load, i256mem, SchedWriteVecIMul.YMM, 0>,
                                 VEX_4V, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256,
                                 load, i256mem, SchedWriteVecALU.YMM, 0>,
                                 VEX_4V, VEX_L, VEX_WIG;
  defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256,
                                 load, i256mem, SchedWriteVecALU.YMM, 0>,
                                 VEX_4V, VEX_L, VEX_WIG;
  defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256,
                                 load, i256mem, SchedWriteVecALU.YMM, 0>,
                                 VEX_4V, VEX_L, VEX_WIG;
  defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256,
                                 load, i256mem, SchedWriteVecALU.YMM, 0>,
                                 VEX_4V, VEX_L, VEX_WIG;
}

let Constraints = "$src1 = $dst" in {
  defm PMINSB : SS48I_binop_rm<0x38, "pminsb", smin, v16i8, VR128,
                               memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMINSD : SS48I_binop_rm<0x39, "pminsd", smin, v4i32, VR128,
                               memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMINUD : SS48I_binop_rm<0x3B, "pminud", umin, v4i32, VR128,
                               memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", umin, v8i16, VR128,
                               memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", smax, v16i8, VR128,
                               memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", smax, v4i32, VR128,
                               memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", umax, v4i32, VR128,
                               memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", umax, v8i16, VR128,
                               memop, i128mem, SchedWriteVecALU.XMM, 1>;
  defm PMULDQ : SS48I_binop_rm<0x28, "pmuldq", X86pmuldq, v2i64, VR128,
                               memop, i128mem, SchedWriteVecIMul.XMM, 1>;
}

let Predicates = [HasAVX, NoVLX] in
  defm VPMULLD  : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
                                 load, i128mem, SchedWritePMULLD.XMM, 0>,
                                 VEX_4V, VEX_WIG;
let Predicates = [HasAVX] in
  defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
                                 load, i128mem, SchedWriteVecALU.XMM, 0>,
                                 VEX_4V, VEX_WIG;

let Predicates = [HasAVX2, NoVLX] in
  defm VPMULLDY  : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
                                  load, i256mem, SchedWritePMULLD.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;
let Predicates = [HasAVX2] in
  defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;

let Constraints = "$src1 = $dst" in {
  defm PMULLD  : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
                                memop, i128mem, SchedWritePMULLD.XMM, 1>;
  defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
                                memop, i128mem, SchedWriteVecALU.XMM, 1>;
}

/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
                 Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
                 X86MemOperand x86memop, bit Is2Addr,
                 X86FoldableSchedWrite sched> {
  let isCommutable = 1 in
  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst, (IntId RC:$src1, RC:$src2, timm:$src3))]>,
        Sched<[sched]>;
  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst,
          (IntId RC:$src1, (memop_frag addr:$src2), timm:$src3))]>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
}

/// SS41I_binop_rmi - SSE 4.1 binary operator with 8-bit immediate
multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                           X86MemOperand x86memop, bit Is2Addr,
                           X86FoldableSchedWrite sched> {
  let isCommutable = 1 in
  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
        Sched<[sched]>;
  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst,
          (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
}

def BlendCommuteImm2 : SDNodeXForm<timm, [{
  uint8_t Imm = N->getZExtValue() & 0x03;
  return getI8Imm(Imm ^ 0x03, SDLoc(N));
}]>;

def BlendCommuteImm4 : SDNodeXForm<timm, [{
  uint8_t Imm = N->getZExtValue() & 0x0f;
  return getI8Imm(Imm ^ 0x0f, SDLoc(N));
}]>;

def BlendCommuteImm8 : SDNodeXForm<timm, [{
  uint8_t Imm = N->getZExtValue() & 0xff;
  return getI8Imm(Imm ^ 0xff, SDLoc(N));
}]>;

// Turn a 4-bit blendi immediate to 8-bit for use with pblendw.
def BlendScaleImm4 : SDNodeXForm<timm, [{
  uint8_t Imm = N->getZExtValue();
  uint8_t NewImm = 0;
  for (unsigned i = 0; i != 4; ++i) {
    if (Imm & (1 << i))
      NewImm |= 0x3 << (i * 2);
  }
  return getI8Imm(NewImm, SDLoc(N));
}]>;

// Turn a 2-bit blendi immediate to 8-bit for use with pblendw.
def BlendScaleImm2 : SDNodeXForm<timm, [{
  uint8_t Imm = N->getZExtValue();
  uint8_t NewImm = 0;
  for (unsigned i = 0; i != 2; ++i) {
    if (Imm & (1 << i))
      NewImm |= 0xf << (i * 4);
  }
  return getI8Imm(NewImm, SDLoc(N));
}]>;

// Turn a 2-bit blendi immediate to 4-bit for use with pblendd.
def BlendScaleImm2to4 : SDNodeXForm<timm, [{
  uint8_t Imm = N->getZExtValue();
  uint8_t NewImm = 0;
  for (unsigned i = 0; i != 2; ++i) {
    if (Imm & (1 << i))
      NewImm |= 0x3 << (i * 2);
  }
  return getI8Imm(NewImm, SDLoc(N));
}]>;

// Turn a 4-bit blendi immediate to 8-bit for use with pblendw and invert it.
def BlendScaleCommuteImm4 : SDNodeXForm<timm, [{
  uint8_t Imm = N->getZExtValue();
  uint8_t NewImm = 0;
  for (unsigned i = 0; i != 4; ++i) {
    if (Imm & (1 << i))
      NewImm |= 0x3 << (i * 2);
  }
  return getI8Imm(NewImm ^ 0xff, SDLoc(N));
}]>;

// Turn a 2-bit blendi immediate to 8-bit for use with pblendw and invert it.
def BlendScaleCommuteImm2 : SDNodeXForm<timm, [{
  uint8_t Imm = N->getZExtValue();
  uint8_t NewImm = 0;
  for (unsigned i = 0; i != 2; ++i) {
    if (Imm & (1 << i))
      NewImm |= 0xf << (i * 4);
  }
  return getI8Imm(NewImm ^ 0xff, SDLoc(N));
}]>;

// Turn a 2-bit blendi immediate to 4-bit for use with pblendd and invert it.
def BlendScaleCommuteImm2to4 : SDNodeXForm<timm, [{
  uint8_t Imm = N->getZExtValue();
  uint8_t NewImm = 0;
  for (unsigned i = 0; i != 2; ++i) {
    if (Imm & (1 << i))
      NewImm |= 0x3 << (i * 2);
  }
  return getI8Imm(NewImm ^ 0xf, SDLoc(N));
}]>;
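
// Worked example for the transforms above: commuting "blendps $0b0101" swaps
// which operand feeds each lane, so BlendCommuteImm4 rewrites the mask to
// 0b1010 (XOR with 0xf). Scaling a 2-bit v2f64/v2i64 mask of 0b10 for pblendw
// via BlendScaleImm2 widens each bit to a nibble, giving 0b11110000.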

let Predicates = [HasAVX] in {
  let isCommutable = 0 in {
    defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
                                        VR128, load, i128mem, 0,
                                        SchedWriteMPSAD.XMM>, VEX_4V, VEX_WIG;
  }

let Uses = [MXCSR], mayRaiseFPException = 1 in {
  let ExeDomain = SSEPackedSingle in
  defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
                                   VR128, load, f128mem, 0,
                                   SchedWriteDPPS.XMM>, VEX_4V, VEX_WIG;
  let ExeDomain = SSEPackedDouble in
  defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
                                   VR128, load, f128mem, 0,
                                   SchedWriteDPPD.XMM>, VEX_4V, VEX_WIG;
  let ExeDomain = SSEPackedSingle in
  defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
                                    VR256, load, i256mem, 0,
                                    SchedWriteDPPS.YMM>, VEX_4V, VEX_L, VEX_WIG;
}
}

let Predicates = [HasAVX2] in {
  let isCommutable = 0 in {
    defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
                                         VR256, load, i256mem, 0,
                                         SchedWriteMPSAD.YMM>, VEX_4V, VEX_L, VEX_WIG;
  }
}

let Constraints = "$src1 = $dst" in {
  let isCommutable = 0 in {
    defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
                                       VR128, memop, i128mem, 1,
                                       SchedWriteMPSAD.XMM>;
  }

  let ExeDomain = SSEPackedSingle in
  defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
                                  VR128, memop, f128mem, 1,
                                  SchedWriteDPPS.XMM>, SIMD_EXC;
  let ExeDomain = SSEPackedDouble in
  defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
                                  VR128, memop, f128mem, 1,
                                  SchedWriteDPPD.XMM>, SIMD_EXC;
}
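
// For reference: the dpps immediate holds two masks. The high nibble selects
// which element products enter the dot product and the low nibble selects
// which destination elements receive the sum (the rest are zeroed), so
// "dpps $0xf1, %xmm1, %xmm0" multiplies all four lanes and writes the sum
// only to element 0. dppd works the same way with two-element masks.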

/// SS41I_blend_rmi - SSE 4.1 blend with 8-bit immediate
multiclass SS41I_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                           X86MemOperand x86memop, bit Is2Addr, Domain d,
                           X86FoldableSchedWrite sched, SDNodeXForm commuteXForm> {
let ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in {
  let isCommutable = 1 in
  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
        Sched<[sched]>;
  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst,
          (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
}

  // Pattern to commute if load is in first source.
  def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, timm:$src3)),
            (!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
                                            (commuteXForm timm:$src3))>;
}

let Predicates = [HasAVX] in {
  defm VBLENDPS : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v4f32,
                                  VR128, load, f128mem, 0, SSEPackedSingle,
                                  SchedWriteFBlend.XMM, BlendCommuteImm4>,
                                  VEX_4V, VEX_WIG;
  defm VBLENDPSY : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v8f32,
                                   VR256, load, f256mem, 0, SSEPackedSingle,
                                   SchedWriteFBlend.YMM, BlendCommuteImm8>,
                                   VEX_4V, VEX_L, VEX_WIG;
  defm VBLENDPD : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v2f64,
                                  VR128, load, f128mem, 0, SSEPackedDouble,
                                  SchedWriteFBlend.XMM, BlendCommuteImm2>,
                                  VEX_4V, VEX_WIG;
  defm VBLENDPDY : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v4f64,
                                   VR256, load, f256mem, 0, SSEPackedDouble,
                                   SchedWriteFBlend.YMM, BlendCommuteImm4>,
                                   VEX_4V, VEX_L, VEX_WIG;
  defm VPBLENDW : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v8i16,
                                  VR128, load, i128mem, 0, SSEPackedInt,
                                  SchedWriteBlend.XMM, BlendCommuteImm8>,
                                  VEX_4V, VEX_WIG;
}

let Predicates = [HasAVX2] in {
  defm VPBLENDWY : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v16i16,
                                   VR256, load, i256mem, 0, SSEPackedInt,
                                   SchedWriteBlend.YMM, BlendCommuteImm8>,
                                   VEX_4V, VEX_L, VEX_WIG;
}

// Emulate vXi32/vXi64 blends with vXf32/vXf64 or pblendw.
// ExecutionDomainFixPass will cleanup domains later on.
let Predicates = [HasAVX1Only] in {
def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3),
          (VBLENDPDYrri VR256:$src1, VR256:$src2, timm:$src3)>;
def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3),
          (VBLENDPDYrmi VR256:$src1, addr:$src2, timm:$src3)>;
def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3),
          (VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 timm:$src3))>;

// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
// it from becoming movsd via commuting under optsize.
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
          (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 timm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3),
          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 timm:$src3))>;
def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3),
          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 timm:$src3))>;

def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), timm:$src3),
          (VBLENDPSYrri VR256:$src1, VR256:$src2, timm:$src3)>;
def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), timm:$src3),
          (VBLENDPSYrmi VR256:$src1, addr:$src2, timm:$src3)>;
def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, timm:$src3),
          (VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 timm:$src3))>;

// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
// it from becoming movss via commuting under optsize.
def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3),
          (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 timm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), timm:$src3),
          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>;
def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, timm:$src3),
          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>;
}

defm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32,
                               VR128, memop, f128mem, 1, SSEPackedSingle,
                               SchedWriteFBlend.XMM, BlendCommuteImm4>;
defm BLENDPD : SS41I_blend_rmi<0x0D, "blendpd", X86Blendi, v2f64,
                               VR128, memop, f128mem, 1, SSEPackedDouble,
                               SchedWriteFBlend.XMM, BlendCommuteImm2>;
defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16,
                               VR128, memop, i128mem, 1, SSEPackedInt,
                               SchedWriteBlend.XMM, BlendCommuteImm8>;

let Predicates = [UseSSE41] in {
// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
// it from becoming movss via commuting under optsize.
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
          (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 timm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), timm:$src3),
          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 timm:$src3))>;
def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, timm:$src3),
          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 timm:$src3))>;

def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3),
          (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 timm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), timm:$src3),
          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>;
def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, timm:$src3),
          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>;
}

// For insertion into the zero index (low half) of a 256-bit vector, it is
// more efficient to generate a blend with immediate instead of an insert*128.
let Predicates = [HasAVX] in {
def : Pat<(insert_subvector (v4f64 VR256:$src1), (v2f64 VR128:$src2), (iPTR 0)),
          (VBLENDPDYrri VR256:$src1,
                        (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0x3)>;
def : Pat<(insert_subvector (v8f32 VR256:$src1), (v4f32 VR128:$src2), (iPTR 0)),
          (VBLENDPSYrri VR256:$src1,
                        (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0xf)>;

def : Pat<(insert_subvector (loadv4f64 addr:$src2), (v2f64 VR128:$src1), (iPTR 0)),
          (VBLENDPDYrmi (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm), addr:$src2, 0xc)>;
def : Pat<(insert_subvector (loadv8f32 addr:$src2), (v4f32 VR128:$src1), (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
}
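
// Worked example for the patterns above: blending a 128-bit value into the
// low half of a v4f64 uses mask 0x3 (take elements 0-1 from the widened xmm),
// while inserting under a loaded upper half inverts the mask to 0xc; the
// v8f32 cases use 0xf and 0xf0 for the same reason.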

/// SS41I_quaternary_avx - AVX SSE 4.1 with 4 operands
multiclass SS41I_quaternary_avx<bits<8> opc, string OpcodeStr, RegisterClass RC,
                                X86MemOperand x86memop, ValueType VT,
                                PatFrag mem_frag, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  def rr : Ii8Reg<opc, MRMSrcReg, (outs RC:$dst),
                  (ins RC:$src1, RC:$src2, RC:$src3),
                  !strconcat(OpcodeStr,
                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                  [(set RC:$dst, (VT (OpNode RC:$src3, RC:$src2, RC:$src1)))],
                  SSEPackedInt>, TAPD, VEX_4V,
                  Sched<[sched]>;

  def rm : Ii8Reg<opc, MRMSrcMem, (outs RC:$dst),
                  (ins RC:$src1, x86memop:$src2, RC:$src3),
                  !strconcat(OpcodeStr,
                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                  [(set RC:$dst,
                        (OpNode RC:$src3, (mem_frag addr:$src2),
                                RC:$src1))], SSEPackedInt>, TAPD, VEX_4V,
                  Sched<[sched.Folded, sched.ReadAfterFold,
                         // x86memop:$src2
                         ReadDefault, ReadDefault, ReadDefault, ReadDefault,
                         ReadDefault,
                         // RC::$src3
                         sched.ReadAfterFold]>;
}

let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD  : SS41I_quaternary_avx<0x4B, "vblendvpd", VR128, f128mem,
                                       v2f64, loadv2f64, X86Blendv,
                                       SchedWriteFVarBlend.XMM>;
defm VBLENDVPDY : SS41I_quaternary_avx<0x4B, "vblendvpd", VR256, f256mem,
                                       v4f64, loadv4f64, X86Blendv,
                                       SchedWriteFVarBlend.YMM>, VEX_L;
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
defm VBLENDVPS  : SS41I_quaternary_avx<0x4A, "vblendvps", VR128, f128mem,
                                       v4f32, loadv4f32, X86Blendv,
                                       SchedWriteFVarBlend.XMM>;
defm VBLENDVPSY : SS41I_quaternary_avx<0x4A, "vblendvps", VR256, f256mem,
                                       v8f32, loadv8f32, X86Blendv,
                                       SchedWriteFVarBlend.YMM>, VEX_L;
} // ExeDomain = SSEPackedSingle
defm VPBLENDVB : SS41I_quaternary_avx<0x4C, "vpblendvb", VR128, i128mem,
                                      v16i8, loadv16i8, X86Blendv,
                                      SchedWriteVarBlend.XMM>;
}

let Predicates = [HasAVX2] in {
defm VPBLENDVBY : SS41I_quaternary_avx<0x4C, "vpblendvb", VR256, i256mem,
                                       v32i8, loadv32i8, X86Blendv,
                                       SchedWriteVarBlend.YMM>, VEX_L;
}

let Predicates = [HasAVX] in {
  def : Pat<(v4i32 (X86Blendv (v4i32 VR128:$mask), (v4i32 VR128:$src1),
                              (v4i32 VR128:$src2))),
            (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v2i64 (X86Blendv (v2i64 VR128:$mask), (v2i64 VR128:$src1),
                              (v2i64 VR128:$src2))),
            (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v8i32 (X86Blendv (v8i32 VR256:$mask), (v8i32 VR256:$src1),
                              (v8i32 VR256:$src2))),
            (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v4i64 (X86Blendv (v4i64 VR256:$mask), (v4i64 VR256:$src1),
                              (v4i64 VR256:$src2))),
            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
}

// Prefer a movss or movsd over a blendps when optimizing for size. These were
// changed to use blends because blends have better throughput on Sandy Bridge
// and Haswell, but movs[s/d] are 1-2 byte shorter instructions.
let Predicates = [HasAVX, OptForSpeed] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (VBLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (VPBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;

  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
            (VBLENDPSrri VR128:$src1, VR128:$src2, (i8 1))>;
  def : Pat<(v4f32 (X86Movss VR128:$src1, (loadv4f32 addr:$src2))),
            (VBLENDPSrmi VR128:$src1, addr:$src2, (i8 1))>;
  def : Pat<(v4f32 (X86Movss (loadv4f32 addr:$src2), VR128:$src1)),
            (VBLENDPSrmi VR128:$src1, addr:$src2, (i8 0xe))>;

  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
            (VBLENDPDrri VR128:$src1, VR128:$src2, (i8 1))>;
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (loadv2f64 addr:$src2))),
            (VBLENDPDrmi VR128:$src1, addr:$src2, (i8 1))>;
  def : Pat<(v2f64 (X86Movsd (loadv2f64 addr:$src2), VR128:$src1)),
            (VBLENDPDrmi VR128:$src1, addr:$src2, (i8 2))>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                    (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)),
                    (i8 1))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                    (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)),
                    (i8 3))), sub_xmm)>;
}

// Prefer a movss or movsd over a blendps when optimizing for size. These were
// changed to use blends because blends have better throughput on Sandy Bridge
// and Haswell, but movs[s/d] are 1-2 byte shorter instructions.
let Predicates = [UseSSE41, OptForSpeed] in {
  // With SSE41 we can use blends for these patterns.
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (BLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (PBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;

  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
            (BLENDPSrri VR128:$src1, VR128:$src2, (i8 1))>;
  def : Pat<(v4f32 (X86Movss VR128:$src1, (memopv4f32 addr:$src2))),
            (BLENDPSrmi VR128:$src1, addr:$src2, (i8 1))>;
  def : Pat<(v4f32 (X86Movss (memopv4f32 addr:$src2), VR128:$src1)),
            (BLENDPSrmi VR128:$src1, addr:$src2, (i8 0xe))>;

  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
            (BLENDPDrri VR128:$src1, VR128:$src2, (i8 1))>;
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (memopv2f64 addr:$src2))),
            (BLENDPDrmi VR128:$src1, addr:$src2, (i8 1))>;
  def : Pat<(v2f64 (X86Movsd (memopv2f64 addr:$src2), VR128:$src1)),
            (BLENDPDrmi VR128:$src1, addr:$src2, (i8 2))>;
}
$dst|$dst, $src}", []>, 6405 Sched<[SchedWriteVecMoveLSNT.XMM.RM]>, VEX, VEX_WIG; 6406let Predicates = [HasAVX2, NoVLX] in 6407def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), 6408 "vmovntdqa\t{$src, $dst|$dst, $src}", []>, 6409 Sched<[SchedWriteVecMoveLSNT.YMM.RM]>, VEX, VEX_L, VEX_WIG; 6410def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 6411 "movntdqa\t{$src, $dst|$dst, $src}", []>, 6412 Sched<[SchedWriteVecMoveLSNT.XMM.RM]>; 6413 6414let Predicates = [HasAVX2, NoVLX] in { 6415 def : Pat<(v8f32 (alignednontemporalload addr:$src)), 6416 (VMOVNTDQAYrm addr:$src)>; 6417 def : Pat<(v4f64 (alignednontemporalload addr:$src)), 6418 (VMOVNTDQAYrm addr:$src)>; 6419 def : Pat<(v4i64 (alignednontemporalload addr:$src)), 6420 (VMOVNTDQAYrm addr:$src)>; 6421 def : Pat<(v8i32 (alignednontemporalload addr:$src)), 6422 (VMOVNTDQAYrm addr:$src)>; 6423 def : Pat<(v16i16 (alignednontemporalload addr:$src)), 6424 (VMOVNTDQAYrm addr:$src)>; 6425 def : Pat<(v32i8 (alignednontemporalload addr:$src)), 6426 (VMOVNTDQAYrm addr:$src)>; 6427} 6428 6429let Predicates = [HasAVX, NoVLX] in { 6430 def : Pat<(v4f32 (alignednontemporalload addr:$src)), 6431 (VMOVNTDQArm addr:$src)>; 6432 def : Pat<(v2f64 (alignednontemporalload addr:$src)), 6433 (VMOVNTDQArm addr:$src)>; 6434 def : Pat<(v2i64 (alignednontemporalload addr:$src)), 6435 (VMOVNTDQArm addr:$src)>; 6436 def : Pat<(v4i32 (alignednontemporalload addr:$src)), 6437 (VMOVNTDQArm addr:$src)>; 6438 def : Pat<(v8i16 (alignednontemporalload addr:$src)), 6439 (VMOVNTDQArm addr:$src)>; 6440 def : Pat<(v16i8 (alignednontemporalload addr:$src)), 6441 (VMOVNTDQArm addr:$src)>; 6442} 6443 6444let Predicates = [UseSSE41] in { 6445 def : Pat<(v4f32 (alignednontemporalload addr:$src)), 6446 (MOVNTDQArm addr:$src)>; 6447 def : Pat<(v2f64 (alignednontemporalload addr:$src)), 6448 (MOVNTDQArm addr:$src)>; 6449 def : Pat<(v2i64 (alignednontemporalload addr:$src)), 6450 (MOVNTDQArm addr:$src)>; 6451 def : Pat<(v4i32 (alignednontemporalload addr:$src)), 6452 (MOVNTDQArm addr:$src)>; 6453 def : Pat<(v8i16 (alignednontemporalload addr:$src)), 6454 (MOVNTDQArm addr:$src)>; 6455 def : Pat<(v16i8 (alignednontemporalload addr:$src)), 6456 (MOVNTDQArm addr:$src)>; 6457} 6458 6459} // AddedComplexity 6460 6461//===----------------------------------------------------------------------===// 6462// SSE4.2 - Compare Instructions 6463//===----------------------------------------------------------------------===// 6464 6465/// SS42I_binop_rm - Simple SSE 4.2 binary operator 6466multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 6467 ValueType OpVT, RegisterClass RC, PatFrag memop_frag, 6468 X86MemOperand x86memop, X86FoldableSchedWrite sched, 6469 bit Is2Addr = 1> { 6470 def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst), 6471 (ins RC:$src1, RC:$src2), 6472 !if(Is2Addr, 6473 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 6474 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 6475 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, 6476 Sched<[sched]>; 6477 def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst), 6478 (ins RC:$src1, x86memop:$src2), 6479 !if(Is2Addr, 6480 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 6481 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 6482 [(set RC:$dst, 6483 (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, 6484 Sched<[sched.Folded, sched.ReadAfterFold]>; 6485} 6486 6487let Predicates = [HasAVX] in 6488 defm VPCMPGTQ : 

let AddedComplexity = 400 in { // Prefer non-temporal versions

let Predicates = [HasAVX, NoVLX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                        "vmovntdqa\t{$src, $dst|$dst, $src}", []>,
                        Sched<[SchedWriteVecMoveLSNT.XMM.RM]>, VEX, VEX_WIG;
let Predicates = [HasAVX2, NoVLX] in
def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                         "vmovntdqa\t{$src, $dst|$dst, $src}", []>,
                         Sched<[SchedWriteVecMoveLSNT.YMM.RM]>, VEX, VEX_L, VEX_WIG;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                       "movntdqa\t{$src, $dst|$dst, $src}", []>,
                       Sched<[SchedWriteVecMoveLSNT.XMM.RM]>;

let Predicates = [HasAVX2, NoVLX] in {
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAYrm addr:$src)>;
}

let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQArm addr:$src)>;
}

let Predicates = [UseSSE41] in {
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (MOVNTDQArm addr:$src)>;
}

} // AddedComplexity

//===----------------------------------------------------------------------===//
// SSE4.2 - Compare Instructions
//===----------------------------------------------------------------------===//

/// SS42I_binop_rm - Simple SSE 4.2 binary operator
multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                          X86MemOperand x86memop, X86FoldableSchedWrite sched,
                          bit Is2Addr = 1> {
  def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
       Sched<[sched]>;
  def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst,
         (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
}

let Predicates = [HasAVX] in
  defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
                                 load, i128mem, SchedWriteVecALU.XMM, 0>,
                                 VEX_4V, VEX_WIG;

let Predicates = [HasAVX2] in
  defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
                                  load, i256mem, SchedWriteVecALU.YMM, 0>,
                                  VEX_4V, VEX_L, VEX_WIG;

let Constraints = "$src1 = $dst" in
  defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
                                memop, i128mem, SchedWriteVecALU.XMM>;
multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
                      X86FoldableSchedWrite sched, bit UsesXMM0 = 0> {
  def rr : I<Opc, MRMSrcReg, (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2),
             !if(UsesXMM0,
                 !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
             [!if(UsesXMM0,
                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>,
             T8PS, Sched<[sched]>;

  def rm : I<Opc, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, i128mem:$src2),
             !if(UsesXMM0,
                 !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
             [!if(UsesXMM0,
                  (set VR128:$dst, (IntId VR128:$src1,
                                    (memop addr:$src2), XMM0)),
                  (set VR128:$dst, (IntId VR128:$src1,
                                    (memop addr:$src2))))]>, T8PS,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
  def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst),
                         (ins VR128:$src1, VR128:$src2, u8imm:$src3),
                         "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                         [(set VR128:$dst,
                           (int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
                                              (i8 timm:$src3)))]>, TAPS,
                         Sched<[SchedWriteVecIMul.XMM]>;
  def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst),
                         (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
                         "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                         [(set VR128:$dst,
                           (int_x86_sha1rnds4 VR128:$src1,
                                              (memop addr:$src2),
                                              (i8 timm:$src3)))]>, TAPS,
                         Sched<[SchedWriteVecIMul.XMM.Folded,
                                SchedWriteVecIMul.XMM.ReadAfterFold]>;

  defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte,
                              SchedWriteVecIMul.XMM>;
  defm SHA1MSG1 : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1,
                             SchedWriteVecIMul.XMM>;
  defm SHA1MSG2 : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2,
                             SchedWriteVecIMul.XMM>;

  let Uses=[XMM0] in
  defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2,
                                SchedWriteVecIMul.XMM, 1>;

  defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1,
                               SchedWriteVecIMul.XMM>;
  defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2,
                               SchedWriteVecIMul.XMM>;
}

// Aliases with explicit %xmm0
def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
                (SHA256RNDS2rr VR128:$dst, VR128:$src2), 0>;
def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
                (SHA256RNDS2rm VR128:$dst, i128mem:$src2), 0>;

//===----------------------------------------------------------------------===//
// AES-NI Instructions
//===----------------------------------------------------------------------===//

multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
                             Intrinsic IntId, PatFrag ld_frag,
                             bit Is2Addr = 0, RegisterClass RC = VR128,
                             X86MemOperand MemOp = i128mem> {
  let AsmString = OpcodeStr#
                  !if(Is2Addr, "\t{$src2, $dst|$dst, $src2}",
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}") in {
    def rr : AES8I<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2), "",
                   [(set RC:$dst, (IntId RC:$src1, RC:$src2))]>,
                   Sched<[WriteAESDecEnc]>;
    def rm : AES8I<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, MemOp:$src2), "",
                   [(set RC:$dst, (IntId RC:$src1, (ld_frag addr:$src2)))]>,
                   Sched<[WriteAESDecEnc.Folded, WriteAESDecEnc.ReadAfterFold]>;
  }
}

// Perform One Round of an AES Encryption/Decryption Flow
let Predicates = [HasAVX, NoVLX_Or_NoVAES, HasAES] in {
  defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc",
                                   int_x86_aesni_aesenc, load>, VEX_4V, VEX_WIG;
  defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
                                       int_x86_aesni_aesenclast, load>, VEX_4V, VEX_WIG;
  defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec",
                                   int_x86_aesni_aesdec, load>, VEX_4V, VEX_WIG;
  defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
                                       int_x86_aesni_aesdeclast, load>, VEX_4V, VEX_WIG;
}

let Predicates = [NoVLX, HasVAES] in {
  defm VAESENCY : AESI_binop_rm_int<0xDC, "vaesenc",
                                    int_x86_aesni_aesenc_256, load, 0, VR256,
                                    i256mem>, VEX_4V, VEX_L, VEX_WIG;
  defm VAESENCLASTY : AESI_binop_rm_int<0xDD, "vaesenclast",
                                        int_x86_aesni_aesenclast_256, load, 0, VR256,
                                        i256mem>, VEX_4V, VEX_L, VEX_WIG;
  defm VAESDECY : AESI_binop_rm_int<0xDE, "vaesdec",
                                    int_x86_aesni_aesdec_256, load, 0, VR256,
                                    i256mem>, VEX_4V, VEX_L, VEX_WIG;
  defm VAESDECLASTY : AESI_binop_rm_int<0xDF, "vaesdeclast",
                                        int_x86_aesni_aesdeclast_256, load, 0, VR256,
                                        i256mem>, VEX_4V, VEX_L, VEX_WIG;
}

let Constraints = "$src1 = $dst" in {
  defm AESENC : AESI_binop_rm_int<0xDC, "aesenc",
                                  int_x86_aesni_aesenc, memop, 1>;
  defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
                                      int_x86_aesni_aesenclast, memop, 1>;
  defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec",
                                  int_x86_aesni_aesdec, memop, 1>;
  defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
                                      int_x86_aesni_aesdeclast, memop, 1>;
}

// Perform the AES InvMixColumn Transformation
let Predicates = [HasAVX, HasAES] in {
  def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1),
                        "vaesimc\t{$src1, $dst|$dst, $src1}",
                        [(set VR128:$dst,
                          (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>,
                        VEX, VEX_WIG;
  def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
                        (ins i128mem:$src1),
                        "vaesimc\t{$src1, $dst|$dst, $src1}",
                        [(set VR128:$dst, (int_x86_aesni_aesimc (load addr:$src1)))]>,
                        Sched<[WriteAESIMC.Folded]>, VEX, VEX_WIG;
}
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
                     (ins VR128:$src1),
                     "aesimc\t{$src1, $dst|$dst, $src1}",
                     [(set VR128:$dst,
                       (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>;
def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
                     (ins i128mem:$src1),
                     "aesimc\t{$src1, $dst|$dst, $src1}",
                     [(set VR128:$dst, (int_x86_aesni_aesimc (memop addr:$src1)))]>,
                     Sched<[WriteAESIMC.Folded]>;

// AES Round Key Generation Assist
let Predicates = [HasAVX, HasAES] in {
  def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
                                    (ins VR128:$src1, u8imm:$src2),
                                    "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                                    [(set VR128:$dst,
                                      (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
                                    Sched<[WriteAESKeyGen]>, VEX, VEX_WIG;
  def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
                                    (ins i128mem:$src1, u8imm:$src2),
                                    "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                                    [(set VR128:$dst,
                                      (int_x86_aesni_aeskeygenassist (load addr:$src1), timm:$src2))]>,
                                    Sched<[WriteAESKeyGen.Folded]>, VEX, VEX_WIG;
}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
                                 (ins VR128:$src1, u8imm:$src2),
                                 "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                                 [(set VR128:$dst,
                                   (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
                                 Sched<[WriteAESKeyGen]>;
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
                                 (ins i128mem:$src1, u8imm:$src2),
                                 "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                                 [(set VR128:$dst,
                                   (int_x86_aesni_aeskeygenassist (memop addr:$src1), timm:$src2))]>,
                                 Sched<[WriteAESKeyGen.Folded]>;

//===----------------------------------------------------------------------===//
// PCLMUL Instructions
//===----------------------------------------------------------------------===//

// Immediate transform to help with commuting.
def PCLMULCommuteImm : SDNodeXForm<timm, [{
  uint8_t Imm = N->getZExtValue();
  return getI8Imm((uint8_t)((Imm >> 4) | (Imm << 4)), SDLoc(N));
}]>;

// SSE carry-less Multiplication instructions
let Predicates = [NoAVX, HasPCLMUL] in {
  let Constraints = "$src1 = $dst" in {
    let isCommutable = 1 in
    def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
                                (ins VR128:$src1, VR128:$src2, u8imm:$src3),
                                "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                                [(set VR128:$dst,
                                  (int_x86_pclmulqdq VR128:$src1, VR128:$src2, timm:$src3))]>,
                                Sched<[WriteCLMul]>;

    def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
                                (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
                                "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                                [(set VR128:$dst,
                                  (int_x86_pclmulqdq VR128:$src1, (memop addr:$src2),
                                                     timm:$src3))]>,
                                Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"

  def : Pat<(int_x86_pclmulqdq (memop addr:$src2), VR128:$src1,
                               (i8 timm:$src3)),
            (PCLMULQDQrm VR128:$src1, addr:$src2,
                         (PCLMULCommuteImm timm:$src3))>;
} // Predicates = [NoAVX, HasPCLMUL]

// SSE aliases
foreach HI = ["hq","lq"] in
foreach LO = ["hq","lq"] in {
  def : InstAlias<"pclmul" # HI # LO # "dq\t{$src, $dst|$dst, $src}",
                  (PCLMULQDQrr VR128:$dst, VR128:$src,
                   !add(!shl(!eq(LO,"hq"),4),!eq(HI,"hq"))), 0>;
  def : InstAlias<"pclmul" # HI # LO # "dq\t{$src, $dst|$dst, $src}",
                  (PCLMULQDQrm VR128:$dst, i128mem:$src,
                   !add(!shl(!eq(LO,"hq"),4),!eq(HI,"hq"))), 0>;
}

// AVX carry-less Multiplication instructions
multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
                      PatFrag LdFrag, Intrinsic IntId> {
  let isCommutable = 1 in
  def rr : PCLMULIi8<0x44, MRMSrcReg, (outs RC:$dst),
                     (ins RC:$src1, RC:$src2, u8imm:$src3),
                     "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                     [(set RC:$dst,
                       (IntId RC:$src1, RC:$src2, timm:$src3))]>,
                     Sched<[WriteCLMul]>;

  def rm : PCLMULIi8<0x44, MRMSrcMem, (outs RC:$dst),
                     (ins RC:$src1, MemOp:$src2, u8imm:$src3),
                     "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                     [(set RC:$dst,
                       (IntId RC:$src1, (LdFrag addr:$src2), timm:$src3))]>,
                     Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;

  // We can commute a load in the first operand by swapping the sources and
  // rotating the immediate.
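  // Illustrative worked example (our reading of the PCLMULQDQ immediate, not
  // stated elsewhere in this file): only bits 0 and 4 are meaningful, picking
  // the qword of the first and second source respectively. Imm 0x01
  // (src1.hi * src2.lo) must become 0x10 (src1.lo * src2.hi) once the
  // operands are swapped, which is exactly the nibble rotate that
  // PCLMULCommuteImm above performs.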
  def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 timm:$src3)),
            (!cast<Instruction>(NAME#"rm") RC:$src1, addr:$src2,
             (PCLMULCommuteImm timm:$src3))>;
}

let Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in
defm VPCLMULQDQ : vpclmulqdq<VR128, i128mem, load,
                             int_x86_pclmulqdq>, VEX_4V, VEX_WIG;

let Predicates = [NoVLX, HasVPCLMULQDQ] in
defm VPCLMULQDQY : vpclmulqdq<VR256, i256mem, load,
                              int_x86_pclmulqdq_256>, VEX_4V, VEX_L, VEX_WIG;

multiclass vpclmulqdq_aliases_impl<string InstStr, RegisterClass RC,
                                   X86MemOperand MemOp, string Hi, string Lo> {
  def : InstAlias<"vpclmul"#Hi#Lo#"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(InstStr # "rr") RC:$dst, RC:$src1, RC:$src2,
                   !add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))), 0>;
  def : InstAlias<"vpclmul"#Hi#Lo#"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(InstStr # "rm") RC:$dst, RC:$src1, MemOp:$src2,
                   !add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))), 0>;
}

multiclass vpclmulqdq_aliases<string InstStr, RegisterClass RC,
                              X86MemOperand MemOp> {
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "hq", "hq">;
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "hq", "lq">;
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "lq", "hq">;
  defm : vpclmulqdq_aliases_impl<InstStr, RC, MemOp, "lq", "lq">;
}

// AVX aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQ", VR128, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQY", VR256, i256mem>;

//===----------------------------------------------------------------------===//
// SSE4A Instructions
//===----------------------------------------------------------------------===//

let Predicates = [HasSSE4A] in {

let ExeDomain = SSEPackedInt in {
let Constraints = "$src = $dst" in {
def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst),
                 (ins VR128:$src, u8imm:$len, u8imm:$idx),
                 "extrq\t{$idx, $len, $src|$src, $len, $idx}",
                 [(set VR128:$dst, (X86extrqi VR128:$src, timm:$len,
                                              timm:$idx))]>,
                 PD, Sched<[SchedWriteVecALU.XMM]>;
def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
              (ins VR128:$src, VR128:$mask),
              "extrq\t{$mask, $src|$src, $mask}",
              [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src,
                                                     VR128:$mask))]>,
              PD, Sched<[SchedWriteVecALU.XMM]>;

def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
                   (ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx),
                   "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
                   [(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2,
                                                  timm:$len, timm:$idx))]>,
                   XD, Sched<[SchedWriteVecALU.XMM]>;
def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
                (ins VR128:$src, VR128:$mask),
                "insertq\t{$mask, $src|$src, $mask}",
                [(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src,
                                                         VR128:$mask))]>,
                XD, Sched<[SchedWriteVecALU.XMM]>;
}
} // ExeDomain = SSEPackedInt

// Non-temporal (unaligned) scalar stores.
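// MOVNTSS/MOVNTSD read their data from an XMM register, so the selection
// patterns below first copy the scalar FR32/FR64 value into a VR128 before
// storing.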
let AddedComplexity = 400 in { // Prefer non-temporal versions
let hasSideEffects = 0, mayStore = 1, SchedRW = [SchedWriteFMoveLSNT.Scl.MR] in {
def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
                "movntss\t{$src, $dst|$dst, $src}", []>, XS;

def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                "movntsd\t{$src, $dst|$dst, $src}", []>, XD;
} // SchedRW

def : Pat<(nontemporalstore FR32:$src, addr:$dst),
          (MOVNTSS addr:$dst, (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;

def : Pat<(nontemporalstore FR64:$src, addr:$dst),
          (MOVNTSD addr:$dst, (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;

} // AddedComplexity
} // HasSSE4A

//===----------------------------------------------------------------------===//
// AVX Instructions
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VBROADCAST - Load from memory and broadcast to all elements of the
// destination operand
//
class avx_broadcast_rm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop, ValueType VT,
                       PatFrag bcast_frag, SchedWrite Sched> :
  AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
        [(set RC:$dst, (VT (bcast_frag addr:$src)))]>,
  Sched<[Sched]>, VEX;

// AVX2 adds register forms
class avx2_broadcast_rr<bits<8> opc, string OpcodeStr, RegisterClass RC,
                        ValueType ResVT, ValueType OpVT, SchedWrite Sched> :
  AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
         [(set RC:$dst, (ResVT (X86VBroadcast (OpVT VR128:$src))))]>,
  Sched<[Sched]>, VEX;

let ExeDomain = SSEPackedSingle, Predicates = [HasAVX, NoVLX] in {
  def VBROADCASTSSrm : avx_broadcast_rm<0x18, "vbroadcastss", VR128,
                                        f32mem, v4f32, X86VBroadcastld32,
                                        SchedWriteFShuffle.XMM.Folded>;
  def VBROADCASTSSYrm : avx_broadcast_rm<0x18, "vbroadcastss", VR256,
                                         f32mem, v8f32, X86VBroadcastld32,
                                         SchedWriteFShuffle.XMM.Folded>, VEX_L;
}
let ExeDomain = SSEPackedDouble, Predicates = [HasAVX, NoVLX] in
def VBROADCASTSDYrm : avx_broadcast_rm<0x19, "vbroadcastsd", VR256, f64mem,
                                       v4f64, X86VBroadcastld64,
                                       SchedWriteFShuffle.XMM.Folded>, VEX_L;

let ExeDomain = SSEPackedSingle, Predicates = [HasAVX2, NoVLX] in {
  def VBROADCASTSSrr : avx2_broadcast_rr<0x18, "vbroadcastss", VR128,
                                         v4f32, v4f32, SchedWriteFShuffle.XMM>;
  def VBROADCASTSSYrr : avx2_broadcast_rr<0x18, "vbroadcastss", VR256,
                                          v8f32, v4f32, WriteFShuffle256>, VEX_L;
}
let ExeDomain = SSEPackedDouble, Predicates = [HasAVX2, NoVLX] in
def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256,
                                        v4f64, v2f64, WriteFShuffle256>, VEX_L;

//===----------------------------------------------------------------------===//
// VBROADCAST*128 - Load from memory and broadcast 128-bit vector to both
// halves of a 256-bit vector.
//
let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX2] in
def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
                           (ins i128mem:$src),
                           "vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
                           Sched<[WriteShuffleLd]>, VEX, VEX_L;

let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX],
    ExeDomain = SSEPackedSingle in
def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst),
                           (ins f128mem:$src),
                           "vbroadcastf128\t{$src, $dst|$dst, $src}", []>,
                           Sched<[SchedWriteFShuffle.XMM.Folded]>, VEX, VEX_L;

let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128 addr:$src)>;
def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128 addr:$src)>;
// NOTE: We're using FP instructions here, but execution domain fixing can
// convert to integer when profitable.
def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128 addr:$src)>;
def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128 addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128 addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF128 addr:$src)>;
}

//===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
//

let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR256:$src2, u8imm:$src3),
          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
          VEX_4V, VEX_L, Sched<[WriteFShuffle256]>;
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
          VEX_4V, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
}

// Immediate transform to help with commuting.
def Perm2XCommuteImm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N));
}]>;

multiclass vperm2x128_lowering<string InstrStr, ValueType VT, PatFrag memop_frag> {
  def : Pat<(VT (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))),
            (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR256:$src2, timm:$imm)>;
  def : Pat<(VT (X86VPerm2x128 VR256:$src1, (memop_frag addr:$src2), (i8 timm:$imm))),
            (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2, timm:$imm)>;
  // Pattern with load in other operand.
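  // Worked example (based on our reading of the VPERM2F128/VPERM2I128
  // immediate encoding): imm[1:0] and imm[5:4] each select a 128-bit half,
  // with bits 1 and 5 choosing between src1 and src2. Commuting the sources
  // therefore just flips bits 1 and 5, which is the XOR with 0x22 performed
  // by Perm2XCommuteImm above; e.g. imm 0x31 becomes 0x13.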
  def : Pat<(VT (X86VPerm2x128 (memop_frag addr:$src2), VR256:$src1, (i8 timm:$imm))),
            (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
             (Perm2XCommuteImm timm:$imm))>;
}

let Predicates = [HasAVX] in {
  defm : vperm2x128_lowering<"VPERM2F128", v4f64, loadv4f64>;
  defm : vperm2x128_lowering<"VPERM2F128", v8f32, loadv8f32>;
}

let Predicates = [HasAVX1Only] in {
  defm : vperm2x128_lowering<"VPERM2F128", v4i64, loadv4i64>;
  defm : vperm2x128_lowering<"VPERM2F128", v8i32, loadv8i32>;
  defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>;
  defm : vperm2x128_lowering<"VPERM2F128", v32i8, loadv32i8>;
}

//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
                            (ins VR256:$src1, VR128:$src2, u8imm:$src3),
                            "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                            []>, Sched<[WriteFShuffle256]>, VEX_4V, VEX_L;
let mayLoad = 1 in
def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
                            (ins VR256:$src1, f128mem:$src2, u8imm:$src3),
                            "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                            []>, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
}

// To create a 256-bit all-ones value, we should produce VCMPTRUEPS
// with a YMM register containing zero.
// FIXME: Avoid producing vxorps to clear the fake inputs.
let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 immAllOnesV), (VCMPPSYrri (AVX_SET0), (AVX_SET0), 0xf)>;
}

multiclass vinsert_lowering<string InstrStr, string PermStr,
                            ValueType From, ValueType To,
                            PatFrag frommemop_frag, PatFrag tomemop_frag> {
  def : Pat<(vinsert128_insert:$ins (To VR256:$src1), (From VR128:$src2),
                                    (iPTR imm)),
            (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR128:$src2,
                                             (INSERT_get_vinsert128_imm VR256:$ins))>;
  def : Pat<(vinsert128_insert:$ins (To VR256:$src1),
                                    (From (frommemop_frag addr:$src2)),
                                    (iPTR imm)),
            (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
                                             (INSERT_get_vinsert128_imm VR256:$ins))>;
  // Folding "To" vector - convert to perm2x128 and commute inputs.
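  // Sketch of the idea: VINSERTF128's memory operand is the 128-bit side, so
  // a load of the 256-bit side cannot be folded into it. The pattern below
  // instead places the 128-bit value into the low half of an otherwise-undef
  // YMM register and lets the perm2x128 memory form supply the loaded vector,
  // with a commuted lane-select immediate to account for the swapped operand
  // order.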
  def : Pat<(vinsert128_insert:$ins (To (tomemop_frag addr:$src1)),
                                    (From VR128:$src2),
                                    (iPTR imm)),
            (!cast<Instruction>(PermStr#rm)
              (INSERT_SUBREG (To (IMPLICIT_DEF)), VR128:$src2, sub_xmm),
              addr:$src1, (INSERT_get_vperm2x128_commutedimm VR256:$ins))>;
}

let Predicates = [HasAVX, NoVLX] in {
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v4f32, v8f32, loadv4f32, loadv8f32>;
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v2f64, v4f64, loadv2f64, loadv4f64>;
}

let Predicates = [HasAVX1Only] in {
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v2i64, v4i64, loadv2i64, loadv4i64>;
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v4i32, v8i32, loadv4i32, loadv8i32>;
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v8i16, v16i16, loadv8i16, loadv16i16>;
  defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v16i8, v32i8, loadv16i8, loadv32i8>;
}

//===----------------------------------------------------------------------===//
// VEXTRACTF128 - Extract packed floating-point values
//
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
                             (ins VR256:$src1, u8imm:$src2),
                             "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             []>, Sched<[WriteFShuffle256]>, VEX, VEX_L;
let mayStore = 1 in
def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
                             (ins f128mem:$dst, VR256:$src1, u8imm:$src2),
                             "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             []>, Sched<[WriteFStoreX]>, VEX, VEX_L;
}

multiclass vextract_lowering<string InstrStr, ValueType From, ValueType To> {
  def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
            (To (!cast<Instruction>(InstrStr#rr)
                  (From VR256:$src1),
                  (EXTRACT_get_vextract128_imm VR128:$ext)))>;
  def : Pat<(store (To (vextract128_extract:$ext (From VR256:$src1),
                                                 (iPTR imm))), addr:$dst),
            (!cast<Instruction>(InstrStr#mr) addr:$dst, VR256:$src1,
                                             (EXTRACT_get_vextract128_imm VR128:$ext))>;
}

// AVX1 patterns
let Predicates = [HasAVX, NoVLX] in {
  defm : vextract_lowering<"VEXTRACTF128", v8f32, v4f32>;
  defm : vextract_lowering<"VEXTRACTF128", v4f64, v2f64>;
}

let Predicates = [HasAVX1Only] in {
  defm : vextract_lowering<"VEXTRACTF128", v4i64, v2i64>;
  defm : vextract_lowering<"VEXTRACTF128", v8i32, v4i32>;
  defm : vextract_lowering<"VEXTRACTF128", v16i16, v8i16>;
  defm : vextract_lowering<"VEXTRACTF128", v32i8, v16i8>;
}

//===----------------------------------------------------------------------===//
// VMASKMOV - Conditional SIMD Packed Loads and Stores
//
multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
                          Intrinsic IntLd, Intrinsic IntLd256,
                          Intrinsic IntSt, Intrinsic IntSt256,
                          X86SchedWriteMaskMove schedX,
                          X86SchedWriteMaskMove schedY> {
  def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
                 (ins VR128:$src1, f128mem:$src2),
                 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
                 VEX_4V, Sched<[schedX.RM]>;
  def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
                  (ins VR256:$src1, f256mem:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
                  VEX_4V, VEX_L,
                  Sched<[schedY.RM]>;
  def mr : AVX8I<opc_mr, MRMDestMem, (outs),
                 (ins f128mem:$dst, VR128:$src1, VR128:$src2),
                 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>,
                 VEX_4V, Sched<[schedX.MR]>;
  def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
                  (ins f256mem:$dst, VR256:$src1, VR256:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
                  VEX_4V, VEX_L, Sched<[schedY.MR]>;
}

let ExeDomain = SSEPackedSingle in
defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
                                 int_x86_avx_maskload_ps,
                                 int_x86_avx_maskload_ps_256,
                                 int_x86_avx_maskstore_ps,
                                 int_x86_avx_maskstore_ps_256,
                                 WriteFMaskMove32, WriteFMaskMove32Y>;
let ExeDomain = SSEPackedDouble in
defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
                                 int_x86_avx_maskload_pd,
                                 int_x86_avx_maskload_pd_256,
                                 int_x86_avx_maskstore_pd,
                                 int_x86_avx_maskstore_pd_256,
                                 WriteFMaskMove64, WriteFMaskMove64Y>;

//===----------------------------------------------------------------------===//
// AVX_VNNI
//===----------------------------------------------------------------------===//
let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI], Constraints = "$src1 = $dst",
    ExplicitVEXPrefix = 1, checkVEXPredicate = 1 in
multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                       bit IsCommutable> {
  let isCommutable = IsCommutable in
  def rr : AVX8I<opc, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src1, VR128:$src2, VR128:$src3),
                 !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                 [(set VR128:$dst, (v4i32 (OpNode VR128:$src1,
                                                  VR128:$src2, VR128:$src3)))]>,
                 VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;

  def rm : AVX8I<opc, MRMSrcMem, (outs VR128:$dst),
                 (ins VR128:$src1, VR128:$src2, i128mem:$src3),
                 !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                 [(set VR128:$dst, (v4i32 (OpNode VR128:$src1, VR128:$src2,
                                           (loadv4i32 addr:$src3))))]>,
                 VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;

  let isCommutable = IsCommutable in
  def Yrr : AVX8I<opc, MRMSrcReg, (outs VR256:$dst),
                  (ins VR256:$src1, VR256:$src2, VR256:$src3),
                  !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                  [(set VR256:$dst, (v8i32 (OpNode VR256:$src1,
                                                   VR256:$src2, VR256:$src3)))]>,
                  VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>;

  def Yrm : AVX8I<opc, MRMSrcMem, (outs VR256:$dst),
                  (ins VR256:$src1, VR256:$src2, i256mem:$src3),
                  !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                  [(set VR256:$dst, (v8i32 (OpNode VR256:$src1, VR256:$src2,
                                            (loadv8i32 addr:$src3))))]>,
                  VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>;
}

defm VPDPBUSD : avx_vnni_rm<0x50, "vpdpbusd", X86Vpdpbusd, 0>;
defm VPDPBUSDS : avx_vnni_rm<0x51, "vpdpbusds", X86Vpdpbusds, 0>;
defm VPDPWSSD : avx_vnni_rm<0x52, "vpdpwssd", X86Vpdpwssd, 1>;
defm VPDPWSSDS : avx_vnni_rm<0x53, "vpdpwssds", X86Vpdpwssds, 1>;

def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
                             (X86vpmaddwd node:$lhs, node:$rhs), [{
  return N->hasOneUse();
}]>;

let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI] in {
  def : Pat<(v8i32 (add VR256:$src1,
                        (X86vpmaddwd_su VR256:$src2, VR256:$src3))),
            (VPDPWSSDYrr VR256:$src1, VR256:$src2, VR256:$src3)>;
  def : Pat<(v8i32 (add VR256:$src1,
                        (X86vpmaddwd_su VR256:$src2, (load addr:$src3)))),
            (VPDPWSSDYrm VR256:$src1, VR256:$src2, addr:$src3)>;
  def : Pat<(v4i32 (add VR128:$src1,
                        (X86vpmaddwd_su VR128:$src2, VR128:$src3))),
            (VPDPWSSDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
  def : Pat<(v4i32 (add VR128:$src1,
                        (X86vpmaddwd_su VR128:$src2, (load addr:$src3)))),
            (VPDPWSSDrm VR128:$src1, VR128:$src2, addr:$src3)>;
}

//===----------------------------------------------------------------------===//
// VPERMIL - Permute Single and Double Floating-Point Values
//

multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
                      RegisterClass RC, X86MemOperand x86memop_f,
                      X86MemOperand x86memop_i,
                      ValueType f_vt, ValueType i_vt,
                      X86FoldableSchedWrite sched,
                      X86FoldableSchedWrite varsched> {
  let Predicates = [HasAVX, NoVLX] in {
    def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt RC:$src2))))]>, VEX_4V,
                   Sched<[varsched]>;
    def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, x86memop_i:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1,
                                         (i_vt (load addr:$src2)))))]>, VEX_4V,
                   Sched<[varsched.Folded, sched.ReadAfterFold]>;

    def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
                     (ins RC:$src1, u8imm:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 timm:$src2))))]>, VEX,
                     Sched<[sched]>;
    def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
                     (ins x86memop_f:$src1, u8imm:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set RC:$dst,
                       (f_vt (X86VPermilpi (load addr:$src1), (i8 timm:$src2))))]>, VEX,
                     Sched<[sched.Folded]>;
  } // Predicates = [HasAVX, NoVLX]
}

let ExeDomain = SSEPackedSingle in {
  defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
                              v4f32, v4i32, SchedWriteFShuffle.XMM,
                              SchedWriteFVarShuffle.XMM>;
  defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
                               v8f32, v8i32, SchedWriteFShuffle.YMM,
                               SchedWriteFVarShuffle.YMM>, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
  defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
                              v2f64, v2i64, SchedWriteFShuffle.XMM,
                              SchedWriteFVarShuffle.XMM>;
  defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
                               v4f64, v4i64, SchedWriteFShuffle.YMM,
                               SchedWriteFVarShuffle.YMM>, VEX_L;
}

//===----------------------------------------------------------------------===//
// VZERO - Zero YMM registers
// Note: These instructions do not affect YMM16-YMM31.
//

let SchedRW = [WriteSystem] in {
let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
            YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
  // Zero All YMM registers
  def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
                   [(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L,
                   Requires<[HasAVX]>, VEX_WIG;

  // Zero Upper bits of YMM registers
  def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
                     [(int_x86_avx_vzeroupper)]>, PS, VEX,
                     Requires<[HasAVX]>, VEX_WIG;
} // Defs
} // SchedRW

//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//

multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop,
                      X86FoldableSchedWrite sched> {
  def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
             "vcvtph2ps\t{$src, $dst|$dst, $src}",
             [(set RC:$dst, (X86any_cvtph2ps VR128:$src))]>,
             T8PD, VEX, Sched<[sched]>;
  let hasSideEffects = 0, mayLoad = 1 in
  def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
             "vcvtph2ps\t{$src, $dst|$dst, $src}",
             []>, T8PD, VEX, Sched<[sched.Folded]>;
}

multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
                      SchedWrite RR, SchedWrite MR> {
  def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
               (ins RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set VR128:$dst, (X86any_cvtps2ph RC:$src1, timm:$src2))]>,
               TAPD, VEX, Sched<[RR]>;
  let hasSideEffects = 0, mayStore = 1 in
  def mr : Ii8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               TAPD, VEX, Sched<[MR]>;
}

let Predicates = [HasF16C, NoVLX] in {
  defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, WriteCvtPH2PS>, SIMD_EXC;
  defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, WriteCvtPH2PSY>, VEX_L, SIMD_EXC;
  defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, WriteCvtPS2PH,
                              WriteCvtPS2PHSt>, SIMD_EXC;
  defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, WriteCvtPS2PHY,
                               WriteCvtPS2PHYSt>, VEX_L, SIMD_EXC;

  // Pattern match vcvtph2ps of a scalar i64 load.
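  // The 128-bit VCVTPH2PS only reads 64 bits of memory (its memory operand
  // above is f64mem), so the patterns below match scalar i64 loads that have
  // been placed into a vector, rather than full 128-bit vector loads.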
  def : Pat<(v4f32 (X86any_cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTPH2PSrm addr:$src)>;
  def : Pat<(v4f32 (X86any_cvtph2ps (bc_v8i16
                     (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTPH2PSrm addr:$src)>;
  def : Pat<(v8f32 (X86any_cvtph2ps (loadv8i16 addr:$src))),
            (VCVTPH2PSYrm addr:$src)>;

  def : Pat<(store (f64 (extractelt
                          (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128:$src1, timm:$src2))),
                          (iPTR 0))), addr:$dst),
            (VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>;
  def : Pat<(store (i64 (extractelt
                          (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128:$src1, timm:$src2))),
                          (iPTR 0))), addr:$dst),
            (VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>;
  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHYmr addr:$dst, VR256:$src1, timm:$src2)>;
}

//===----------------------------------------------------------------------===//
// AVX2 Instructions
//===----------------------------------------------------------------------===//

/// AVX2_blend_rmi - AVX2 blend with 8-bit immediate
multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, X86FoldableSchedWrite sched,
                          RegisterClass RC,
                          X86MemOperand x86memop, SDNodeXForm commuteXForm> {
  let isCommutable = 1 in
  def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
                     (ins RC:$src1, RC:$src2, u8imm:$src3),
                     !strconcat(OpcodeStr,
                                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
                     Sched<[sched]>, VEX_4V;
  def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
                     (ins RC:$src1, x86memop:$src2, u8imm:$src3),
                     !strconcat(OpcodeStr,
                                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     [(set RC:$dst,
                       (OpVT (OpNode RC:$src1, (load addr:$src2), timm:$src3)))]>,
                     Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V;

  // Pattern to commute if load is in first source.
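  // A blend immediate selects, per element, which source provides the result,
  // so commuting the operands amounts to inverting the mask bits. The
  // commuteXForm parameter (BlendCommuteImm4/BlendCommuteImm8 at the
  // instantiations below) is expected to perform that inversion for the
  // instruction's element count.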
  def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, timm:$src3)),
            (!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
             (commuteXForm timm:$src3))>;
}

let Predicates = [HasAVX2] in {
defm VPBLENDD : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v4i32,
                               SchedWriteBlend.XMM, VR128, i128mem,
                               BlendCommuteImm4>;
defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32,
                                SchedWriteBlend.YMM, VR256, i256mem,
                                BlendCommuteImm8>, VEX_L;

def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3),
          (VPBLENDDYrri VR256:$src1, VR256:$src2, (BlendScaleImm4 timm:$src3))>;
def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3),
          (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>;
def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3),
          (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>;

def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
          (VPBLENDDrri VR128:$src1, VR128:$src2, (BlendScaleImm2to4 timm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3),
          (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleImm2to4 timm:$src3))>;
def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3),
          (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2to4 timm:$src3))>;
}

// For insertion into the zero index (low half) of a 256-bit vector, it is
// more efficient to generate a blend with immediate instead of an insert*128.
// NOTE: We're using FP instructions here, but execution domain fixing should
// take care of using integer instructions when profitable.
let Predicates = [HasAVX] in {
def : Pat<(insert_subvector (v8i32 VR256:$src1), (v4i32 VR128:$src2), (iPTR 0)),
          (VBLENDPSYrri VR256:$src1,
                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0xf)>;
def : Pat<(insert_subvector (v4i64 VR256:$src1), (v2i64 VR128:$src2), (iPTR 0)),
          (VBLENDPSYrri VR256:$src1,
                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0xf)>;
def : Pat<(insert_subvector (v16i16 VR256:$src1), (v8i16 VR128:$src2), (iPTR 0)),
          (VBLENDPSYrri VR256:$src1,
                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0xf)>;
def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)),
          (VBLENDPSYrri VR256:$src1,
                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src2, sub_xmm), 0xf)>;

def : Pat<(insert_subvector (loadv8i32 addr:$src2), (v4i32 VR128:$src1), (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
def : Pat<(insert_subvector (loadv4i64 addr:$src2), (v2i64 VR128:$src1), (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
def : Pat<(insert_subvector (loadv16i16 addr:$src2), (v8i16 VR128:$src1), (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
def : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1), (iPTR 0)),
          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
}

//===----------------------------------------------------------------------===//
// VPBROADCAST - Load from memory and broadcast to all elements of the
// destination operand
//
multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
                          X86MemOperand x86memop, PatFrag bcast_frag,
                          ValueType OpVT128, ValueType OpVT256, Predicate prd> {
  let Predicates = [HasAVX2, prd] in {
    def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (OpVT128 (X86VBroadcast (OpVT128 VR128:$src))))]>,
                    Sched<[SchedWriteShuffle.XMM]>, VEX;
    def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (OpVT128 (bcast_frag addr:$src)))]>,
                    Sched<[SchedWriteShuffle.XMM.Folded]>, VEX;
    def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR256:$dst,
                       (OpVT256 (X86VBroadcast (OpVT128 VR128:$src))))]>,
                     Sched<[WriteShuffle256]>, VEX, VEX_L;
    def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set VR256:$dst,
                       (OpVT256 (bcast_frag addr:$src)))]>,
                     Sched<[SchedWriteShuffle.XMM.Folded]>, VEX, VEX_L;

    // Provide aliases for broadcast from the same register class that
    // automatically does the extract.
    def : Pat<(OpVT256 (X86VBroadcast (OpVT256 VR256:$src))),
              (!cast<Instruction>(NAME#"Yrr")
                (OpVT128 (EXTRACT_SUBREG (OpVT256 VR256:$src), sub_xmm)))>;
  }
}

defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, X86VBroadcastld8,
                                   v16i8, v32i8, NoVLX_Or_NoBWI>;
defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, X86VBroadcastld16,
                                   v8i16, v16i16, NoVLX_Or_NoBWI>;
defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, X86VBroadcastld32,
                                   v4i32, v8i32, NoVLX>;
defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, X86VBroadcastld64,
                                   v2i64, v4i64, NoVLX>;

let Predicates = [HasAVX2, NoVLX] in {
  // Provide fallback in case the load node that is used in the patterns above
  // is used by additional users, which prevents the pattern selection.
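  // For example, a v4f32 X86VBroadcast of an FR32 value cannot use the memory
  // form when the load has other users, so the value is first copied into a
  // VR128 and the register form of the broadcast is used instead.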
  def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
            (VBROADCASTSSrr (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;
  def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
            (VBROADCASTSSYrr (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)))>;
  def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
            (VBROADCASTSDYrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;
}

let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
  def : Pat<(v16i8 (X86VBroadcast GR8:$src)),
            (VPBROADCASTBrr (VMOVDI2PDIrr
                             (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                                 GR8:$src, sub_8bit))))>;
  def : Pat<(v32i8 (X86VBroadcast GR8:$src)),
            (VPBROADCASTBYrr (VMOVDI2PDIrr
                              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                                  GR8:$src, sub_8bit))))>;

  def : Pat<(v8i16 (X86VBroadcast GR16:$src)),
            (VPBROADCASTWrr (VMOVDI2PDIrr
                             (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                                 GR16:$src, sub_16bit))))>;
  def : Pat<(v16i16 (X86VBroadcast GR16:$src)),
            (VPBROADCASTWYrr (VMOVDI2PDIrr
                              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                                  GR16:$src, sub_16bit))))>;
}
let Predicates = [HasAVX2, NoVLX] in {
  def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
            (VPBROADCASTDrr (VMOVDI2PDIrr GR32:$src))>;
  def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
            (VPBROADCASTDYrr (VMOVDI2PDIrr GR32:$src))>;
  def : Pat<(v2i64 (X86VBroadcast GR64:$src)),
            (VPBROADCASTQrr (VMOV64toPQIrr GR64:$src))>;
  def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
            (VPBROADCASTQYrr (VMOV64toPQIrr GR64:$src))>;
}

// AVX1 broadcast patterns
let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 (X86VBroadcastld32 addr:$src)),
          (VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4i64 (X86VBroadcastld64 addr:$src)),
          (VBROADCASTSDYrm addr:$src)>;
def : Pat<(v4i32 (X86VBroadcastld32 addr:$src)),
          (VBROADCASTSSrm addr:$src)>;
}

// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
let Predicates = [HasAVX, NoVLX] in {
  // 128-bit broadcasts:
  def : Pat<(v2f64 (X86VBroadcast f64:$src)),
            (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>;
  def : Pat<(v2f64 (X86VBroadcastld64 addr:$src)),
            (VMOVDDUPrm addr:$src)>;

  def : Pat<(v2f64 (X86VBroadcast v2f64:$src)),
            (VMOVDDUPrr VR128:$src)>;
}

let Predicates = [HasAVX1Only] in {
  def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
            (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)>;
  def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
              (v4f32 (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)), sub_xmm),
              (v4f32 (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)), 1)>;
  def : Pat<(v8f32 (X86VBroadcast v4f32:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
              (v4f32 (VPERMILPSri VR128:$src, 0)), sub_xmm),
              (v4f32 (VPERMILPSri VR128:$src, 0)), 1)>;
  def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
              (v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), sub_xmm),
              (v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), 1)>;
  def : Pat<(v4f64 (X86VBroadcast v2f64:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
              (v2f64 (VMOVDDUPrr VR128:$src)), sub_xmm),
              (v2f64 (VMOVDDUPrr VR128:$src)), 1)>;

  def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
            (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)>;
  def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
              (v4i32 (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)), sub_xmm),
              (v4i32 (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)), 1)>;
  def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
              (v4i32 (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)), sub_xmm),
              (v4i32 (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)), 1)>;

  def : Pat<(v2i64 (X86VBroadcast i64:$src)),
            (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)>;
  def : Pat<(v2i64 (X86VBroadcastld64 addr:$src)),
            (VMOVDDUPrm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// VPERM - Permute instructions
//

multiclass avx2_perm<bits<8> opc, string OpcodeStr,
                     ValueType OpVT, X86FoldableSchedWrite Sched,
                     X86MemOperand memOp> {
  let Predicates = [HasAVX2, NoVLX] in {
    def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
                     (ins VR256:$src1, VR256:$src2),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set VR256:$dst,
                       (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>,
                     Sched<[Sched]>, VEX_4V, VEX_L;
    def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
                     (ins VR256:$src1, memOp:$src2),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set VR256:$dst,
                       (OpVT (X86VPermv VR256:$src1,
                              (load addr:$src2))))]>,
                     Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX_4V, VEX_L;
  }
}

defm VPERMD : avx2_perm<0x36, "vpermd", v8i32, WriteVarShuffle256, i256mem>;
let ExeDomain = SSEPackedSingle in
defm VPERMPS : avx2_perm<0x16, "vpermps", v8f32, WriteFVarShuffle256, f256mem>;

multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
                         ValueType OpVT, X86FoldableSchedWrite Sched,
                         X86MemOperand memOp> {
  let Predicates = [HasAVX2, NoVLX] in {
    def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
                       (ins VR256:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set VR256:$dst,
                         (OpVT (X86VPermi VR256:$src1, (i8 timm:$src2))))]>,
                       Sched<[Sched]>, VEX, VEX_L;
    def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
                       (ins memOp:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set VR256:$dst,
                         (OpVT (X86VPermi (mem_frag addr:$src1),
                                          (i8 timm:$src2))))]>,
                       Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VEX_L;
  }
}

defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64,
                            WriteShuffle256, i256mem>, VEX_W;
let ExeDomain = SSEPackedDouble in
defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64,
                             WriteFShuffle256, f256mem>, VEX_W;

//===----------------------------------------------------------------------===//
// VPERM2I128 - Permute Integer vector Values in 128-bit chunks
//
let isCommutable = 1 in
def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR256:$src2, u8imm:$src3),
          "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
          Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
          "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
          Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;

let Predicates = [HasAVX2] in {
  defm : vperm2x128_lowering<"VPERM2I128", v4i64, loadv4i64>;
  defm : vperm2x128_lowering<"VPERM2I128", v8i32, loadv8i32>;
  defm : vperm2x128_lowering<"VPERM2I128", v16i16, loadv16i16>;
  defm : vperm2x128_lowering<"VPERM2I128", v32i8, loadv32i8>;
}

//===----------------------------------------------------------------------===//
// VINSERTI128 - Insert packed integer values
//
let hasSideEffects = 0 in {
def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
                             (ins VR256:$src1, VR128:$src2, u8imm:$src3),
                             "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                             []>, Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
let mayLoad = 1 in
def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
                             (ins VR256:$src1, i128mem:$src2, u8imm:$src3),
                             "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                             []>, Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
}

let Predicates = [HasAVX2, NoVLX] in {
  defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v2i64, v4i64, loadv2i64, loadv4i64>;
  defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v4i32, v8i32, loadv4i32, loadv8i32>;
  defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v8i16, v16i16, loadv8i16, loadv16i16>;
  defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v16i8, v32i8, loadv16i8, loadv32i8>;
}

//===----------------------------------------------------------------------===//
// VEXTRACTI128 - Extract packed integer values
//
def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
                              (ins VR256:$src1, u8imm:$src2),
                              "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                              Sched<[WriteShuffle256]>, VEX, VEX_L;
let hasSideEffects = 0, mayStore = 1 in
def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
                              (ins i128mem:$dst, VR256:$src1, u8imm:$src2),
                              "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                              Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_L;

let Predicates = [HasAVX2, NoVLX] in {
  defm : vextract_lowering<"VEXTRACTI128", v4i64, v2i64>;
  defm : vextract_lowering<"VEXTRACTI128", v8i32, v4i32>;
  defm : vextract_lowering<"VEXTRACTI128", v16i16, v8i16>;
  defm : vextract_lowering<"VEXTRACTI128", v32i8, v16i8>;
}

//===----------------------------------------------------------------------===//
// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
//
multiclass avx2_pmovmask<string OpcodeStr,
                         Intrinsic IntLd128, Intrinsic IntLd256,
                         Intrinsic IntSt128, Intrinsic IntSt256,
                         X86SchedWriteMaskMove schedX,
                         X86SchedWriteMaskMove schedY> {
  def rm : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
                  (ins VR128:$src1, i128mem:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>,
                  VEX_4V, Sched<[schedX.RM]>;
  def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
                   (ins VR256:$src1, i256mem:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
                   VEX_4V, VEX_L, Sched<[schedY.RM]>;
  def mr : AVX28I<0x8e, MRMDestMem, (outs),
                  (ins i128mem:$dst, VR128:$src1, VR128:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>,
                  VEX_4V, Sched<[schedX.MR]>;
  def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
                   (ins i256mem:$dst, VR256:$src1, VR256:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
                   VEX_4V, VEX_L, Sched<[schedY.MR]>;
}

defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
                                int_x86_avx2_maskload_d,
                                int_x86_avx2_maskload_d_256,
                                int_x86_avx2_maskstore_d,
                                int_x86_avx2_maskstore_d_256,
                                WriteVecMaskMove32, WriteVecMaskMove32Y>;
defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
                                int_x86_avx2_maskload_q,
                                int_x86_avx2_maskload_q_256,
                                int_x86_avx2_maskstore_q,
                                int_x86_avx2_maskstore_q_256,
                                WriteVecMaskMove64, WriteVecMaskMove64Y>, VEX_W;

multiclass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT,
                            ValueType MaskVT> {
  // masked store
  def : Pat<(masked_store (VT RC:$src), addr:$ptr, (MaskVT RC:$mask)),
            (!cast<Instruction>(InstrStr#"mr") addr:$ptr, RC:$mask, RC:$src)>;
  // masked load
  def : Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), undef)),
            (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
  def : Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask),
                             (VT immAllZerosV))),
            (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
}
let Predicates = [HasAVX] in {
  defm : maskmov_lowering<"VMASKMOVPS", VR128, v4f32, v4i32>;
  defm : maskmov_lowering<"VMASKMOVPD", VR128, v2f64, v2i64>;
  defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8f32, v8i32>;
  defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4f64, v4i64>;
}
let Predicates = [HasAVX1Only] in {
  // Integer masked load/store for i32/i64 is not available; use the ps/pd
  // versions.
  defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8i32, v8i32>;
  defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4i64, v4i64>;
  defm : maskmov_lowering<"VMASKMOVPS", VR128, v4i32, v4i32>;
  defm : maskmov_lowering<"VMASKMOVPD", VR128, v2i64, v2i64>;
}
let Predicates = [HasAVX2] in {
  defm : maskmov_lowering<"VPMASKMOVDY", VR256, v8i32, v8i32>;
  defm : maskmov_lowering<"VPMASKMOVQY", VR256, v4i64, v4i64>;
  defm : maskmov_lowering<"VPMASKMOVD", VR128, v4i32, v4i32>;
  defm : maskmov_lowering<"VPMASKMOVQ", VR128, v2i64, v2i64>;
}

//===----------------------------------------------------------------------===//
// Variable Bit Shifts
//
multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType vt128, ValueType vt256> {
  def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
                  (ins VR128:$src1, VR128:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR128:$dst,
                    (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>,
                  VEX_4V, Sched<[SchedWriteVarVecShift.XMM]>;
  def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
                  (ins VR128:$src1, i128mem:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR128:$dst,
                    (vt128 (OpNode VR128:$src1,
                            (vt128 (load addr:$src2)))))]>,
                  VEX_4V, Sched<[SchedWriteVarVecShift.XMM.Folded,
                                 SchedWriteVarVecShift.XMM.ReadAfterFold]>;
  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
                   (ins VR256:$src1, VR256:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR256:$dst,
                     (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>,
                   VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM]>;
  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
                   (ins VR256:$src1, i256mem:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR256:$dst,
                     (vt256 (OpNode VR256:$src1,
                             (vt256 (load addr:$src2)))))]>,
                   VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded,
                                         SchedWriteVarVecShift.YMM.ReadAfterFold]>;
}

let Predicates = [HasAVX2, NoVLX] in {
  defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", X86vshlv, v4i32, v8i32>;
  defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", X86vshlv, v2i64, v4i64>, VEX_W;
  defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", X86vsrlv, v4i32, v8i32>;
  defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", X86vsrlv, v2i64, v4i64>, VEX_W;
  defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", X86vsrav, v4i32, v8i32>;
}

//===----------------------------------------------------------------------===//
// VGATHER - GATHER Operations

// FIXME: Improve scheduling of gather instructions.
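// Note on the operand modeling below: gathers write both the destination and
// the mask register (elements are cleared from the mask as they complete), so
// $mask_wb is a second output tied to $mask via the constraints at the
// instantiations.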
//===----------------------------------------------------------------------===//
// VGATHER - GATHER Operations

// FIXME: Improve scheduling of gather instructions.
multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256,
                       X86MemOperand memop128, X86MemOperand memop256> {
let mayLoad = 1, hasSideEffects = 0 in {
  def rm  : AVX28I<opc, MRMSrcMem4VOp3, (outs VR128:$dst, VR128:$mask_wb),
                   (ins VR128:$src1, memop128:$src2, VR128:$mask),
                   !strconcat(OpcodeStr,
                              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
                   []>, VEX, Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
  def Yrm : AVX28I<opc, MRMSrcMem4VOp3, (outs RC256:$dst, RC256:$mask_wb),
                   (ins RC256:$src1, memop256:$src2, RC256:$mask),
                   !strconcat(OpcodeStr,
                              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
                   []>, VEX, VEX_L, Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
}
}

let Predicates = [HasAVX2] in {
  let mayLoad = 1, hasSideEffects = 0, Constraints
    = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
    in {
    defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq",
                                  VR256, vx128mem, vx256mem>, VEX_W;
    defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq",
                                  VR256, vx128mem, vy256mem>, VEX_W;
    defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd",
                                  VR256, vx128mem, vy256mem>;
    defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd",
                                  VR128, vx64mem, vy128mem>;

    let ExeDomain = SSEPackedDouble in {
      defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd",
                                    VR256, vx128mem, vx256mem>, VEX_W;
      defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd",
                                    VR256, vx128mem, vy256mem>, VEX_W;
    }

    let ExeDomain = SSEPackedSingle in {
      defm VGATHERDPS : avx2_gather<0x92, "vgatherdps",
                                    VR256, vx128mem, vy256mem>;
      defm VGATHERQPS : avx2_gather<0x93, "vgatherqps",
                                    VR128, vx64mem, vy128mem>;
    }
  }
}

//===----------------------------------------------------------------------===//
// GFNI instructions
//===----------------------------------------------------------------------===//

multiclass GF2P8MULB_rm<string OpcodeStr, ValueType OpVT,
                        RegisterClass RC, PatFrag MemOpFrag,
                        X86MemOperand X86MemOp, bit Is2Addr = 0> {
  let ExeDomain = SSEPackedInt,
      AsmString = !if(Is2Addr,
        OpcodeStr#"\t{$src2, $dst|$dst, $src2}",
        OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}") in {
    let isCommutable = 1 in
    def rr : PDI<0xCF, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), "",
                 [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, RC:$src2)))]>,
             Sched<[SchedWriteVecALU.XMM]>, T8PD;

    def rm : PDI<0xCF, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, X86MemOp:$src2), "",
                 [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1,
                                       (MemOpFrag addr:$src2))))]>,
             Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>, T8PD;
  }
}
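
// For illustration: GF2P8MULB_rm leaves each def's asm string empty and
// instead sets AsmString once, keyed off Is2Addr, so one multiclass covers
// both encodings:
//   gf2p8mulb  %xmm1, %xmm0           # SSE, 2-address form (Is2Addr = 1)
//   vgf2p8mulb %xmm2, %xmm1, %xmm0    # VEX, 3-address form (Is2Addr = 0)
// GF2P8AFFINE_rmi below uses the same technique for the immediate forms.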
multiclass GF2P8AFFINE_rmi<bits<8> Op, string OpStr, ValueType OpVT,
                           SDNode OpNode, RegisterClass RC, PatFrag MemOpFrag,
                           X86MemOperand X86MemOp, bit Is2Addr = 0> {
  let AsmString = !if(Is2Addr,
        OpStr#"\t{$src3, $src2, $dst|$dst, $src2, $src3}",
        OpStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in {
    def rri : Ii8<Op, MRMSrcReg, (outs RC:$dst),
                  (ins RC:$src1, RC:$src2, u8imm:$src3), "",
                  [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))],
                  SSEPackedInt>, Sched<[SchedWriteVecALU.XMM]>;
    def rmi : Ii8<Op, MRMSrcMem, (outs RC:$dst),
                  (ins RC:$src1, X86MemOp:$src2, u8imm:$src3), "",
                  [(set RC:$dst, (OpVT (OpNode RC:$src1,
                                        (MemOpFrag addr:$src2),
                                        timm:$src3)))], SSEPackedInt>,
                  Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>;
  }
}

multiclass GF2P8AFFINE_common<bits<8> Op, string OpStr, SDNode OpNode> {
  let Constraints = "$src1 = $dst",
      Predicates  = [HasGFNI, UseSSE2] in
  defm NAME : GF2P8AFFINE_rmi<Op, OpStr, v16i8, OpNode,
                              VR128, load, i128mem, 1>;
  let Predicates = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
    defm V#NAME : GF2P8AFFINE_rmi<Op, "v"#OpStr, v16i8, OpNode, VR128,
                                  load, i128mem>, VEX_4V, VEX_W;
    defm V#NAME#Y : GF2P8AFFINE_rmi<Op, "v"#OpStr, v32i8, OpNode, VR256,
                                    load, i256mem>, VEX_4V, VEX_L, VEX_W;
  }
}

// GF2P8MULB
let Constraints = "$src1 = $dst",
    Predicates  = [HasGFNI, UseSSE2] in
defm GF2P8MULB : GF2P8MULB_rm<"gf2p8mulb", v16i8, VR128, memop,
                              i128mem, 1>;
let Predicates = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
  defm VGF2P8MULB : GF2P8MULB_rm<"vgf2p8mulb", v16i8, VR128, load,
                                 i128mem>, VEX_4V;
  defm VGF2P8MULBY : GF2P8MULB_rm<"vgf2p8mulb", v32i8, VR256, load,
                                  i256mem>, VEX_4V, VEX_L;
}
// GF2P8AFFINEINVQB, GF2P8AFFINEQB
let isCommutable = 0 in {
  defm GF2P8AFFINEINVQB : GF2P8AFFINE_common<0xCF, "gf2p8affineinvqb",
                                             X86GF2P8affineinvqb>, TAPD;
  defm GF2P8AFFINEQB : GF2P8AFFINE_common<0xCE, "gf2p8affineqb",
                                          X86GF2P8affineqb>, TAPD;
}
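
// For illustration: each GF2P8AFFINE_common defm uses NAME concatenation to
// produce all three variants at once, e.g.
//   defm GF2P8AFFINEQB : GF2P8AFFINE_common<0xCE, ...>
// yields GF2P8AFFINEQB (SSE, $src1 tied to $dst), VGF2P8AFFINEQB (VEX.128)
// and VGF2P8AFFINEQBY (VEX.256), each with rri/rmi forms, e.g.
//   vgf2p8affineqb $0x3, %ymm2, %ymm1, %ymm0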