1*0b57cec5SDimitry Andric /*===---- emmintrin.h - SSE2 intrinsics ------------------------------------=== 2*0b57cec5SDimitry Andric * 3*0b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric * 7*0b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 8*0b57cec5SDimitry Andric */ 9*0b57cec5SDimitry Andric 10*0b57cec5SDimitry Andric #ifndef __EMMINTRIN_H 11*0b57cec5SDimitry Andric #define __EMMINTRIN_H 12*0b57cec5SDimitry Andric 13*0b57cec5SDimitry Andric #include <xmmintrin.h> 14*0b57cec5SDimitry Andric 15*0b57cec5SDimitry Andric typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16))); 16*0b57cec5SDimitry Andric typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16))); 17*0b57cec5SDimitry Andric 18*0b57cec5SDimitry Andric typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1))); 19*0b57cec5SDimitry Andric typedef long long __m128i_u __attribute__((__vector_size__(16), __aligned__(1))); 20*0b57cec5SDimitry Andric 21*0b57cec5SDimitry Andric /* Type defines. 
*/ 22*0b57cec5SDimitry Andric typedef double __v2df __attribute__ ((__vector_size__ (16))); 23*0b57cec5SDimitry Andric typedef long long __v2di __attribute__ ((__vector_size__ (16))); 24*0b57cec5SDimitry Andric typedef short __v8hi __attribute__((__vector_size__(16))); 25*0b57cec5SDimitry Andric typedef char __v16qi __attribute__((__vector_size__(16))); 26*0b57cec5SDimitry Andric 27*0b57cec5SDimitry Andric /* Unsigned types */ 28*0b57cec5SDimitry Andric typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16))); 29*0b57cec5SDimitry Andric typedef unsigned short __v8hu __attribute__((__vector_size__(16))); 30*0b57cec5SDimitry Andric typedef unsigned char __v16qu __attribute__((__vector_size__(16))); 31*0b57cec5SDimitry Andric 32*0b57cec5SDimitry Andric /* We need an explicitly signed variant for char. Note that this shouldn't 33*0b57cec5SDimitry Andric * appear in the interface though. */ 34*0b57cec5SDimitry Andric typedef signed char __v16qs __attribute__((__vector_size__(16))); 35*0b57cec5SDimitry Andric 36*0b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. */ 37*0b57cec5SDimitry Andric #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2"), __min_vector_width__(128))) 38*0b57cec5SDimitry Andric #define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2"), __min_vector_width__(64))) 39*0b57cec5SDimitry Andric 40*0b57cec5SDimitry Andric /// Adds lower double-precision values in both operands and returns the 41*0b57cec5SDimitry Andric /// sum in the lower 64 bits of the result. The upper 64 bits of the result 42*0b57cec5SDimitry Andric /// are copied from the upper double-precision value of the first operand. 43*0b57cec5SDimitry Andric /// 44*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45*0b57cec5SDimitry Andric /// 46*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VADDSD / ADDSD </c> instruction. 
47*0b57cec5SDimitry Andric /// 48*0b57cec5SDimitry Andric /// \param __a 49*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 50*0b57cec5SDimitry Andric /// \param __b 51*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 52*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 53*0b57cec5SDimitry Andric /// sum of the lower 64 bits of both operands. The upper 64 bits are copied 54*0b57cec5SDimitry Andric /// from the upper 64 bits of the first source operand. 55*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 56*0b57cec5SDimitry Andric _mm_add_sd(__m128d __a, __m128d __b) 57*0b57cec5SDimitry Andric { 58*0b57cec5SDimitry Andric __a[0] += __b[0]; 59*0b57cec5SDimitry Andric return __a; 60*0b57cec5SDimitry Andric } 61*0b57cec5SDimitry Andric 62*0b57cec5SDimitry Andric /// Adds two 128-bit vectors of [2 x double]. 63*0b57cec5SDimitry Andric /// 64*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 65*0b57cec5SDimitry Andric /// 66*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VADDPD / ADDPD </c> instruction. 67*0b57cec5SDimitry Andric /// 68*0b57cec5SDimitry Andric /// \param __a 69*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 70*0b57cec5SDimitry Andric /// \param __b 71*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 72*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the sums of both 73*0b57cec5SDimitry Andric /// operands. 
74*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 75*0b57cec5SDimitry Andric _mm_add_pd(__m128d __a, __m128d __b) 76*0b57cec5SDimitry Andric { 77*0b57cec5SDimitry Andric return (__m128d)((__v2df)__a + (__v2df)__b); 78*0b57cec5SDimitry Andric } 79*0b57cec5SDimitry Andric 80*0b57cec5SDimitry Andric /// Subtracts the lower double-precision value of the second operand 81*0b57cec5SDimitry Andric /// from the lower double-precision value of the first operand and returns 82*0b57cec5SDimitry Andric /// the difference in the lower 64 bits of the result. The upper 64 bits of 83*0b57cec5SDimitry Andric /// the result are copied from the upper double-precision value of the first 84*0b57cec5SDimitry Andric /// operand. 85*0b57cec5SDimitry Andric /// 86*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 87*0b57cec5SDimitry Andric /// 88*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSUBSD / SUBSD </c> instruction. 89*0b57cec5SDimitry Andric /// 90*0b57cec5SDimitry Andric /// \param __a 91*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the minuend. 92*0b57cec5SDimitry Andric /// \param __b 93*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the subtrahend. 94*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 95*0b57cec5SDimitry Andric /// difference of the lower 64 bits of both operands. The upper 64 bits are 96*0b57cec5SDimitry Andric /// copied from the upper 64 bits of the first source operand. 97*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 98*0b57cec5SDimitry Andric _mm_sub_sd(__m128d __a, __m128d __b) 99*0b57cec5SDimitry Andric { 100*0b57cec5SDimitry Andric __a[0] -= __b[0]; 101*0b57cec5SDimitry Andric return __a; 102*0b57cec5SDimitry Andric } 103*0b57cec5SDimitry Andric 104*0b57cec5SDimitry Andric /// Subtracts two 128-bit vectors of [2 x double]. 
105*0b57cec5SDimitry Andric /// 106*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 107*0b57cec5SDimitry Andric /// 108*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSUBPD / SUBPD </c> instruction. 109*0b57cec5SDimitry Andric /// 110*0b57cec5SDimitry Andric /// \param __a 111*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the minuend. 112*0b57cec5SDimitry Andric /// \param __b 113*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the subtrahend. 114*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the differences between 115*0b57cec5SDimitry Andric /// both operands. 116*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 117*0b57cec5SDimitry Andric _mm_sub_pd(__m128d __a, __m128d __b) 118*0b57cec5SDimitry Andric { 119*0b57cec5SDimitry Andric return (__m128d)((__v2df)__a - (__v2df)__b); 120*0b57cec5SDimitry Andric } 121*0b57cec5SDimitry Andric 122*0b57cec5SDimitry Andric /// Multiplies lower double-precision values in both operands and returns 123*0b57cec5SDimitry Andric /// the product in the lower 64 bits of the result. The upper 64 bits of the 124*0b57cec5SDimitry Andric /// result are copied from the upper double-precision value of the first 125*0b57cec5SDimitry Andric /// operand. 126*0b57cec5SDimitry Andric /// 127*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 128*0b57cec5SDimitry Andric /// 129*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMULSD / MULSD </c> instruction. 130*0b57cec5SDimitry Andric /// 131*0b57cec5SDimitry Andric /// \param __a 132*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 133*0b57cec5SDimitry Andric /// \param __b 134*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 
135*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 136*0b57cec5SDimitry Andric /// product of the lower 64 bits of both operands. The upper 64 bits are 137*0b57cec5SDimitry Andric /// copied from the upper 64 bits of the first source operand. 138*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 139*0b57cec5SDimitry Andric _mm_mul_sd(__m128d __a, __m128d __b) 140*0b57cec5SDimitry Andric { 141*0b57cec5SDimitry Andric __a[0] *= __b[0]; 142*0b57cec5SDimitry Andric return __a; 143*0b57cec5SDimitry Andric } 144*0b57cec5SDimitry Andric 145*0b57cec5SDimitry Andric /// Multiplies two 128-bit vectors of [2 x double]. 146*0b57cec5SDimitry Andric /// 147*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 148*0b57cec5SDimitry Andric /// 149*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMULPD / MULPD </c> instruction. 150*0b57cec5SDimitry Andric /// 151*0b57cec5SDimitry Andric /// \param __a 152*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 153*0b57cec5SDimitry Andric /// \param __b 154*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 155*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the products of both 156*0b57cec5SDimitry Andric /// operands. 157*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 158*0b57cec5SDimitry Andric _mm_mul_pd(__m128d __a, __m128d __b) 159*0b57cec5SDimitry Andric { 160*0b57cec5SDimitry Andric return (__m128d)((__v2df)__a * (__v2df)__b); 161*0b57cec5SDimitry Andric } 162*0b57cec5SDimitry Andric 163*0b57cec5SDimitry Andric /// Divides the lower double-precision value of the first operand by the 164*0b57cec5SDimitry Andric /// lower double-precision value of the second operand and returns the 165*0b57cec5SDimitry Andric /// quotient in the lower 64 bits of the result. 
The upper 64 bits of the 166*0b57cec5SDimitry Andric /// result are copied from the upper double-precision value of the first 167*0b57cec5SDimitry Andric /// operand. 168*0b57cec5SDimitry Andric /// 169*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 170*0b57cec5SDimitry Andric /// 171*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VDIVSD / DIVSD </c> instruction. 172*0b57cec5SDimitry Andric /// 173*0b57cec5SDimitry Andric /// \param __a 174*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the dividend. 175*0b57cec5SDimitry Andric /// \param __b 176*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing divisor. 177*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 178*0b57cec5SDimitry Andric /// quotient of the lower 64 bits of both operands. The upper 64 bits are 179*0b57cec5SDimitry Andric /// copied from the upper 64 bits of the first source operand. 180*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 181*0b57cec5SDimitry Andric _mm_div_sd(__m128d __a, __m128d __b) 182*0b57cec5SDimitry Andric { 183*0b57cec5SDimitry Andric __a[0] /= __b[0]; 184*0b57cec5SDimitry Andric return __a; 185*0b57cec5SDimitry Andric } 186*0b57cec5SDimitry Andric 187*0b57cec5SDimitry Andric /// Performs an element-by-element division of two 128-bit vectors of 188*0b57cec5SDimitry Andric /// [2 x double]. 189*0b57cec5SDimitry Andric /// 190*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 191*0b57cec5SDimitry Andric /// 192*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VDIVPD / DIVPD </c> instruction. 193*0b57cec5SDimitry Andric /// 194*0b57cec5SDimitry Andric /// \param __a 195*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the dividend. 196*0b57cec5SDimitry Andric /// \param __b 197*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the divisor. 
198*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the quotients of both 199*0b57cec5SDimitry Andric /// operands. 200*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 201*0b57cec5SDimitry Andric _mm_div_pd(__m128d __a, __m128d __b) 202*0b57cec5SDimitry Andric { 203*0b57cec5SDimitry Andric return (__m128d)((__v2df)__a / (__v2df)__b); 204*0b57cec5SDimitry Andric } 205*0b57cec5SDimitry Andric 206*0b57cec5SDimitry Andric /// Calculates the square root of the lower double-precision value of 207*0b57cec5SDimitry Andric /// the second operand and returns it in the lower 64 bits of the result. 208*0b57cec5SDimitry Andric /// The upper 64 bits of the result are copied from the upper 209*0b57cec5SDimitry Andric /// double-precision value of the first operand. 210*0b57cec5SDimitry Andric /// 211*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 212*0b57cec5SDimitry Andric /// 213*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSQRTSD / SQRTSD </c> instruction. 214*0b57cec5SDimitry Andric /// 215*0b57cec5SDimitry Andric /// \param __a 216*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 217*0b57cec5SDimitry Andric /// upper 64 bits of this operand are copied to the upper 64 bits of the 218*0b57cec5SDimitry Andric /// result. 219*0b57cec5SDimitry Andric /// \param __b 220*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 221*0b57cec5SDimitry Andric /// square root is calculated using the lower 64 bits of this operand. 222*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 223*0b57cec5SDimitry Andric /// square root of the lower 64 bits of operand \a __b, and whose upper 64 224*0b57cec5SDimitry Andric /// bits are copied from the upper 64 bits of operand \a __a. 
225*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 226*0b57cec5SDimitry Andric _mm_sqrt_sd(__m128d __a, __m128d __b) 227*0b57cec5SDimitry Andric { 228*0b57cec5SDimitry Andric __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b); 229*0b57cec5SDimitry Andric return __extension__ (__m128d) { __c[0], __a[1] }; 230*0b57cec5SDimitry Andric } 231*0b57cec5SDimitry Andric 232*0b57cec5SDimitry Andric /// Calculates the square root of the each of two values stored in a 233*0b57cec5SDimitry Andric /// 128-bit vector of [2 x double]. 234*0b57cec5SDimitry Andric /// 235*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 236*0b57cec5SDimitry Andric /// 237*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSQRTPD / SQRTPD </c> instruction. 238*0b57cec5SDimitry Andric /// 239*0b57cec5SDimitry Andric /// \param __a 240*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 241*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the square roots of the 242*0b57cec5SDimitry Andric /// values in the operand. 243*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 244*0b57cec5SDimitry Andric _mm_sqrt_pd(__m128d __a) 245*0b57cec5SDimitry Andric { 246*0b57cec5SDimitry Andric return __builtin_ia32_sqrtpd((__v2df)__a); 247*0b57cec5SDimitry Andric } 248*0b57cec5SDimitry Andric 249*0b57cec5SDimitry Andric /// Compares lower 64-bit double-precision values of both operands, and 250*0b57cec5SDimitry Andric /// returns the lesser of the pair of values in the lower 64-bits of the 251*0b57cec5SDimitry Andric /// result. The upper 64 bits of the result are copied from the upper 252*0b57cec5SDimitry Andric /// double-precision value of the first operand. 253*0b57cec5SDimitry Andric /// 254*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 255*0b57cec5SDimitry Andric /// 256*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMINSD / MINSD </c> instruction. 
257*0b57cec5SDimitry Andric /// 258*0b57cec5SDimitry Andric /// \param __a 259*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 260*0b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 261*0b57cec5SDimitry Andric /// \param __b 262*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 263*0b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 264*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 265*0b57cec5SDimitry Andric /// minimum value between both operands. The upper 64 bits are copied from 266*0b57cec5SDimitry Andric /// the upper 64 bits of the first source operand. 267*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 268*0b57cec5SDimitry Andric _mm_min_sd(__m128d __a, __m128d __b) 269*0b57cec5SDimitry Andric { 270*0b57cec5SDimitry Andric return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b); 271*0b57cec5SDimitry Andric } 272*0b57cec5SDimitry Andric 273*0b57cec5SDimitry Andric /// Performs element-by-element comparison of the two 128-bit vectors of 274*0b57cec5SDimitry Andric /// [2 x double] and returns the vector containing the lesser of each pair of 275*0b57cec5SDimitry Andric /// values. 276*0b57cec5SDimitry Andric /// 277*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 278*0b57cec5SDimitry Andric /// 279*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMINPD / MINPD </c> instruction. 280*0b57cec5SDimitry Andric /// 281*0b57cec5SDimitry Andric /// \param __a 282*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 283*0b57cec5SDimitry Andric /// \param __b 284*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 
285*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the minimum values 286*0b57cec5SDimitry Andric /// between both operands. 287*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 288*0b57cec5SDimitry Andric _mm_min_pd(__m128d __a, __m128d __b) 289*0b57cec5SDimitry Andric { 290*0b57cec5SDimitry Andric return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b); 291*0b57cec5SDimitry Andric } 292*0b57cec5SDimitry Andric 293*0b57cec5SDimitry Andric /// Compares lower 64-bit double-precision values of both operands, and 294*0b57cec5SDimitry Andric /// returns the greater of the pair of values in the lower 64-bits of the 295*0b57cec5SDimitry Andric /// result. The upper 64 bits of the result are copied from the upper 296*0b57cec5SDimitry Andric /// double-precision value of the first operand. 297*0b57cec5SDimitry Andric /// 298*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 299*0b57cec5SDimitry Andric /// 300*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMAXSD / MAXSD </c> instruction. 301*0b57cec5SDimitry Andric /// 302*0b57cec5SDimitry Andric /// \param __a 303*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 304*0b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 305*0b57cec5SDimitry Andric /// \param __b 306*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 307*0b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 308*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 309*0b57cec5SDimitry Andric /// maximum value between both operands. The upper 64 bits are copied from 310*0b57cec5SDimitry Andric /// the upper 64 bits of the first source operand. 
311*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 312*0b57cec5SDimitry Andric _mm_max_sd(__m128d __a, __m128d __b) 313*0b57cec5SDimitry Andric { 314*0b57cec5SDimitry Andric return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b); 315*0b57cec5SDimitry Andric } 316*0b57cec5SDimitry Andric 317*0b57cec5SDimitry Andric /// Performs element-by-element comparison of the two 128-bit vectors of 318*0b57cec5SDimitry Andric /// [2 x double] and returns the vector containing the greater of each pair 319*0b57cec5SDimitry Andric /// of values. 320*0b57cec5SDimitry Andric /// 321*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 322*0b57cec5SDimitry Andric /// 323*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMAXPD / MAXPD </c> instruction. 324*0b57cec5SDimitry Andric /// 325*0b57cec5SDimitry Andric /// \param __a 326*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 327*0b57cec5SDimitry Andric /// \param __b 328*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 329*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the maximum values 330*0b57cec5SDimitry Andric /// between both operands. 331*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 332*0b57cec5SDimitry Andric _mm_max_pd(__m128d __a, __m128d __b) 333*0b57cec5SDimitry Andric { 334*0b57cec5SDimitry Andric return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b); 335*0b57cec5SDimitry Andric } 336*0b57cec5SDimitry Andric 337*0b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit vectors of [2 x double]. 338*0b57cec5SDimitry Andric /// 339*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 340*0b57cec5SDimitry Andric /// 341*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction. 
342*0b57cec5SDimitry Andric /// 343*0b57cec5SDimitry Andric /// \param __a 344*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 345*0b57cec5SDimitry Andric /// \param __b 346*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 347*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the 348*0b57cec5SDimitry Andric /// values between both operands. 349*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 350*0b57cec5SDimitry Andric _mm_and_pd(__m128d __a, __m128d __b) 351*0b57cec5SDimitry Andric { 352*0b57cec5SDimitry Andric return (__m128d)((__v2du)__a & (__v2du)__b); 353*0b57cec5SDimitry Andric } 354*0b57cec5SDimitry Andric 355*0b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit vectors of [2 x double], using 356*0b57cec5SDimitry Andric /// the one's complement of the values contained in the first source operand. 357*0b57cec5SDimitry Andric /// 358*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 359*0b57cec5SDimitry Andric /// 360*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction. 361*0b57cec5SDimitry Andric /// 362*0b57cec5SDimitry Andric /// \param __a 363*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the left source operand. The 364*0b57cec5SDimitry Andric /// one's complement of this value is used in the bitwise AND. 365*0b57cec5SDimitry Andric /// \param __b 366*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the right source operand. 367*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the 368*0b57cec5SDimitry Andric /// values in the second operand and the one's complement of the first 369*0b57cec5SDimitry Andric /// operand. 
370*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 371*0b57cec5SDimitry Andric _mm_andnot_pd(__m128d __a, __m128d __b) 372*0b57cec5SDimitry Andric { 373*0b57cec5SDimitry Andric return (__m128d)(~(__v2du)__a & (__v2du)__b); 374*0b57cec5SDimitry Andric } 375*0b57cec5SDimitry Andric 376*0b57cec5SDimitry Andric /// Performs a bitwise OR of two 128-bit vectors of [2 x double]. 377*0b57cec5SDimitry Andric /// 378*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 379*0b57cec5SDimitry Andric /// 380*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPOR / POR </c> instruction. 381*0b57cec5SDimitry Andric /// 382*0b57cec5SDimitry Andric /// \param __a 383*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 384*0b57cec5SDimitry Andric /// \param __b 385*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 386*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise OR of the 387*0b57cec5SDimitry Andric /// values between both operands. 388*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 389*0b57cec5SDimitry Andric _mm_or_pd(__m128d __a, __m128d __b) 390*0b57cec5SDimitry Andric { 391*0b57cec5SDimitry Andric return (__m128d)((__v2du)__a | (__v2du)__b); 392*0b57cec5SDimitry Andric } 393*0b57cec5SDimitry Andric 394*0b57cec5SDimitry Andric /// Performs a bitwise XOR of two 128-bit vectors of [2 x double]. 395*0b57cec5SDimitry Andric /// 396*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 397*0b57cec5SDimitry Andric /// 398*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction. 399*0b57cec5SDimitry Andric /// 400*0b57cec5SDimitry Andric /// \param __a 401*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 
402*0b57cec5SDimitry Andric /// \param __b 403*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 404*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise XOR of the 405*0b57cec5SDimitry Andric /// values between both operands. 406*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 407*0b57cec5SDimitry Andric _mm_xor_pd(__m128d __a, __m128d __b) 408*0b57cec5SDimitry Andric { 409*0b57cec5SDimitry Andric return (__m128d)((__v2du)__a ^ (__v2du)__b); 410*0b57cec5SDimitry Andric } 411*0b57cec5SDimitry Andric 412*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 413*0b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] for equality. Each comparison yields 0x0 414*0b57cec5SDimitry Andric /// for false, 0xFFFFFFFFFFFFFFFF for true. 415*0b57cec5SDimitry Andric /// 416*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 417*0b57cec5SDimitry Andric /// 418*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPEQPD / CMPEQPD </c> instruction. 419*0b57cec5SDimitry Andric /// 420*0b57cec5SDimitry Andric /// \param __a 421*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 422*0b57cec5SDimitry Andric /// \param __b 423*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 424*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
425*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 426*0b57cec5SDimitry Andric _mm_cmpeq_pd(__m128d __a, __m128d __b) 427*0b57cec5SDimitry Andric { 428*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b); 429*0b57cec5SDimitry Andric } 430*0b57cec5SDimitry Andric 431*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 432*0b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 433*0b57cec5SDimitry Andric /// operand are less than those in the second operand. Each comparison 434*0b57cec5SDimitry Andric /// yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 435*0b57cec5SDimitry Andric /// 436*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 437*0b57cec5SDimitry Andric /// 438*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction. 439*0b57cec5SDimitry Andric /// 440*0b57cec5SDimitry Andric /// \param __a 441*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 442*0b57cec5SDimitry Andric /// \param __b 443*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 444*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 445*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 446*0b57cec5SDimitry Andric _mm_cmplt_pd(__m128d __a, __m128d __b) 447*0b57cec5SDimitry Andric { 448*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b); 449*0b57cec5SDimitry Andric } 450*0b57cec5SDimitry Andric 451*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 452*0b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 453*0b57cec5SDimitry Andric /// operand are less than or equal to those in the second operand. 
454*0b57cec5SDimitry Andric /// 455*0b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 456*0b57cec5SDimitry Andric /// 457*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 458*0b57cec5SDimitry Andric /// 459*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction. 460*0b57cec5SDimitry Andric /// 461*0b57cec5SDimitry Andric /// \param __a 462*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 463*0b57cec5SDimitry Andric /// \param __b 464*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 465*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 466*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 467*0b57cec5SDimitry Andric _mm_cmple_pd(__m128d __a, __m128d __b) 468*0b57cec5SDimitry Andric { 469*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b); 470*0b57cec5SDimitry Andric } 471*0b57cec5SDimitry Andric 472*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 473*0b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 474*0b57cec5SDimitry Andric /// operand are greater than those in the second operand. 475*0b57cec5SDimitry Andric /// 476*0b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 477*0b57cec5SDimitry Andric /// 478*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 479*0b57cec5SDimitry Andric /// 480*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction. 481*0b57cec5SDimitry Andric /// 482*0b57cec5SDimitry Andric /// \param __a 483*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 484*0b57cec5SDimitry Andric /// \param __b 485*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 
486*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 487*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 488*0b57cec5SDimitry Andric _mm_cmpgt_pd(__m128d __a, __m128d __b) 489*0b57cec5SDimitry Andric { 490*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a); 491*0b57cec5SDimitry Andric } 492*0b57cec5SDimitry Andric 493*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 494*0b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 495*0b57cec5SDimitry Andric /// operand are greater than or equal to those in the second operand. 496*0b57cec5SDimitry Andric /// 497*0b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 498*0b57cec5SDimitry Andric /// 499*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 500*0b57cec5SDimitry Andric /// 501*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction. 502*0b57cec5SDimitry Andric /// 503*0b57cec5SDimitry Andric /// \param __a 504*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 505*0b57cec5SDimitry Andric /// \param __b 506*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 507*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
508*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 509*0b57cec5SDimitry Andric _mm_cmpge_pd(__m128d __a, __m128d __b) 510*0b57cec5SDimitry Andric { 511*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a); 512*0b57cec5SDimitry Andric } 513*0b57cec5SDimitry Andric 514*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 515*0b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 516*0b57cec5SDimitry Andric /// operand are ordered with respect to those in the second operand. 517*0b57cec5SDimitry Andric /// 518*0b57cec5SDimitry Andric /// A pair of double-precision values are "ordered" with respect to each 519*0b57cec5SDimitry Andric /// other if neither value is a NaN. Each comparison yields 0x0 for false, 520*0b57cec5SDimitry Andric /// 0xFFFFFFFFFFFFFFFF for true. 521*0b57cec5SDimitry Andric /// 522*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 523*0b57cec5SDimitry Andric /// 524*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPORDPD / CMPORDPD </c> instruction. 525*0b57cec5SDimitry Andric /// 526*0b57cec5SDimitry Andric /// \param __a 527*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 528*0b57cec5SDimitry Andric /// \param __b 529*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 530*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
531*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 532*0b57cec5SDimitry Andric _mm_cmpord_pd(__m128d __a, __m128d __b) 533*0b57cec5SDimitry Andric { 534*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b); 535*0b57cec5SDimitry Andric } 536*0b57cec5SDimitry Andric 537*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 538*0b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 539*0b57cec5SDimitry Andric /// operand are unordered with respect to those in the second operand. 540*0b57cec5SDimitry Andric /// 541*0b57cec5SDimitry Andric /// A pair of double-precision values are "unordered" with respect to each 542*0b57cec5SDimitry Andric /// other if one or both values are NaN. Each comparison yields 0x0 for 543*0b57cec5SDimitry Andric /// false, 0xFFFFFFFFFFFFFFFF for true. 544*0b57cec5SDimitry Andric /// 545*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 546*0b57cec5SDimitry Andric /// 547*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPUNORDPD / CMPUNORDPD </c> 548*0b57cec5SDimitry Andric /// instruction. 549*0b57cec5SDimitry Andric /// 550*0b57cec5SDimitry Andric /// \param __a 551*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 552*0b57cec5SDimitry Andric /// \param __b 553*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 554*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
555*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 556*0b57cec5SDimitry Andric _mm_cmpunord_pd(__m128d __a, __m128d __b) 557*0b57cec5SDimitry Andric { 558*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b); 559*0b57cec5SDimitry Andric } 560*0b57cec5SDimitry Andric 561*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 562*0b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 563*0b57cec5SDimitry Andric /// operand are unequal to those in the second operand. 564*0b57cec5SDimitry Andric /// 565*0b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 566*0b57cec5SDimitry Andric /// 567*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 568*0b57cec5SDimitry Andric /// 569*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNEQPD / CMPNEQPD </c> instruction. 570*0b57cec5SDimitry Andric /// 571*0b57cec5SDimitry Andric /// \param __a 572*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 573*0b57cec5SDimitry Andric /// \param __b 574*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 575*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 576*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 577*0b57cec5SDimitry Andric _mm_cmpneq_pd(__m128d __a, __m128d __b) 578*0b57cec5SDimitry Andric { 579*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b); 580*0b57cec5SDimitry Andric } 581*0b57cec5SDimitry Andric 582*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 583*0b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 584*0b57cec5SDimitry Andric /// operand are not less than those in the second operand. 
585*0b57cec5SDimitry Andric /// 586*0b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 587*0b57cec5SDimitry Andric /// 588*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 589*0b57cec5SDimitry Andric /// 590*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction. 591*0b57cec5SDimitry Andric /// 592*0b57cec5SDimitry Andric /// \param __a 593*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 594*0b57cec5SDimitry Andric /// \param __b 595*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 596*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 597*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 598*0b57cec5SDimitry Andric _mm_cmpnlt_pd(__m128d __a, __m128d __b) 599*0b57cec5SDimitry Andric { 600*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b); 601*0b57cec5SDimitry Andric } 602*0b57cec5SDimitry Andric 603*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 604*0b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 605*0b57cec5SDimitry Andric /// operand are not less than or equal to those in the second operand. 606*0b57cec5SDimitry Andric /// 607*0b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 608*0b57cec5SDimitry Andric /// 609*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 610*0b57cec5SDimitry Andric /// 611*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction. 612*0b57cec5SDimitry Andric /// 613*0b57cec5SDimitry Andric /// \param __a 614*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 615*0b57cec5SDimitry Andric /// \param __b 616*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 
617*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 618*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 619*0b57cec5SDimitry Andric _mm_cmpnle_pd(__m128d __a, __m128d __b) 620*0b57cec5SDimitry Andric { 621*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b); 622*0b57cec5SDimitry Andric } 623*0b57cec5SDimitry Andric 624*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 625*0b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 626*0b57cec5SDimitry Andric /// operand are not greater than those in the second operand. 627*0b57cec5SDimitry Andric /// 628*0b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 629*0b57cec5SDimitry Andric /// 630*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 631*0b57cec5SDimitry Andric /// 632*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction. 633*0b57cec5SDimitry Andric /// 634*0b57cec5SDimitry Andric /// \param __a 635*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 636*0b57cec5SDimitry Andric /// \param __b 637*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 638*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
639*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 640*0b57cec5SDimitry Andric _mm_cmpngt_pd(__m128d __a, __m128d __b) 641*0b57cec5SDimitry Andric { 642*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a); 643*0b57cec5SDimitry Andric } 644*0b57cec5SDimitry Andric 645*0b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 646*0b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 647*0b57cec5SDimitry Andric /// operand are not greater than or equal to those in the second operand. 648*0b57cec5SDimitry Andric /// 649*0b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 650*0b57cec5SDimitry Andric /// 651*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 652*0b57cec5SDimitry Andric /// 653*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction. 654*0b57cec5SDimitry Andric /// 655*0b57cec5SDimitry Andric /// \param __a 656*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 657*0b57cec5SDimitry Andric /// \param __b 658*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 659*0b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 660*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 661*0b57cec5SDimitry Andric _mm_cmpnge_pd(__m128d __a, __m128d __b) 662*0b57cec5SDimitry Andric { 663*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a); 664*0b57cec5SDimitry Andric } 665*0b57cec5SDimitry Andric 666*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 667*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] for equality. 668*0b57cec5SDimitry Andric /// 669*0b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 
670*0b57cec5SDimitry Andric /// 671*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 672*0b57cec5SDimitry Andric /// 673*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPEQSD / CMPEQSD </c> instruction. 674*0b57cec5SDimitry Andric /// 675*0b57cec5SDimitry Andric /// \param __a 676*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 677*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 678*0b57cec5SDimitry Andric /// \param __b 679*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 680*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 681*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 682*0b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 683*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 684*0b57cec5SDimitry Andric _mm_cmpeq_sd(__m128d __a, __m128d __b) 685*0b57cec5SDimitry Andric { 686*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b); 687*0b57cec5SDimitry Andric } 688*0b57cec5SDimitry Andric 689*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 690*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 691*0b57cec5SDimitry Andric /// the value in the first parameter is less than the corresponding value in 692*0b57cec5SDimitry Andric /// the second parameter. 693*0b57cec5SDimitry Andric /// 694*0b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 695*0b57cec5SDimitry Andric /// 696*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 697*0b57cec5SDimitry Andric /// 698*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction. 
699*0b57cec5SDimitry Andric /// 700*0b57cec5SDimitry Andric /// \param __a 701*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 702*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 703*0b57cec5SDimitry Andric /// \param __b 704*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 705*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 706*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 707*0b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 708*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 709*0b57cec5SDimitry Andric _mm_cmplt_sd(__m128d __a, __m128d __b) 710*0b57cec5SDimitry Andric { 711*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b); 712*0b57cec5SDimitry Andric } 713*0b57cec5SDimitry Andric 714*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 715*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 716*0b57cec5SDimitry Andric /// the value in the first parameter is less than or equal to the 717*0b57cec5SDimitry Andric /// corresponding value in the second parameter. 718*0b57cec5SDimitry Andric /// 719*0b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 720*0b57cec5SDimitry Andric /// 721*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 722*0b57cec5SDimitry Andric /// 723*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction. 724*0b57cec5SDimitry Andric /// 725*0b57cec5SDimitry Andric /// \param __a 726*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 
The lower double-precision value is 727*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 728*0b57cec5SDimitry Andric /// \param __b 729*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 730*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 731*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 732*0b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 733*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 734*0b57cec5SDimitry Andric _mm_cmple_sd(__m128d __a, __m128d __b) 735*0b57cec5SDimitry Andric { 736*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b); 737*0b57cec5SDimitry Andric } 738*0b57cec5SDimitry Andric 739*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 740*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 741*0b57cec5SDimitry Andric /// the value in the first parameter is greater than the corresponding value 742*0b57cec5SDimitry Andric /// in the second parameter. 743*0b57cec5SDimitry Andric /// 744*0b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 745*0b57cec5SDimitry Andric /// 746*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 747*0b57cec5SDimitry Andric /// 748*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction. 749*0b57cec5SDimitry Andric /// 750*0b57cec5SDimitry Andric /// \param __a 751*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 752*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 753*0b57cec5SDimitry Andric /// \param __b 754*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 
The lower double-precision value is 755*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 756*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 757*0b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 758*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 759*0b57cec5SDimitry Andric _mm_cmpgt_sd(__m128d __a, __m128d __b) 760*0b57cec5SDimitry Andric { 761*0b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a); 762*0b57cec5SDimitry Andric return __extension__ (__m128d) { __c[0], __a[1] }; 763*0b57cec5SDimitry Andric } 764*0b57cec5SDimitry Andric 765*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 766*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 767*0b57cec5SDimitry Andric /// the value in the first parameter is greater than or equal to the 768*0b57cec5SDimitry Andric /// corresponding value in the second parameter. 769*0b57cec5SDimitry Andric /// 770*0b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 771*0b57cec5SDimitry Andric /// 772*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 773*0b57cec5SDimitry Andric /// 774*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction. 775*0b57cec5SDimitry Andric /// 776*0b57cec5SDimitry Andric /// \param __a 777*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 778*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 779*0b57cec5SDimitry Andric /// \param __b 780*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 781*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 
782*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 783*0b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 784*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 785*0b57cec5SDimitry Andric _mm_cmpge_sd(__m128d __a, __m128d __b) 786*0b57cec5SDimitry Andric { 787*0b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a); 788*0b57cec5SDimitry Andric return __extension__ (__m128d) { __c[0], __a[1] }; 789*0b57cec5SDimitry Andric } 790*0b57cec5SDimitry Andric 791*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 792*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 793*0b57cec5SDimitry Andric /// the value in the first parameter is "ordered" with respect to the 794*0b57cec5SDimitry Andric /// corresponding value in the second parameter. 795*0b57cec5SDimitry Andric /// 796*0b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair 797*0b57cec5SDimitry Andric /// of double-precision values are "ordered" with respect to each other if 798*0b57cec5SDimitry Andric /// neither value is a NaN. 799*0b57cec5SDimitry Andric /// 800*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 801*0b57cec5SDimitry Andric /// 802*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPORDSD / CMPORDSD </c> instruction. 803*0b57cec5SDimitry Andric /// 804*0b57cec5SDimitry Andric /// \param __a 805*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 806*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 807*0b57cec5SDimitry Andric /// \param __b 808*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 
The lower double-precision value is 809*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 810*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 811*0b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 812*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 813*0b57cec5SDimitry Andric _mm_cmpord_sd(__m128d __a, __m128d __b) 814*0b57cec5SDimitry Andric { 815*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b); 816*0b57cec5SDimitry Andric } 817*0b57cec5SDimitry Andric 818*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 819*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 820*0b57cec5SDimitry Andric /// the value in the first parameter is "unordered" with respect to the 821*0b57cec5SDimitry Andric /// corresponding value in the second parameter. 822*0b57cec5SDimitry Andric /// 823*0b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair 824*0b57cec5SDimitry Andric /// of double-precision values are "unordered" with respect to each other if 825*0b57cec5SDimitry Andric /// one or both values are NaN. 826*0b57cec5SDimitry Andric /// 827*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 828*0b57cec5SDimitry Andric /// 829*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPUNORDSD / CMPUNORDSD </c> 830*0b57cec5SDimitry Andric /// instruction. 831*0b57cec5SDimitry Andric /// 832*0b57cec5SDimitry Andric /// \param __a 833*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 834*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 835*0b57cec5SDimitry Andric /// \param __b 836*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 
The lower double-precision value is 837*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 838*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 839*0b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 840*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 841*0b57cec5SDimitry Andric _mm_cmpunord_sd(__m128d __a, __m128d __b) 842*0b57cec5SDimitry Andric { 843*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b); 844*0b57cec5SDimitry Andric } 845*0b57cec5SDimitry Andric 846*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 847*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 848*0b57cec5SDimitry Andric /// the value in the first parameter is unequal to the corresponding value in 849*0b57cec5SDimitry Andric /// the second parameter. 850*0b57cec5SDimitry Andric /// 851*0b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 852*0b57cec5SDimitry Andric /// 853*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 854*0b57cec5SDimitry Andric /// 855*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNEQSD / CMPNEQSD </c> instruction. 856*0b57cec5SDimitry Andric /// 857*0b57cec5SDimitry Andric /// \param __a 858*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 859*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 860*0b57cec5SDimitry Andric /// \param __b 861*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 862*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 863*0b57cec5SDimitry Andric /// \returns A 128-bit vector. 
The lower 64 bits contains the comparison 864*0b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 865*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 866*0b57cec5SDimitry Andric _mm_cmpneq_sd(__m128d __a, __m128d __b) 867*0b57cec5SDimitry Andric { 868*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b); 869*0b57cec5SDimitry Andric } 870*0b57cec5SDimitry Andric 871*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 872*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 873*0b57cec5SDimitry Andric /// the value in the first parameter is not less than the corresponding 874*0b57cec5SDimitry Andric /// value in the second parameter. 875*0b57cec5SDimitry Andric /// 876*0b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 877*0b57cec5SDimitry Andric /// 878*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 879*0b57cec5SDimitry Andric /// 880*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction. 881*0b57cec5SDimitry Andric /// 882*0b57cec5SDimitry Andric /// \param __a 883*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 884*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 885*0b57cec5SDimitry Andric /// \param __b 886*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 887*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 888*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 889*0b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
890*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 891*0b57cec5SDimitry Andric _mm_cmpnlt_sd(__m128d __a, __m128d __b) 892*0b57cec5SDimitry Andric { 893*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b); 894*0b57cec5SDimitry Andric } 895*0b57cec5SDimitry Andric 896*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 897*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 898*0b57cec5SDimitry Andric /// the value in the first parameter is not less than or equal to the 899*0b57cec5SDimitry Andric /// corresponding value in the second parameter. 900*0b57cec5SDimitry Andric /// 901*0b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 902*0b57cec5SDimitry Andric /// 903*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 904*0b57cec5SDimitry Andric /// 905*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction. 906*0b57cec5SDimitry Andric /// 907*0b57cec5SDimitry Andric /// \param __a 908*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 909*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 910*0b57cec5SDimitry Andric /// \param __b 911*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 912*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 913*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 914*0b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
915*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 916*0b57cec5SDimitry Andric _mm_cmpnle_sd(__m128d __a, __m128d __b) 917*0b57cec5SDimitry Andric { 918*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b); 919*0b57cec5SDimitry Andric } 920*0b57cec5SDimitry Andric 921*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 922*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 923*0b57cec5SDimitry Andric /// the value in the first parameter is not greater than the corresponding 924*0b57cec5SDimitry Andric /// value in the second parameter. 925*0b57cec5SDimitry Andric /// 926*0b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 927*0b57cec5SDimitry Andric /// 928*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 929*0b57cec5SDimitry Andric /// 930*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction. 931*0b57cec5SDimitry Andric /// 932*0b57cec5SDimitry Andric /// \param __a 933*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 934*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 935*0b57cec5SDimitry Andric /// \param __b 936*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 937*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 938*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 939*0b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
940*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 941*0b57cec5SDimitry Andric _mm_cmpngt_sd(__m128d __a, __m128d __b) 942*0b57cec5SDimitry Andric { 943*0b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a); 944*0b57cec5SDimitry Andric return __extension__ (__m128d) { __c[0], __a[1] }; 945*0b57cec5SDimitry Andric } 946*0b57cec5SDimitry Andric 947*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 948*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 949*0b57cec5SDimitry Andric /// the value in the first parameter is not greater than or equal to the 950*0b57cec5SDimitry Andric /// corresponding value in the second parameter. 951*0b57cec5SDimitry Andric /// 952*0b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 953*0b57cec5SDimitry Andric /// 954*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 955*0b57cec5SDimitry Andric /// 956*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction. 957*0b57cec5SDimitry Andric /// 958*0b57cec5SDimitry Andric /// \param __a 959*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 960*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 961*0b57cec5SDimitry Andric /// \param __b 962*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 963*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 964*0b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 965*0b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
966*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 967*0b57cec5SDimitry Andric _mm_cmpnge_sd(__m128d __a, __m128d __b) 968*0b57cec5SDimitry Andric { 969*0b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a); 970*0b57cec5SDimitry Andric return __extension__ (__m128d) { __c[0], __a[1] }; 971*0b57cec5SDimitry Andric } 972*0b57cec5SDimitry Andric 973*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 974*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] for equality. 975*0b57cec5SDimitry Andric /// 976*0b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 977*0b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 978*0b57cec5SDimitry Andric /// 979*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 980*0b57cec5SDimitry Andric /// 981*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 982*0b57cec5SDimitry Andric /// 983*0b57cec5SDimitry Andric /// \param __a 984*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 985*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 986*0b57cec5SDimitry Andric /// \param __b 987*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 988*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 989*0b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 990*0b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
991*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 992*0b57cec5SDimitry Andric _mm_comieq_sd(__m128d __a, __m128d __b) 993*0b57cec5SDimitry Andric { 994*0b57cec5SDimitry Andric return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b); 995*0b57cec5SDimitry Andric } 996*0b57cec5SDimitry Andric 997*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 998*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 999*0b57cec5SDimitry Andric /// the value in the first parameter is less than the corresponding value in 1000*0b57cec5SDimitry Andric /// the second parameter. 1001*0b57cec5SDimitry Andric /// 1002*0b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 1003*0b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 1004*0b57cec5SDimitry Andric /// 1005*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1006*0b57cec5SDimitry Andric /// 1007*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 1008*0b57cec5SDimitry Andric /// 1009*0b57cec5SDimitry Andric /// \param __a 1010*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 1011*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 1012*0b57cec5SDimitry Andric /// \param __b 1013*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 1014*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 1015*0b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 1016*0b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
1017*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 1018*0b57cec5SDimitry Andric _mm_comilt_sd(__m128d __a, __m128d __b) 1019*0b57cec5SDimitry Andric { 1020*0b57cec5SDimitry Andric return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b); 1021*0b57cec5SDimitry Andric } 1022*0b57cec5SDimitry Andric 1023*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 1024*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 1025*0b57cec5SDimitry Andric /// the value in the first parameter is less than or equal to the 1026*0b57cec5SDimitry Andric /// corresponding value in the second parameter. 1027*0b57cec5SDimitry Andric /// 1028*0b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 1029*0b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 1030*0b57cec5SDimitry Andric /// 1031*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1032*0b57cec5SDimitry Andric /// 1033*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 1034*0b57cec5SDimitry Andric /// 1035*0b57cec5SDimitry Andric /// \param __a 1036*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 1037*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 1038*0b57cec5SDimitry Andric /// \param __b 1039*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 1040*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 1041*0b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 1042*0b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
1043*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 1044*0b57cec5SDimitry Andric _mm_comile_sd(__m128d __a, __m128d __b) 1045*0b57cec5SDimitry Andric { 1046*0b57cec5SDimitry Andric return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b); 1047*0b57cec5SDimitry Andric } 1048*0b57cec5SDimitry Andric 1049*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 1050*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 1051*0b57cec5SDimitry Andric /// the value in the first parameter is greater than the corresponding value 1052*0b57cec5SDimitry Andric /// in the second parameter. 1053*0b57cec5SDimitry Andric /// 1054*0b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 1055*0b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 1056*0b57cec5SDimitry Andric /// 1057*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1058*0b57cec5SDimitry Andric /// 1059*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 1060*0b57cec5SDimitry Andric /// 1061*0b57cec5SDimitry Andric /// \param __a 1062*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 1063*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 1064*0b57cec5SDimitry Andric /// \param __b 1065*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 1066*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 1067*0b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 1068*0b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
1069*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 1070*0b57cec5SDimitry Andric _mm_comigt_sd(__m128d __a, __m128d __b) 1071*0b57cec5SDimitry Andric { 1072*0b57cec5SDimitry Andric return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b); 1073*0b57cec5SDimitry Andric } 1074*0b57cec5SDimitry Andric 1075*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 1076*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 1077*0b57cec5SDimitry Andric /// the value in the first parameter is greater than or equal to the 1078*0b57cec5SDimitry Andric /// corresponding value in the second parameter. 1079*0b57cec5SDimitry Andric /// 1080*0b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 1081*0b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 1082*0b57cec5SDimitry Andric /// 1083*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1084*0b57cec5SDimitry Andric /// 1085*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 1086*0b57cec5SDimitry Andric /// 1087*0b57cec5SDimitry Andric /// \param __a 1088*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 1089*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 1090*0b57cec5SDimitry Andric /// \param __b 1091*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 1092*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 1093*0b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 1094*0b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
1095*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 1096*0b57cec5SDimitry Andric _mm_comige_sd(__m128d __a, __m128d __b) 1097*0b57cec5SDimitry Andric { 1098*0b57cec5SDimitry Andric return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b); 1099*0b57cec5SDimitry Andric } 1100*0b57cec5SDimitry Andric 1101*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 1102*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 1103*0b57cec5SDimitry Andric /// the value in the first parameter is unequal to the corresponding value in 1104*0b57cec5SDimitry Andric /// the second parameter. 1105*0b57cec5SDimitry Andric /// 1106*0b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 1107*0b57cec5SDimitry Andric /// lower double-precision values is NaN, 1 is returned. 1108*0b57cec5SDimitry Andric /// 1109*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1110*0b57cec5SDimitry Andric /// 1111*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 1112*0b57cec5SDimitry Andric /// 1113*0b57cec5SDimitry Andric /// \param __a 1114*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 1115*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 1116*0b57cec5SDimitry Andric /// \param __b 1117*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 1118*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 1119*0b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 1120*0b57cec5SDimitry Andric /// lower double-precision values is NaN, 1 is returned. 
1121*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 1122*0b57cec5SDimitry Andric _mm_comineq_sd(__m128d __a, __m128d __b) 1123*0b57cec5SDimitry Andric { 1124*0b57cec5SDimitry Andric return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b); 1125*0b57cec5SDimitry Andric } 1126*0b57cec5SDimitry Andric 1127*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 1128*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] for equality. The 1129*0b57cec5SDimitry Andric /// comparison yields 0 for false, 1 for true. 1130*0b57cec5SDimitry Andric /// 1131*0b57cec5SDimitry Andric /// If either of the two lower double-precision values is NaN, 0 is returned. 1132*0b57cec5SDimitry Andric /// 1133*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1134*0b57cec5SDimitry Andric /// 1135*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 1136*0b57cec5SDimitry Andric /// 1137*0b57cec5SDimitry Andric /// \param __a 1138*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 1139*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 1140*0b57cec5SDimitry Andric /// \param __b 1141*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 1142*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 1143*0b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 1144*0b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
1145*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 1146*0b57cec5SDimitry Andric _mm_ucomieq_sd(__m128d __a, __m128d __b) 1147*0b57cec5SDimitry Andric { 1148*0b57cec5SDimitry Andric return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b); 1149*0b57cec5SDimitry Andric } 1150*0b57cec5SDimitry Andric 1151*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 1152*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 1153*0b57cec5SDimitry Andric /// the value in the first parameter is less than the corresponding value in 1154*0b57cec5SDimitry Andric /// the second parameter. 1155*0b57cec5SDimitry Andric /// 1156*0b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two lower 1157*0b57cec5SDimitry Andric /// double-precision values is NaN, 0 is returned. 1158*0b57cec5SDimitry Andric /// 1159*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1160*0b57cec5SDimitry Andric /// 1161*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 1162*0b57cec5SDimitry Andric /// 1163*0b57cec5SDimitry Andric /// \param __a 1164*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 1165*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 1166*0b57cec5SDimitry Andric /// \param __b 1167*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 1168*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 1169*0b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 1170*0b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
1171*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 1172*0b57cec5SDimitry Andric _mm_ucomilt_sd(__m128d __a, __m128d __b) 1173*0b57cec5SDimitry Andric { 1174*0b57cec5SDimitry Andric return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b); 1175*0b57cec5SDimitry Andric } 1176*0b57cec5SDimitry Andric 1177*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 1178*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 1179*0b57cec5SDimitry Andric /// the value in the first parameter is less than or equal to the 1180*0b57cec5SDimitry Andric /// corresponding value in the second parameter. 1181*0b57cec5SDimitry Andric /// 1182*0b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two lower 1183*0b57cec5SDimitry Andric /// double-precision values is NaN, 0 is returned. 1184*0b57cec5SDimitry Andric /// 1185*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1186*0b57cec5SDimitry Andric /// 1187*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 1188*0b57cec5SDimitry Andric /// 1189*0b57cec5SDimitry Andric /// \param __a 1190*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 1191*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 1192*0b57cec5SDimitry Andric /// \param __b 1193*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 1194*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 1195*0b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 1196*0b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
1197*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 1198*0b57cec5SDimitry Andric _mm_ucomile_sd(__m128d __a, __m128d __b) 1199*0b57cec5SDimitry Andric { 1200*0b57cec5SDimitry Andric return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b); 1201*0b57cec5SDimitry Andric } 1202*0b57cec5SDimitry Andric 1203*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 1204*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 1205*0b57cec5SDimitry Andric /// the value in the first parameter is greater than the corresponding value 1206*0b57cec5SDimitry Andric /// in the second parameter. 1207*0b57cec5SDimitry Andric /// 1208*0b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two lower 1209*0b57cec5SDimitry Andric /// double-precision values is NaN, 0 is returned. 1210*0b57cec5SDimitry Andric /// 1211*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1212*0b57cec5SDimitry Andric /// 1213*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 1214*0b57cec5SDimitry Andric /// 1215*0b57cec5SDimitry Andric /// \param __a 1216*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 1217*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 1218*0b57cec5SDimitry Andric /// \param __b 1219*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 1220*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 1221*0b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 1222*0b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
1223*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 1224*0b57cec5SDimitry Andric _mm_ucomigt_sd(__m128d __a, __m128d __b) 1225*0b57cec5SDimitry Andric { 1226*0b57cec5SDimitry Andric return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b); 1227*0b57cec5SDimitry Andric } 1228*0b57cec5SDimitry Andric 1229*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 1230*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 1231*0b57cec5SDimitry Andric /// the value in the first parameter is greater than or equal to the 1232*0b57cec5SDimitry Andric /// corresponding value in the second parameter. 1233*0b57cec5SDimitry Andric /// 1234*0b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 1235*0b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 1236*0b57cec5SDimitry Andric /// 1237*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1238*0b57cec5SDimitry Andric /// 1239*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 1240*0b57cec5SDimitry Andric /// 1241*0b57cec5SDimitry Andric /// \param __a 1242*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 1243*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 1244*0b57cec5SDimitry Andric /// \param __b 1245*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 1246*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 1247*0b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 1248*0b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
1249*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 1250*0b57cec5SDimitry Andric _mm_ucomige_sd(__m128d __a, __m128d __b) 1251*0b57cec5SDimitry Andric { 1252*0b57cec5SDimitry Andric return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b); 1253*0b57cec5SDimitry Andric } 1254*0b57cec5SDimitry Andric 1255*0b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 1256*0b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 1257*0b57cec5SDimitry Andric /// the value in the first parameter is unequal to the corresponding value in 1258*0b57cec5SDimitry Andric /// the second parameter. 1259*0b57cec5SDimitry Andric /// 1260*0b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two lower 1261*0b57cec5SDimitry Andric /// double-precision values is NaN, 1 is returned. 1262*0b57cec5SDimitry Andric /// 1263*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1264*0b57cec5SDimitry Andric /// 1265*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 1266*0b57cec5SDimitry Andric /// 1267*0b57cec5SDimitry Andric /// \param __a 1268*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 1269*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 1270*0b57cec5SDimitry Andric /// \param __b 1271*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 1272*0b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 1273*0b57cec5SDimitry Andric /// \returns An integer containing the comparison result. If either of the two 1274*0b57cec5SDimitry Andric /// lower double-precision values is NaN, 1 is returned. 
1275*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 1276*0b57cec5SDimitry Andric _mm_ucomineq_sd(__m128d __a, __m128d __b) 1277*0b57cec5SDimitry Andric { 1278*0b57cec5SDimitry Andric return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b); 1279*0b57cec5SDimitry Andric } 1280*0b57cec5SDimitry Andric 1281*0b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 1282*0b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two single-precision floating-point 1283*0b57cec5SDimitry Andric /// values, returned in the lower 64 bits of a 128-bit vector of [4 x float]. 1284*0b57cec5SDimitry Andric /// The upper 64 bits of the result vector are set to zero. 1285*0b57cec5SDimitry Andric /// 1286*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1287*0b57cec5SDimitry Andric /// 1288*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPD2PS / CVTPD2PS </c> instruction. 1289*0b57cec5SDimitry Andric /// 1290*0b57cec5SDimitry Andric /// \param __a 1291*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 1292*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the 1293*0b57cec5SDimitry Andric /// converted values. The upper 64 bits are set to zero. 1294*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS 1295*0b57cec5SDimitry Andric _mm_cvtpd_ps(__m128d __a) 1296*0b57cec5SDimitry Andric { 1297*0b57cec5SDimitry Andric return __builtin_ia32_cvtpd2ps((__v2df)__a); 1298*0b57cec5SDimitry Andric } 1299*0b57cec5SDimitry Andric 1300*0b57cec5SDimitry Andric /// Converts the lower two single-precision floating-point elements of a 1301*0b57cec5SDimitry Andric /// 128-bit vector of [4 x float] into two double-precision floating-point 1302*0b57cec5SDimitry Andric /// values, returned in a 128-bit vector of [2 x double]. The upper two 1303*0b57cec5SDimitry Andric /// elements of the input vector are unused. 
1304*0b57cec5SDimitry Andric /// 1305*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1306*0b57cec5SDimitry Andric /// 1307*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPS2PD / CVTPS2PD </c> instruction. 1308*0b57cec5SDimitry Andric /// 1309*0b57cec5SDimitry Andric /// \param __a 1310*0b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. The lower two single-precision 1311*0b57cec5SDimitry Andric /// floating-point elements are converted to double-precision values. The 1312*0b57cec5SDimitry Andric /// upper two elements are unused. 1313*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the converted values. 1314*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1315*0b57cec5SDimitry Andric _mm_cvtps_pd(__m128 __a) 1316*0b57cec5SDimitry Andric { 1317*0b57cec5SDimitry Andric return (__m128d) __builtin_convertvector( 1318*0b57cec5SDimitry Andric __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df); 1319*0b57cec5SDimitry Andric } 1320*0b57cec5SDimitry Andric 1321*0b57cec5SDimitry Andric /// Converts the lower two integer elements of a 128-bit vector of 1322*0b57cec5SDimitry Andric /// [4 x i32] into two double-precision floating-point values, returned in a 1323*0b57cec5SDimitry Andric /// 128-bit vector of [2 x double]. 1324*0b57cec5SDimitry Andric /// 1325*0b57cec5SDimitry Andric /// The upper two elements of the input vector are unused. 1326*0b57cec5SDimitry Andric /// 1327*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1328*0b57cec5SDimitry Andric /// 1329*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTDQ2PD / CVTDQ2PD </c> instruction. 1330*0b57cec5SDimitry Andric /// 1331*0b57cec5SDimitry Andric /// \param __a 1332*0b57cec5SDimitry Andric /// A 128-bit integer vector of [4 x i32]. The lower two integer elements are 1333*0b57cec5SDimitry Andric /// converted to double-precision values. 
1334*0b57cec5SDimitry Andric /// 1335*0b57cec5SDimitry Andric /// The upper two elements are unused. 1336*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the converted values. 1337*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1338*0b57cec5SDimitry Andric _mm_cvtepi32_pd(__m128i __a) 1339*0b57cec5SDimitry Andric { 1340*0b57cec5SDimitry Andric return (__m128d) __builtin_convertvector( 1341*0b57cec5SDimitry Andric __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df); 1342*0b57cec5SDimitry Andric } 1343*0b57cec5SDimitry Andric 1344*0b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 1345*0b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two signed 32-bit integer values, 1346*0b57cec5SDimitry Andric /// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. The upper 1347*0b57cec5SDimitry Andric /// 64 bits of the result vector are set to zero. 1348*0b57cec5SDimitry Andric /// 1349*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1350*0b57cec5SDimitry Andric /// 1351*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPD2DQ / CVTPD2DQ </c> instruction. 1352*0b57cec5SDimitry Andric /// 1353*0b57cec5SDimitry Andric /// \param __a 1354*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 1355*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the 1356*0b57cec5SDimitry Andric /// converted values. The upper 64 bits are set to zero. 
1357*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 1358*0b57cec5SDimitry Andric _mm_cvtpd_epi32(__m128d __a) 1359*0b57cec5SDimitry Andric { 1360*0b57cec5SDimitry Andric return __builtin_ia32_cvtpd2dq((__v2df)__a); 1361*0b57cec5SDimitry Andric } 1362*0b57cec5SDimitry Andric 1363*0b57cec5SDimitry Andric /// Converts the low-order element of a 128-bit vector of [2 x double] 1364*0b57cec5SDimitry Andric /// into a 32-bit signed integer value. 1365*0b57cec5SDimitry Andric /// 1366*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1367*0b57cec5SDimitry Andric /// 1368*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction. 1369*0b57cec5SDimitry Andric /// 1370*0b57cec5SDimitry Andric /// \param __a 1371*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 1372*0b57cec5SDimitry Andric /// conversion. 1373*0b57cec5SDimitry Andric /// \returns A 32-bit signed integer containing the converted value. 1374*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 1375*0b57cec5SDimitry Andric _mm_cvtsd_si32(__m128d __a) 1376*0b57cec5SDimitry Andric { 1377*0b57cec5SDimitry Andric return __builtin_ia32_cvtsd2si((__v2df)__a); 1378*0b57cec5SDimitry Andric } 1379*0b57cec5SDimitry Andric 1380*0b57cec5SDimitry Andric /// Converts the lower double-precision floating-point element of a 1381*0b57cec5SDimitry Andric /// 128-bit vector of [2 x double], in the second parameter, into a 1382*0b57cec5SDimitry Andric /// single-precision floating-point value, returned in the lower 32 bits of a 1383*0b57cec5SDimitry Andric /// 128-bit vector of [4 x float]. The upper 96 bits of the result vector are 1384*0b57cec5SDimitry Andric /// copied from the upper 96 bits of the first parameter. 
1385*0b57cec5SDimitry Andric /// 1386*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1387*0b57cec5SDimitry Andric /// 1388*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSD2SS / CVTSD2SS </c> instruction. 1389*0b57cec5SDimitry Andric /// 1390*0b57cec5SDimitry Andric /// \param __a 1391*0b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. The upper 96 bits of this parameter are 1392*0b57cec5SDimitry Andric /// copied to the upper 96 bits of the result. 1393*0b57cec5SDimitry Andric /// \param __b 1394*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision 1395*0b57cec5SDimitry Andric /// floating-point element is used in the conversion. 1396*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float]. The lower 32 bits contain the 1397*0b57cec5SDimitry Andric /// converted value from the second parameter. The upper 96 bits are copied 1398*0b57cec5SDimitry Andric /// from the upper 96 bits of the first parameter. 1399*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS 1400*0b57cec5SDimitry Andric _mm_cvtsd_ss(__m128 __a, __m128d __b) 1401*0b57cec5SDimitry Andric { 1402*0b57cec5SDimitry Andric return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b); 1403*0b57cec5SDimitry Andric } 1404*0b57cec5SDimitry Andric 1405*0b57cec5SDimitry Andric /// Converts a 32-bit signed integer value, in the second parameter, into 1406*0b57cec5SDimitry Andric /// a double-precision floating-point value, returned in the lower 64 bits of 1407*0b57cec5SDimitry Andric /// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector 1408*0b57cec5SDimitry Andric /// are copied from the upper 64 bits of the first parameter. 1409*0b57cec5SDimitry Andric /// 1410*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1411*0b57cec5SDimitry Andric /// 1412*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction. 
1413*0b57cec5SDimitry Andric /// 1414*0b57cec5SDimitry Andric /// \param __a 1415*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are 1416*0b57cec5SDimitry Andric /// copied to the upper 64 bits of the result. 1417*0b57cec5SDimitry Andric /// \param __b 1418*0b57cec5SDimitry Andric /// A 32-bit signed integer containing the value to be converted. 1419*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the 1420*0b57cec5SDimitry Andric /// converted value from the second parameter. The upper 64 bits are copied 1421*0b57cec5SDimitry Andric /// from the upper 64 bits of the first parameter. 1422*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1423*0b57cec5SDimitry Andric _mm_cvtsi32_sd(__m128d __a, int __b) 1424*0b57cec5SDimitry Andric { 1425*0b57cec5SDimitry Andric __a[0] = __b; 1426*0b57cec5SDimitry Andric return __a; 1427*0b57cec5SDimitry Andric } 1428*0b57cec5SDimitry Andric 1429*0b57cec5SDimitry Andric /// Converts the lower single-precision floating-point element of a 1430*0b57cec5SDimitry Andric /// 128-bit vector of [4 x float], in the second parameter, into a 1431*0b57cec5SDimitry Andric /// double-precision floating-point value, returned in the lower 64 bits of 1432*0b57cec5SDimitry Andric /// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector 1433*0b57cec5SDimitry Andric /// are copied from the upper 64 bits of the first parameter. 1434*0b57cec5SDimitry Andric /// 1435*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1436*0b57cec5SDimitry Andric /// 1437*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSS2SD / CVTSS2SD </c> instruction. 1438*0b57cec5SDimitry Andric /// 1439*0b57cec5SDimitry Andric /// \param __a 1440*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are 1441*0b57cec5SDimitry Andric /// copied to the upper 64 bits of the result. 
1442*0b57cec5SDimitry Andric /// \param __b 1443*0b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. The lower single-precision 1444*0b57cec5SDimitry Andric /// floating-point element is used in the conversion. 1445*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the 1446*0b57cec5SDimitry Andric /// converted value from the second parameter. The upper 64 bits are copied 1447*0b57cec5SDimitry Andric /// from the upper 64 bits of the first parameter. 1448*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1449*0b57cec5SDimitry Andric _mm_cvtss_sd(__m128d __a, __m128 __b) 1450*0b57cec5SDimitry Andric { 1451*0b57cec5SDimitry Andric __a[0] = __b[0]; 1452*0b57cec5SDimitry Andric return __a; 1453*0b57cec5SDimitry Andric } 1454*0b57cec5SDimitry Andric 1455*0b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 1456*0b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two signed 32-bit integer values, 1457*0b57cec5SDimitry Andric /// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. 1458*0b57cec5SDimitry Andric /// 1459*0b57cec5SDimitry Andric /// If the result of either conversion is inexact, the result is truncated 1460*0b57cec5SDimitry Andric /// (rounded towards zero) regardless of the current MXCSR setting. The upper 1461*0b57cec5SDimitry Andric /// 64 bits of the result vector are set to zero. 1462*0b57cec5SDimitry Andric /// 1463*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1464*0b57cec5SDimitry Andric /// 1465*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTPD2DQ / CVTTPD2DQ </c> 1466*0b57cec5SDimitry Andric /// instruction. 1467*0b57cec5SDimitry Andric /// 1468*0b57cec5SDimitry Andric /// \param __a 1469*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 
1470*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the 1471*0b57cec5SDimitry Andric /// converted values. The upper 64 bits are set to zero. 1472*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 1473*0b57cec5SDimitry Andric _mm_cvttpd_epi32(__m128d __a) 1474*0b57cec5SDimitry Andric { 1475*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a); 1476*0b57cec5SDimitry Andric } 1477*0b57cec5SDimitry Andric 1478*0b57cec5SDimitry Andric /// Converts the low-order element of a [2 x double] vector into a 32-bit 1479*0b57cec5SDimitry Andric /// signed integer value, truncating the result when it is inexact. 1480*0b57cec5SDimitry Andric /// 1481*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1482*0b57cec5SDimitry Andric /// 1483*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c> 1484*0b57cec5SDimitry Andric /// instruction. 1485*0b57cec5SDimitry Andric /// 1486*0b57cec5SDimitry Andric /// \param __a 1487*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 1488*0b57cec5SDimitry Andric /// conversion. 1489*0b57cec5SDimitry Andric /// \returns A 32-bit signed integer containing the converted value. 1490*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 1491*0b57cec5SDimitry Andric _mm_cvttsd_si32(__m128d __a) 1492*0b57cec5SDimitry Andric { 1493*0b57cec5SDimitry Andric return __builtin_ia32_cvttsd2si((__v2df)__a); 1494*0b57cec5SDimitry Andric } 1495*0b57cec5SDimitry Andric 1496*0b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 1497*0b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two signed 32-bit integer values, 1498*0b57cec5SDimitry Andric /// returned in a 64-bit vector of [2 x i32]. 
1499*0b57cec5SDimitry Andric /// 1500*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1501*0b57cec5SDimitry Andric /// 1502*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CVTPD2PI </c> instruction. 1503*0b57cec5SDimitry Andric /// 1504*0b57cec5SDimitry Andric /// \param __a 1505*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 1506*0b57cec5SDimitry Andric /// \returns A 64-bit vector of [2 x i32] containing the converted values. 1507*0b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 1508*0b57cec5SDimitry Andric _mm_cvtpd_pi32(__m128d __a) 1509*0b57cec5SDimitry Andric { 1510*0b57cec5SDimitry Andric return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a); 1511*0b57cec5SDimitry Andric } 1512*0b57cec5SDimitry Andric 1513*0b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 1514*0b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two signed 32-bit integer values, 1515*0b57cec5SDimitry Andric /// returned in a 64-bit vector of [2 x i32]. 1516*0b57cec5SDimitry Andric /// 1517*0b57cec5SDimitry Andric /// If the result of either conversion is inexact, the result is truncated 1518*0b57cec5SDimitry Andric /// (rounded towards zero) regardless of the current MXCSR setting. 1519*0b57cec5SDimitry Andric /// 1520*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1521*0b57cec5SDimitry Andric /// 1522*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CVTTPD2PI </c> instruction. 1523*0b57cec5SDimitry Andric /// 1524*0b57cec5SDimitry Andric /// \param __a 1525*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 1526*0b57cec5SDimitry Andric /// \returns A 64-bit vector of [2 x i32] containing the converted values. 
1527*0b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 1528*0b57cec5SDimitry Andric _mm_cvttpd_pi32(__m128d __a) 1529*0b57cec5SDimitry Andric { 1530*0b57cec5SDimitry Andric return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a); 1531*0b57cec5SDimitry Andric } 1532*0b57cec5SDimitry Andric 1533*0b57cec5SDimitry Andric /// Converts the two signed 32-bit integer elements of a 64-bit vector of 1534*0b57cec5SDimitry Andric /// [2 x i32] into two double-precision floating-point values, returned in a 1535*0b57cec5SDimitry Andric /// 128-bit vector of [2 x double]. 1536*0b57cec5SDimitry Andric /// 1537*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1538*0b57cec5SDimitry Andric /// 1539*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CVTPI2PD </c> instruction. 1540*0b57cec5SDimitry Andric /// 1541*0b57cec5SDimitry Andric /// \param __a 1542*0b57cec5SDimitry Andric /// A 64-bit vector of [2 x i32]. 1543*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the converted values. 1544*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX 1545*0b57cec5SDimitry Andric _mm_cvtpi32_pd(__m64 __a) 1546*0b57cec5SDimitry Andric { 1547*0b57cec5SDimitry Andric return __builtin_ia32_cvtpi2pd((__v2si)__a); 1548*0b57cec5SDimitry Andric } 1549*0b57cec5SDimitry Andric 1550*0b57cec5SDimitry Andric /// Returns the low-order element of a 128-bit vector of [2 x double] as 1551*0b57cec5SDimitry Andric /// a double-precision floating-point value. 1552*0b57cec5SDimitry Andric /// 1553*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1554*0b57cec5SDimitry Andric /// 1555*0b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 1556*0b57cec5SDimitry Andric /// 1557*0b57cec5SDimitry Andric /// \param __a 1558*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are returned. 
1559*0b57cec5SDimitry Andric /// \returns A double-precision floating-point value copied from the lower 64 1560*0b57cec5SDimitry Andric /// bits of \a __a. 1561*0b57cec5SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS 1562*0b57cec5SDimitry Andric _mm_cvtsd_f64(__m128d __a) 1563*0b57cec5SDimitry Andric { 1564*0b57cec5SDimitry Andric return __a[0]; 1565*0b57cec5SDimitry Andric } 1566*0b57cec5SDimitry Andric 1567*0b57cec5SDimitry Andric /// Loads a 128-bit floating-point vector of [2 x double] from an aligned 1568*0b57cec5SDimitry Andric /// memory location. 1569*0b57cec5SDimitry Andric /// 1570*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1571*0b57cec5SDimitry Andric /// 1572*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction. 1573*0b57cec5SDimitry Andric /// 1574*0b57cec5SDimitry Andric /// \param __dp 1575*0b57cec5SDimitry Andric /// A pointer to a 128-bit memory location. The address of the memory 1576*0b57cec5SDimitry Andric /// location has to be 16-byte aligned. 1577*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded values. 1578*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1579*0b57cec5SDimitry Andric _mm_load_pd(double const *__dp) 1580*0b57cec5SDimitry Andric { 1581*0b57cec5SDimitry Andric return *(__m128d*)__dp; 1582*0b57cec5SDimitry Andric } 1583*0b57cec5SDimitry Andric 1584*0b57cec5SDimitry Andric /// Loads a double-precision floating-point value from a specified memory 1585*0b57cec5SDimitry Andric /// location and duplicates it to both vector elements of a 128-bit vector of 1586*0b57cec5SDimitry Andric /// [2 x double]. 1587*0b57cec5SDimitry Andric /// 1588*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1589*0b57cec5SDimitry Andric /// 1590*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP / MOVDDUP </c> instruction. 
1591*0b57cec5SDimitry Andric /// 1592*0b57cec5SDimitry Andric /// \param __dp 1593*0b57cec5SDimitry Andric /// A pointer to a memory location containing a double-precision value. 1594*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded and 1595*0b57cec5SDimitry Andric /// duplicated values. 1596*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1597*0b57cec5SDimitry Andric _mm_load1_pd(double const *__dp) 1598*0b57cec5SDimitry Andric { 1599*0b57cec5SDimitry Andric struct __mm_load1_pd_struct { 1600*0b57cec5SDimitry Andric double __u; 1601*0b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1602*0b57cec5SDimitry Andric double __u = ((struct __mm_load1_pd_struct*)__dp)->__u; 1603*0b57cec5SDimitry Andric return __extension__ (__m128d){ __u, __u }; 1604*0b57cec5SDimitry Andric } 1605*0b57cec5SDimitry Andric 1606*0b57cec5SDimitry Andric #define _mm_load_pd1(dp) _mm_load1_pd(dp) 1607*0b57cec5SDimitry Andric 1608*0b57cec5SDimitry Andric /// Loads two double-precision values, in reverse order, from an aligned 1609*0b57cec5SDimitry Andric /// memory location into a 128-bit vector of [2 x double]. 1610*0b57cec5SDimitry Andric /// 1611*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1612*0b57cec5SDimitry Andric /// 1613*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction + 1614*0b57cec5SDimitry Andric /// needed shuffling instructions. In AVX mode, the shuffling may be combined 1615*0b57cec5SDimitry Andric /// with the \c VMOVAPD, resulting in only a \c VPERMILPD instruction. 1616*0b57cec5SDimitry Andric /// 1617*0b57cec5SDimitry Andric /// \param __dp 1618*0b57cec5SDimitry Andric /// A 16-byte aligned pointer to an array of double-precision values to be 1619*0b57cec5SDimitry Andric /// loaded in reverse order. 
1620*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the reversed loaded 1621*0b57cec5SDimitry Andric /// values. 1622*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1623*0b57cec5SDimitry Andric _mm_loadr_pd(double const *__dp) 1624*0b57cec5SDimitry Andric { 1625*0b57cec5SDimitry Andric __m128d __u = *(__m128d*)__dp; 1626*0b57cec5SDimitry Andric return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0); 1627*0b57cec5SDimitry Andric } 1628*0b57cec5SDimitry Andric 1629*0b57cec5SDimitry Andric /// Loads a 128-bit floating-point vector of [2 x double] from an 1630*0b57cec5SDimitry Andric /// unaligned memory location. 1631*0b57cec5SDimitry Andric /// 1632*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1633*0b57cec5SDimitry Andric /// 1634*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction. 1635*0b57cec5SDimitry Andric /// 1636*0b57cec5SDimitry Andric /// \param __dp 1637*0b57cec5SDimitry Andric /// A pointer to a 128-bit memory location. The address of the memory 1638*0b57cec5SDimitry Andric /// location does not have to be aligned. 1639*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded values. 1640*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1641*0b57cec5SDimitry Andric _mm_loadu_pd(double const *__dp) 1642*0b57cec5SDimitry Andric { 1643*0b57cec5SDimitry Andric struct __loadu_pd { 1644*0b57cec5SDimitry Andric __m128d_u __v; 1645*0b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1646*0b57cec5SDimitry Andric return ((struct __loadu_pd*)__dp)->__v; 1647*0b57cec5SDimitry Andric } 1648*0b57cec5SDimitry Andric 1649*0b57cec5SDimitry Andric /// Loads a 64-bit integer value to the low element of a 128-bit integer 1650*0b57cec5SDimitry Andric /// vector and clears the upper element. 
1651*0b57cec5SDimitry Andric /// 1652*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1653*0b57cec5SDimitry Andric /// 1654*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 1655*0b57cec5SDimitry Andric /// 1656*0b57cec5SDimitry Andric /// \param __a 1657*0b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. The address of the memory 1658*0b57cec5SDimitry Andric /// location does not have to be aligned. 1659*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the loaded value. 1660*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 1661*0b57cec5SDimitry Andric _mm_loadu_si64(void const *__a) 1662*0b57cec5SDimitry Andric { 1663*0b57cec5SDimitry Andric struct __loadu_si64 { 1664*0b57cec5SDimitry Andric long long __v; 1665*0b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1666*0b57cec5SDimitry Andric long long __u = ((struct __loadu_si64*)__a)->__v; 1667*0b57cec5SDimitry Andric return __extension__ (__m128i)(__v2di){__u, 0LL}; 1668*0b57cec5SDimitry Andric } 1669*0b57cec5SDimitry Andric 1670*0b57cec5SDimitry Andric /// Loads a 32-bit integer value to the low element of a 128-bit integer 1671*0b57cec5SDimitry Andric /// vector and clears the upper element. 1672*0b57cec5SDimitry Andric /// 1673*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1674*0b57cec5SDimitry Andric /// 1675*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 1676*0b57cec5SDimitry Andric /// 1677*0b57cec5SDimitry Andric /// \param __a 1678*0b57cec5SDimitry Andric /// A pointer to a 32-bit memory location. The address of the memory 1679*0b57cec5SDimitry Andric /// location does not have to be aligned. 1680*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the loaded value. 
1681*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 1682*0b57cec5SDimitry Andric _mm_loadu_si32(void const *__a) 1683*0b57cec5SDimitry Andric { 1684*0b57cec5SDimitry Andric struct __loadu_si32 { 1685*0b57cec5SDimitry Andric int __v; 1686*0b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1687*0b57cec5SDimitry Andric int __u = ((struct __loadu_si32*)__a)->__v; 1688*0b57cec5SDimitry Andric return __extension__ (__m128i)(__v4si){__u, 0, 0, 0}; 1689*0b57cec5SDimitry Andric } 1690*0b57cec5SDimitry Andric 1691*0b57cec5SDimitry Andric /// Loads a 16-bit integer value to the low element of a 128-bit integer 1692*0b57cec5SDimitry Andric /// vector and clears the upper element. 1693*0b57cec5SDimitry Andric /// 1694*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1695*0b57cec5SDimitry Andric /// 1696*0b57cec5SDimitry Andric /// This intrinsic does not correspond to a specific instruction. 1697*0b57cec5SDimitry Andric /// 1698*0b57cec5SDimitry Andric /// \param __a 1699*0b57cec5SDimitry Andric /// A pointer to a 16-bit memory location. The address of the memory 1700*0b57cec5SDimitry Andric /// location does not have to be aligned. 1701*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the loaded value. 
1702*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 1703*0b57cec5SDimitry Andric _mm_loadu_si16(void const *__a) 1704*0b57cec5SDimitry Andric { 1705*0b57cec5SDimitry Andric struct __loadu_si16 { 1706*0b57cec5SDimitry Andric short __v; 1707*0b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1708*0b57cec5SDimitry Andric short __u = ((struct __loadu_si16*)__a)->__v; 1709*0b57cec5SDimitry Andric return __extension__ (__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0}; 1710*0b57cec5SDimitry Andric } 1711*0b57cec5SDimitry Andric 1712*0b57cec5SDimitry Andric /// Loads a 64-bit double-precision value to the low element of a 1713*0b57cec5SDimitry Andric /// 128-bit integer vector and clears the upper element. 1714*0b57cec5SDimitry Andric /// 1715*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1716*0b57cec5SDimitry Andric /// 1717*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction. 1718*0b57cec5SDimitry Andric /// 1719*0b57cec5SDimitry Andric /// \param __dp 1720*0b57cec5SDimitry Andric /// A pointer to a memory location containing a double-precision value. 1721*0b57cec5SDimitry Andric /// The address of the memory location does not have to be aligned. 1722*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded value. 
1723*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1724*0b57cec5SDimitry Andric _mm_load_sd(double const *__dp) 1725*0b57cec5SDimitry Andric { 1726*0b57cec5SDimitry Andric struct __mm_load_sd_struct { 1727*0b57cec5SDimitry Andric double __u; 1728*0b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1729*0b57cec5SDimitry Andric double __u = ((struct __mm_load_sd_struct*)__dp)->__u; 1730*0b57cec5SDimitry Andric return __extension__ (__m128d){ __u, 0 }; 1731*0b57cec5SDimitry Andric } 1732*0b57cec5SDimitry Andric 1733*0b57cec5SDimitry Andric /// Loads a double-precision value into the high-order bits of a 128-bit 1734*0b57cec5SDimitry Andric /// vector of [2 x double]. The low-order bits are copied from the low-order 1735*0b57cec5SDimitry Andric /// bits of the first operand. 1736*0b57cec5SDimitry Andric /// 1737*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1738*0b57cec5SDimitry Andric /// 1739*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction. 1740*0b57cec5SDimitry Andric /// 1741*0b57cec5SDimitry Andric /// \param __a 1742*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 1743*0b57cec5SDimitry Andric /// Bits [63:0] are written to bits [63:0] of the result. 1744*0b57cec5SDimitry Andric /// \param __dp 1745*0b57cec5SDimitry Andric /// A pointer to a 64-bit memory location containing a double-precision 1746*0b57cec5SDimitry Andric /// floating-point value that is loaded. The loaded value is written to bits 1747*0b57cec5SDimitry Andric /// [127:64] of the result. The address of the memory location does not have 1748*0b57cec5SDimitry Andric /// to be aligned. 1749*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved values. 
1750*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1751*0b57cec5SDimitry Andric _mm_loadh_pd(__m128d __a, double const *__dp) 1752*0b57cec5SDimitry Andric { 1753*0b57cec5SDimitry Andric struct __mm_loadh_pd_struct { 1754*0b57cec5SDimitry Andric double __u; 1755*0b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1756*0b57cec5SDimitry Andric double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u; 1757*0b57cec5SDimitry Andric return __extension__ (__m128d){ __a[0], __u }; 1758*0b57cec5SDimitry Andric } 1759*0b57cec5SDimitry Andric 1760*0b57cec5SDimitry Andric /// Loads a double-precision value into the low-order bits of a 128-bit 1761*0b57cec5SDimitry Andric /// vector of [2 x double]. The high-order bits are copied from the 1762*0b57cec5SDimitry Andric /// high-order bits of the first operand. 1763*0b57cec5SDimitry Andric /// 1764*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1765*0b57cec5SDimitry Andric /// 1766*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction. 1767*0b57cec5SDimitry Andric /// 1768*0b57cec5SDimitry Andric /// \param __a 1769*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 1770*0b57cec5SDimitry Andric /// Bits [127:64] are written to bits [127:64] of the result. 1771*0b57cec5SDimitry Andric /// \param __dp 1772*0b57cec5SDimitry Andric /// A pointer to a 64-bit memory location containing a double-precision 1773*0b57cec5SDimitry Andric /// floating-point value that is loaded. The loaded value is written to bits 1774*0b57cec5SDimitry Andric /// [63:0] of the result. The address of the memory location does not have to 1775*0b57cec5SDimitry Andric /// be aligned. 1776*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved values. 
1777*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1778*0b57cec5SDimitry Andric _mm_loadl_pd(__m128d __a, double const *__dp) 1779*0b57cec5SDimitry Andric { 1780*0b57cec5SDimitry Andric struct __mm_loadl_pd_struct { 1781*0b57cec5SDimitry Andric double __u; 1782*0b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1783*0b57cec5SDimitry Andric double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u; 1784*0b57cec5SDimitry Andric return __extension__ (__m128d){ __u, __a[1] }; 1785*0b57cec5SDimitry Andric } 1786*0b57cec5SDimitry Andric 1787*0b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] with 1788*0b57cec5SDimitry Andric /// unspecified content. This could be used as an argument to another 1789*0b57cec5SDimitry Andric /// intrinsic function where the argument is required but the value is not 1790*0b57cec5SDimitry Andric /// actually used. 1791*0b57cec5SDimitry Andric /// 1792*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1793*0b57cec5SDimitry Andric /// 1794*0b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 1795*0b57cec5SDimitry Andric /// 1796*0b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [2 x double] with unspecified 1797*0b57cec5SDimitry Andric /// content. 1798*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1799*0b57cec5SDimitry Andric _mm_undefined_pd(void) 1800*0b57cec5SDimitry Andric { 1801*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_undef128(); 1802*0b57cec5SDimitry Andric } 1803*0b57cec5SDimitry Andric 1804*0b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double]. The lower 1805*0b57cec5SDimitry Andric /// 64 bits of the vector are initialized with the specified double-precision 1806*0b57cec5SDimitry Andric /// floating-point value. The upper 64 bits are set to zero. 
1807*0b57cec5SDimitry Andric /// 1808*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1809*0b57cec5SDimitry Andric /// 1810*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 1811*0b57cec5SDimitry Andric /// 1812*0b57cec5SDimitry Andric /// \param __w 1813*0b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the lower 64 1814*0b57cec5SDimitry Andric /// bits of the result. 1815*0b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. The 1816*0b57cec5SDimitry Andric /// lower 64 bits contain the value of the parameter. The upper 64 bits are 1817*0b57cec5SDimitry Andric /// set to zero. 1818*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1819*0b57cec5SDimitry Andric _mm_set_sd(double __w) 1820*0b57cec5SDimitry Andric { 1821*0b57cec5SDimitry Andric return __extension__ (__m128d){ __w, 0 }; 1822*0b57cec5SDimitry Andric } 1823*0b57cec5SDimitry Andric 1824*0b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double], with each 1825*0b57cec5SDimitry Andric /// of the two double-precision floating-point vector elements set to the 1826*0b57cec5SDimitry Andric /// specified double-precision floating-point value. 1827*0b57cec5SDimitry Andric /// 1828*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1829*0b57cec5SDimitry Andric /// 1830*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction. 1831*0b57cec5SDimitry Andric /// 1832*0b57cec5SDimitry Andric /// \param __w 1833*0b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize each vector 1834*0b57cec5SDimitry Andric /// element of the result. 1835*0b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 
1836*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1837*0b57cec5SDimitry Andric _mm_set1_pd(double __w) 1838*0b57cec5SDimitry Andric { 1839*0b57cec5SDimitry Andric return __extension__ (__m128d){ __w, __w }; 1840*0b57cec5SDimitry Andric } 1841*0b57cec5SDimitry Andric 1842*0b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double], with each 1843*0b57cec5SDimitry Andric /// of the two double-precision floating-point vector elements set to the 1844*0b57cec5SDimitry Andric /// specified double-precision floating-point value. 1845*0b57cec5SDimitry Andric /// 1846*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1847*0b57cec5SDimitry Andric /// 1848*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction. 1849*0b57cec5SDimitry Andric /// 1850*0b57cec5SDimitry Andric /// \param __w 1851*0b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize each vector 1852*0b57cec5SDimitry Andric /// element of the result. 1853*0b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 1854*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1855*0b57cec5SDimitry Andric _mm_set_pd1(double __w) 1856*0b57cec5SDimitry Andric { 1857*0b57cec5SDimitry Andric return _mm_set1_pd(__w); 1858*0b57cec5SDimitry Andric } 1859*0b57cec5SDimitry Andric 1860*0b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] 1861*0b57cec5SDimitry Andric /// initialized with the specified double-precision floating-point values. 1862*0b57cec5SDimitry Andric /// 1863*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1864*0b57cec5SDimitry Andric /// 1865*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction. 
1866*0b57cec5SDimitry Andric /// 1867*0b57cec5SDimitry Andric /// \param __w 1868*0b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the upper 64 1869*0b57cec5SDimitry Andric /// bits of the result. 1870*0b57cec5SDimitry Andric /// \param __x 1871*0b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the lower 64 1872*0b57cec5SDimitry Andric /// bits of the result. 1873*0b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 1874*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1875*0b57cec5SDimitry Andric _mm_set_pd(double __w, double __x) 1876*0b57cec5SDimitry Andric { 1877*0b57cec5SDimitry Andric return __extension__ (__m128d){ __x, __w }; 1878*0b57cec5SDimitry Andric } 1879*0b57cec5SDimitry Andric 1880*0b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double], 1881*0b57cec5SDimitry Andric /// initialized in reverse order with the specified double-precision 1882*0b57cec5SDimitry Andric /// floating-point values. 1883*0b57cec5SDimitry Andric /// 1884*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1885*0b57cec5SDimitry Andric /// 1886*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction. 1887*0b57cec5SDimitry Andric /// 1888*0b57cec5SDimitry Andric /// \param __w 1889*0b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the lower 64 1890*0b57cec5SDimitry Andric /// bits of the result. 1891*0b57cec5SDimitry Andric /// \param __x 1892*0b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the upper 64 1893*0b57cec5SDimitry Andric /// bits of the result. 1894*0b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 
1895*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1896*0b57cec5SDimitry Andric _mm_setr_pd(double __w, double __x) 1897*0b57cec5SDimitry Andric { 1898*0b57cec5SDimitry Andric return __extension__ (__m128d){ __w, __x }; 1899*0b57cec5SDimitry Andric } 1900*0b57cec5SDimitry Andric 1901*0b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] 1902*0b57cec5SDimitry Andric /// initialized to zero. 1903*0b57cec5SDimitry Andric /// 1904*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1905*0b57cec5SDimitry Andric /// 1906*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction. 1907*0b57cec5SDimitry Andric /// 1908*0b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double] with 1909*0b57cec5SDimitry Andric /// all elements set to zero. 1910*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1911*0b57cec5SDimitry Andric _mm_setzero_pd(void) 1912*0b57cec5SDimitry Andric { 1913*0b57cec5SDimitry Andric return __extension__ (__m128d){ 0, 0 }; 1914*0b57cec5SDimitry Andric } 1915*0b57cec5SDimitry Andric 1916*0b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double]. The lower 1917*0b57cec5SDimitry Andric /// 64 bits are set to the lower 64 bits of the second parameter. The upper 1918*0b57cec5SDimitry Andric /// 64 bits are set to the upper 64 bits of the first parameter. 1919*0b57cec5SDimitry Andric /// 1920*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1921*0b57cec5SDimitry Andric /// 1922*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction. 1923*0b57cec5SDimitry Andric /// 1924*0b57cec5SDimitry Andric /// \param __a 1925*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits are written to the 1926*0b57cec5SDimitry Andric /// upper 64 bits of the result. 
1927*0b57cec5SDimitry Andric /// \param __b 1928*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are written to the 1929*0b57cec5SDimitry Andric /// lower 64 bits of the result. 1930*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved values. 1931*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1932*0b57cec5SDimitry Andric _mm_move_sd(__m128d __a, __m128d __b) 1933*0b57cec5SDimitry Andric { 1934*0b57cec5SDimitry Andric __a[0] = __b[0]; 1935*0b57cec5SDimitry Andric return __a; 1936*0b57cec5SDimitry Andric } 1937*0b57cec5SDimitry Andric 1938*0b57cec5SDimitry Andric /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a 1939*0b57cec5SDimitry Andric /// memory location. 1940*0b57cec5SDimitry Andric /// 1941*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1942*0b57cec5SDimitry Andric /// 1943*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction. 1944*0b57cec5SDimitry Andric /// 1945*0b57cec5SDimitry Andric /// \param __dp 1946*0b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. 1947*0b57cec5SDimitry Andric /// \param __a 1948*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the value to be stored. 1949*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 1950*0b57cec5SDimitry Andric _mm_store_sd(double *__dp, __m128d __a) 1951*0b57cec5SDimitry Andric { 1952*0b57cec5SDimitry Andric struct __mm_store_sd_struct { 1953*0b57cec5SDimitry Andric double __u; 1954*0b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1955*0b57cec5SDimitry Andric ((struct __mm_store_sd_struct*)__dp)->__u = __a[0]; 1956*0b57cec5SDimitry Andric } 1957*0b57cec5SDimitry Andric 1958*0b57cec5SDimitry Andric /// Moves packed double-precision values from a 128-bit vector of 1959*0b57cec5SDimitry Andric /// [2 x double] to a memory location. 
1960*0b57cec5SDimitry Andric /// 1961*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1962*0b57cec5SDimitry Andric /// 1963*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c>VMOVAPD / MOVAPS</c> instruction. 1964*0b57cec5SDimitry Andric /// 1965*0b57cec5SDimitry Andric /// \param __dp 1966*0b57cec5SDimitry Andric /// A pointer to an aligned memory location that can store two 1967*0b57cec5SDimitry Andric /// double-precision values. 1968*0b57cec5SDimitry Andric /// \param __a 1969*0b57cec5SDimitry Andric /// A packed 128-bit vector of [2 x double] containing the values to be 1970*0b57cec5SDimitry Andric /// moved. 1971*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 1972*0b57cec5SDimitry Andric _mm_store_pd(double *__dp, __m128d __a) 1973*0b57cec5SDimitry Andric { 1974*0b57cec5SDimitry Andric *(__m128d*)__dp = __a; 1975*0b57cec5SDimitry Andric } 1976*0b57cec5SDimitry Andric 1977*0b57cec5SDimitry Andric /// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to 1978*0b57cec5SDimitry Andric /// the upper and lower 64 bits of a memory location. 1979*0b57cec5SDimitry Andric /// 1980*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1981*0b57cec5SDimitry Andric /// 1982*0b57cec5SDimitry Andric /// This intrinsic corresponds to the 1983*0b57cec5SDimitry Andric /// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction. 1984*0b57cec5SDimitry Andric /// 1985*0b57cec5SDimitry Andric /// \param __dp 1986*0b57cec5SDimitry Andric /// A pointer to a memory location that can store two double-precision 1987*0b57cec5SDimitry Andric /// values. 1988*0b57cec5SDimitry Andric /// \param __a 1989*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each 1990*0b57cec5SDimitry Andric /// of the values in \a __dp. 
1991*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 1992*0b57cec5SDimitry Andric _mm_store1_pd(double *__dp, __m128d __a) 1993*0b57cec5SDimitry Andric { 1994*0b57cec5SDimitry Andric __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); 1995*0b57cec5SDimitry Andric _mm_store_pd(__dp, __a); 1996*0b57cec5SDimitry Andric } 1997*0b57cec5SDimitry Andric 1998*0b57cec5SDimitry Andric /// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to 1999*0b57cec5SDimitry Andric /// the upper and lower 64 bits of a memory location. 2000*0b57cec5SDimitry Andric /// 2001*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2002*0b57cec5SDimitry Andric /// 2003*0b57cec5SDimitry Andric /// This intrinsic corresponds to the 2004*0b57cec5SDimitry Andric /// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction. 2005*0b57cec5SDimitry Andric /// 2006*0b57cec5SDimitry Andric /// \param __dp 2007*0b57cec5SDimitry Andric /// A pointer to a memory location that can store two double-precision 2008*0b57cec5SDimitry Andric /// values. 2009*0b57cec5SDimitry Andric /// \param __a 2010*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each 2011*0b57cec5SDimitry Andric /// of the values in \a __dp. 2012*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 2013*0b57cec5SDimitry Andric _mm_store_pd1(double *__dp, __m128d __a) 2014*0b57cec5SDimitry Andric { 2015*0b57cec5SDimitry Andric _mm_store1_pd(__dp, __a); 2016*0b57cec5SDimitry Andric } 2017*0b57cec5SDimitry Andric 2018*0b57cec5SDimitry Andric /// Stores a 128-bit vector of [2 x double] into an unaligned memory 2019*0b57cec5SDimitry Andric /// location. 2020*0b57cec5SDimitry Andric /// 2021*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2022*0b57cec5SDimitry Andric /// 2023*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction. 
2024*0b57cec5SDimitry Andric /// 2025*0b57cec5SDimitry Andric /// \param __dp 2026*0b57cec5SDimitry Andric /// A pointer to a 128-bit memory location. The address of the memory 2027*0b57cec5SDimitry Andric /// location does not have to be aligned. 2028*0b57cec5SDimitry Andric /// \param __a 2029*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the values to be stored. 2030*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 2031*0b57cec5SDimitry Andric _mm_storeu_pd(double *__dp, __m128d __a) 2032*0b57cec5SDimitry Andric { 2033*0b57cec5SDimitry Andric struct __storeu_pd { 2034*0b57cec5SDimitry Andric __m128d_u __v; 2035*0b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 2036*0b57cec5SDimitry Andric ((struct __storeu_pd*)__dp)->__v = __a; 2037*0b57cec5SDimitry Andric } 2038*0b57cec5SDimitry Andric 2039*0b57cec5SDimitry Andric /// Stores two double-precision values, in reverse order, from a 128-bit 2040*0b57cec5SDimitry Andric /// vector of [2 x double] to a 16-byte aligned memory location. 2041*0b57cec5SDimitry Andric /// 2042*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2043*0b57cec5SDimitry Andric /// 2044*0b57cec5SDimitry Andric /// This intrinsic corresponds to a shuffling instruction followed by a 2045*0b57cec5SDimitry Andric /// <c> VMOVAPD / MOVAPD </c> instruction. 2046*0b57cec5SDimitry Andric /// 2047*0b57cec5SDimitry Andric /// \param __dp 2048*0b57cec5SDimitry Andric /// A pointer to a 16-byte aligned memory location that can store two 2049*0b57cec5SDimitry Andric /// double-precision values. 2050*0b57cec5SDimitry Andric /// \param __a 2051*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the values to be reversed and 2052*0b57cec5SDimitry Andric /// stored. 
2053*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 2054*0b57cec5SDimitry Andric _mm_storer_pd(double *__dp, __m128d __a) 2055*0b57cec5SDimitry Andric { 2056*0b57cec5SDimitry Andric __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0); 2057*0b57cec5SDimitry Andric *(__m128d *)__dp = __a; 2058*0b57cec5SDimitry Andric } 2059*0b57cec5SDimitry Andric 2060*0b57cec5SDimitry Andric /// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a 2061*0b57cec5SDimitry Andric /// memory location. 2062*0b57cec5SDimitry Andric /// 2063*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2064*0b57cec5SDimitry Andric /// 2065*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction. 2066*0b57cec5SDimitry Andric /// 2067*0b57cec5SDimitry Andric /// \param __dp 2068*0b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. 2069*0b57cec5SDimitry Andric /// \param __a 2070*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the value to be stored. 2071*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 2072*0b57cec5SDimitry Andric _mm_storeh_pd(double *__dp, __m128d __a) 2073*0b57cec5SDimitry Andric { 2074*0b57cec5SDimitry Andric struct __mm_storeh_pd_struct { 2075*0b57cec5SDimitry Andric double __u; 2076*0b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 2077*0b57cec5SDimitry Andric ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1]; 2078*0b57cec5SDimitry Andric } 2079*0b57cec5SDimitry Andric 2080*0b57cec5SDimitry Andric /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a 2081*0b57cec5SDimitry Andric /// memory location. 2082*0b57cec5SDimitry Andric /// 2083*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2084*0b57cec5SDimitry Andric /// 2085*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction. 
2086*0b57cec5SDimitry Andric /// 2087*0b57cec5SDimitry Andric /// \param __dp 2088*0b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. 2089*0b57cec5SDimitry Andric /// \param __a 2090*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the value to be stored. 2091*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 2092*0b57cec5SDimitry Andric _mm_storel_pd(double *__dp, __m128d __a) 2093*0b57cec5SDimitry Andric { 2094*0b57cec5SDimitry Andric struct __mm_storeh_pd_struct { 2095*0b57cec5SDimitry Andric double __u; 2096*0b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 2097*0b57cec5SDimitry Andric ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0]; 2098*0b57cec5SDimitry Andric } 2099*0b57cec5SDimitry Andric 2100*0b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [16 x i8], 2101*0b57cec5SDimitry Andric /// saving the lower 8 bits of each sum in the corresponding element of a 2102*0b57cec5SDimitry Andric /// 128-bit result vector of [16 x i8]. 2103*0b57cec5SDimitry Andric /// 2104*0b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 2105*0b57cec5SDimitry Andric /// 2106*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2107*0b57cec5SDimitry Andric /// 2108*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDB / PADDB </c> instruction. 2109*0b57cec5SDimitry Andric /// 2110*0b57cec5SDimitry Andric /// \param __a 2111*0b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. 2112*0b57cec5SDimitry Andric /// \param __b 2113*0b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. 2114*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the sums of both 2115*0b57cec5SDimitry Andric /// parameters. 
2116*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2117*0b57cec5SDimitry Andric _mm_add_epi8(__m128i __a, __m128i __b) 2118*0b57cec5SDimitry Andric { 2119*0b57cec5SDimitry Andric return (__m128i)((__v16qu)__a + (__v16qu)__b); 2120*0b57cec5SDimitry Andric } 2121*0b57cec5SDimitry Andric 2122*0b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [8 x i16], 2123*0b57cec5SDimitry Andric /// saving the lower 16 bits of each sum in the corresponding element of a 2124*0b57cec5SDimitry Andric /// 128-bit result vector of [8 x i16]. 2125*0b57cec5SDimitry Andric /// 2126*0b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 2127*0b57cec5SDimitry Andric /// 2128*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2129*0b57cec5SDimitry Andric /// 2130*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDW / PADDW </c> instruction. 2131*0b57cec5SDimitry Andric /// 2132*0b57cec5SDimitry Andric /// \param __a 2133*0b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 2134*0b57cec5SDimitry Andric /// \param __b 2135*0b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 2136*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the sums of both 2137*0b57cec5SDimitry Andric /// parameters. 2138*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2139*0b57cec5SDimitry Andric _mm_add_epi16(__m128i __a, __m128i __b) 2140*0b57cec5SDimitry Andric { 2141*0b57cec5SDimitry Andric return (__m128i)((__v8hu)__a + (__v8hu)__b); 2142*0b57cec5SDimitry Andric } 2143*0b57cec5SDimitry Andric 2144*0b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [4 x i32], 2145*0b57cec5SDimitry Andric /// saving the lower 32 bits of each sum in the corresponding element of a 2146*0b57cec5SDimitry Andric /// 128-bit result vector of [4 x i32]. 
2147*0b57cec5SDimitry Andric /// 2148*0b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 2149*0b57cec5SDimitry Andric /// 2150*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2151*0b57cec5SDimitry Andric /// 2152*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDD / PADDD </c> instruction. 2153*0b57cec5SDimitry Andric /// 2154*0b57cec5SDimitry Andric /// \param __a 2155*0b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. 2156*0b57cec5SDimitry Andric /// \param __b 2157*0b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. 2158*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the sums of both 2159*0b57cec5SDimitry Andric /// parameters. 2160*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2161*0b57cec5SDimitry Andric _mm_add_epi32(__m128i __a, __m128i __b) 2162*0b57cec5SDimitry Andric { 2163*0b57cec5SDimitry Andric return (__m128i)((__v4su)__a + (__v4su)__b); 2164*0b57cec5SDimitry Andric } 2165*0b57cec5SDimitry Andric 2166*0b57cec5SDimitry Andric /// Adds two signed or unsigned 64-bit integer values, returning the 2167*0b57cec5SDimitry Andric /// lower 64 bits of the sum. 2168*0b57cec5SDimitry Andric /// 2169*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2170*0b57cec5SDimitry Andric /// 2171*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PADDQ </c> instruction. 2172*0b57cec5SDimitry Andric /// 2173*0b57cec5SDimitry Andric /// \param __a 2174*0b57cec5SDimitry Andric /// A 64-bit integer. 2175*0b57cec5SDimitry Andric /// \param __b 2176*0b57cec5SDimitry Andric /// A 64-bit integer. 2177*0b57cec5SDimitry Andric /// \returns A 64-bit integer containing the sum of both parameters. 
2178*0b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 2179*0b57cec5SDimitry Andric _mm_add_si64(__m64 __a, __m64 __b) 2180*0b57cec5SDimitry Andric { 2181*0b57cec5SDimitry Andric return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b); 2182*0b57cec5SDimitry Andric } 2183*0b57cec5SDimitry Andric 2184*0b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [2 x i64], 2185*0b57cec5SDimitry Andric /// saving the lower 64 bits of each sum in the corresponding element of a 2186*0b57cec5SDimitry Andric /// 128-bit result vector of [2 x i64]. 2187*0b57cec5SDimitry Andric /// 2188*0b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 2189*0b57cec5SDimitry Andric /// 2190*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2191*0b57cec5SDimitry Andric /// 2192*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDQ / PADDQ </c> instruction. 2193*0b57cec5SDimitry Andric /// 2194*0b57cec5SDimitry Andric /// \param __a 2195*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. 2196*0b57cec5SDimitry Andric /// \param __b 2197*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. 2198*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the sums of both 2199*0b57cec5SDimitry Andric /// parameters. 2200*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2201*0b57cec5SDimitry Andric _mm_add_epi64(__m128i __a, __m128i __b) 2202*0b57cec5SDimitry Andric { 2203*0b57cec5SDimitry Andric return (__m128i)((__v2du)__a + (__v2du)__b); 2204*0b57cec5SDimitry Andric } 2205*0b57cec5SDimitry Andric 2206*0b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 2207*0b57cec5SDimitry Andric /// signed [16 x i8] vectors, saving each sum in the corresponding element of 2208*0b57cec5SDimitry Andric /// a 128-bit result vector of [16 x i8]. 
Positive sums greater than 0x7F are 2209*0b57cec5SDimitry Andric /// saturated to 0x7F. Negative sums less than 0x80 are saturated to 0x80. 2210*0b57cec5SDimitry Andric /// 2211*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2212*0b57cec5SDimitry Andric /// 2213*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDSB / PADDSB </c> instruction. 2214*0b57cec5SDimitry Andric /// 2215*0b57cec5SDimitry Andric /// \param __a 2216*0b57cec5SDimitry Andric /// A 128-bit signed [16 x i8] vector. 2217*0b57cec5SDimitry Andric /// \param __b 2218*0b57cec5SDimitry Andric /// A 128-bit signed [16 x i8] vector. 2219*0b57cec5SDimitry Andric /// \returns A 128-bit signed [16 x i8] vector containing the saturated sums of 2220*0b57cec5SDimitry Andric /// both parameters. 2221*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2222*0b57cec5SDimitry Andric _mm_adds_epi8(__m128i __a, __m128i __b) 2223*0b57cec5SDimitry Andric { 2224*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b); 2225*0b57cec5SDimitry Andric } 2226*0b57cec5SDimitry Andric 2227*0b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 2228*0b57cec5SDimitry Andric /// signed [8 x i16] vectors, saving each sum in the corresponding element of 2229*0b57cec5SDimitry Andric /// a 128-bit result vector of [8 x i16]. Positive sums greater than 0x7FFF 2230*0b57cec5SDimitry Andric /// are saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to 2231*0b57cec5SDimitry Andric /// 0x8000. 2232*0b57cec5SDimitry Andric /// 2233*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2234*0b57cec5SDimitry Andric /// 2235*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDSW / PADDSW </c> instruction. 2236*0b57cec5SDimitry Andric /// 2237*0b57cec5SDimitry Andric /// \param __a 2238*0b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 
2239*0b57cec5SDimitry Andric /// \param __b 2240*0b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 2241*0b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the saturated sums of 2242*0b57cec5SDimitry Andric /// both parameters. 2243*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2244*0b57cec5SDimitry Andric _mm_adds_epi16(__m128i __a, __m128i __b) 2245*0b57cec5SDimitry Andric { 2246*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b); 2247*0b57cec5SDimitry Andric } 2248*0b57cec5SDimitry Andric 2249*0b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 2250*0b57cec5SDimitry Andric /// unsigned [16 x i8] vectors, saving each sum in the corresponding element 2251*0b57cec5SDimitry Andric /// of a 128-bit result vector of [16 x i8]. Positive sums greater than 0xFF 2252*0b57cec5SDimitry Andric /// are saturated to 0xFF. Negative sums are saturated to 0x00. 2253*0b57cec5SDimitry Andric /// 2254*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2255*0b57cec5SDimitry Andric /// 2256*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction. 2257*0b57cec5SDimitry Andric /// 2258*0b57cec5SDimitry Andric /// \param __a 2259*0b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 2260*0b57cec5SDimitry Andric /// \param __b 2261*0b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 2262*0b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the saturated sums 2263*0b57cec5SDimitry Andric /// of both parameters. 
2264*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2265*0b57cec5SDimitry Andric _mm_adds_epu8(__m128i __a, __m128i __b) 2266*0b57cec5SDimitry Andric { 2267*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b); 2268*0b57cec5SDimitry Andric } 2269*0b57cec5SDimitry Andric 2270*0b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 2271*0b57cec5SDimitry Andric /// unsigned [8 x i16] vectors, saving each sum in the corresponding element 2272*0b57cec5SDimitry Andric /// of a 128-bit result vector of [8 x i16]. Positive sums greater than 2273*0b57cec5SDimitry Andric /// 0xFFFF are saturated to 0xFFFF. Negative sums are saturated to 0x0000. 2274*0b57cec5SDimitry Andric /// 2275*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2276*0b57cec5SDimitry Andric /// 2277*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction. 2278*0b57cec5SDimitry Andric /// 2279*0b57cec5SDimitry Andric /// \param __a 2280*0b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 2281*0b57cec5SDimitry Andric /// \param __b 2282*0b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 2283*0b57cec5SDimitry Andric /// \returns A 128-bit unsigned [8 x i16] vector containing the saturated sums 2284*0b57cec5SDimitry Andric /// of both parameters. 
2285*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2286*0b57cec5SDimitry Andric _mm_adds_epu16(__m128i __a, __m128i __b) 2287*0b57cec5SDimitry Andric { 2288*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b); 2289*0b57cec5SDimitry Andric } 2290*0b57cec5SDimitry Andric 2291*0b57cec5SDimitry Andric /// Computes the rounded avarages of corresponding elements of two 2292*0b57cec5SDimitry Andric /// 128-bit unsigned [16 x i8] vectors, saving each result in the 2293*0b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [16 x i8]. 2294*0b57cec5SDimitry Andric /// 2295*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2296*0b57cec5SDimitry Andric /// 2297*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAVGB / PAVGB </c> instruction. 2298*0b57cec5SDimitry Andric /// 2299*0b57cec5SDimitry Andric /// \param __a 2300*0b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 2301*0b57cec5SDimitry Andric /// \param __b 2302*0b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 2303*0b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the rounded 2304*0b57cec5SDimitry Andric /// averages of both parameters. 2305*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2306*0b57cec5SDimitry Andric _mm_avg_epu8(__m128i __a, __m128i __b) 2307*0b57cec5SDimitry Andric { 2308*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b); 2309*0b57cec5SDimitry Andric } 2310*0b57cec5SDimitry Andric 2311*0b57cec5SDimitry Andric /// Computes the rounded avarages of corresponding elements of two 2312*0b57cec5SDimitry Andric /// 128-bit unsigned [8 x i16] vectors, saving each result in the 2313*0b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [8 x i16]. 
2314*0b57cec5SDimitry Andric /// 2315*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2316*0b57cec5SDimitry Andric /// 2317*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAVGW / PAVGW </c> instruction. 2318*0b57cec5SDimitry Andric /// 2319*0b57cec5SDimitry Andric /// \param __a 2320*0b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 2321*0b57cec5SDimitry Andric /// \param __b 2322*0b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 2323*0b57cec5SDimitry Andric /// \returns A 128-bit unsigned [8 x i16] vector containing the rounded 2324*0b57cec5SDimitry Andric /// averages of both parameters. 2325*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2326*0b57cec5SDimitry Andric _mm_avg_epu16(__m128i __a, __m128i __b) 2327*0b57cec5SDimitry Andric { 2328*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b); 2329*0b57cec5SDimitry Andric } 2330*0b57cec5SDimitry Andric 2331*0b57cec5SDimitry Andric /// Multiplies the corresponding elements of two 128-bit signed [8 x i16] 2332*0b57cec5SDimitry Andric /// vectors, producing eight intermediate 32-bit signed integer products, and 2333*0b57cec5SDimitry Andric /// adds the consecutive pairs of 32-bit products to form a 128-bit signed 2334*0b57cec5SDimitry Andric /// [4 x i32] vector. 2335*0b57cec5SDimitry Andric /// 2336*0b57cec5SDimitry Andric /// For example, bits [15:0] of both parameters are multiplied producing a 2337*0b57cec5SDimitry Andric /// 32-bit product, bits [31:16] of both parameters are multiplied producing 2338*0b57cec5SDimitry Andric /// a 32-bit product, and the sum of those two products becomes bits [31:0] 2339*0b57cec5SDimitry Andric /// of the result. 2340*0b57cec5SDimitry Andric /// 2341*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2342*0b57cec5SDimitry Andric /// 2343*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMADDWD / PMADDWD </c> instruction. 
2344*0b57cec5SDimitry Andric /// 2345*0b57cec5SDimitry Andric /// \param __a 2346*0b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 2347*0b57cec5SDimitry Andric /// \param __b 2348*0b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 2349*0b57cec5SDimitry Andric /// \returns A 128-bit signed [4 x i32] vector containing the sums of products 2350*0b57cec5SDimitry Andric /// of both parameters. 2351*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2352*0b57cec5SDimitry Andric _mm_madd_epi16(__m128i __a, __m128i __b) 2353*0b57cec5SDimitry Andric { 2354*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b); 2355*0b57cec5SDimitry Andric } 2356*0b57cec5SDimitry Andric 2357*0b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit signed [8 x i16] 2358*0b57cec5SDimitry Andric /// vectors, saving the greater value from each comparison in the 2359*0b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [8 x i16]. 2360*0b57cec5SDimitry Andric /// 2361*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2362*0b57cec5SDimitry Andric /// 2363*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMAXSW / PMAXSW </c> instruction. 2364*0b57cec5SDimitry Andric /// 2365*0b57cec5SDimitry Andric /// \param __a 2366*0b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 2367*0b57cec5SDimitry Andric /// \param __b 2368*0b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 2369*0b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the greater value of 2370*0b57cec5SDimitry Andric /// each comparison. 
2371*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2372*0b57cec5SDimitry Andric _mm_max_epi16(__m128i __a, __m128i __b) 2373*0b57cec5SDimitry Andric { 2374*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b); 2375*0b57cec5SDimitry Andric } 2376*0b57cec5SDimitry Andric 2377*0b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit unsigned [16 x i8] 2378*0b57cec5SDimitry Andric /// vectors, saving the greater value from each comparison in the 2379*0b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [16 x i8]. 2380*0b57cec5SDimitry Andric /// 2381*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2382*0b57cec5SDimitry Andric /// 2383*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMAXUB / PMAXUB </c> instruction. 2384*0b57cec5SDimitry Andric /// 2385*0b57cec5SDimitry Andric /// \param __a 2386*0b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 2387*0b57cec5SDimitry Andric /// \param __b 2388*0b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 2389*0b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the greater value of 2390*0b57cec5SDimitry Andric /// each comparison. 2391*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2392*0b57cec5SDimitry Andric _mm_max_epu8(__m128i __a, __m128i __b) 2393*0b57cec5SDimitry Andric { 2394*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b); 2395*0b57cec5SDimitry Andric } 2396*0b57cec5SDimitry Andric 2397*0b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit signed [8 x i16] 2398*0b57cec5SDimitry Andric /// vectors, saving the smaller value from each comparison in the 2399*0b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [8 x i16]. 
2400*0b57cec5SDimitry Andric /// 2401*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2402*0b57cec5SDimitry Andric /// 2403*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMINSW / PMINSW </c> instruction. 2404*0b57cec5SDimitry Andric /// 2405*0b57cec5SDimitry Andric /// \param __a 2406*0b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 2407*0b57cec5SDimitry Andric /// \param __b 2408*0b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 2409*0b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the smaller value of 2410*0b57cec5SDimitry Andric /// each comparison. 2411*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2412*0b57cec5SDimitry Andric _mm_min_epi16(__m128i __a, __m128i __b) 2413*0b57cec5SDimitry Andric { 2414*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b); 2415*0b57cec5SDimitry Andric } 2416*0b57cec5SDimitry Andric 2417*0b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit unsigned [16 x i8] 2418*0b57cec5SDimitry Andric /// vectors, saving the smaller value from each comparison in the 2419*0b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [16 x i8]. 2420*0b57cec5SDimitry Andric /// 2421*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2422*0b57cec5SDimitry Andric /// 2423*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMINUB / PMINUB </c> instruction. 2424*0b57cec5SDimitry Andric /// 2425*0b57cec5SDimitry Andric /// \param __a 2426*0b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 2427*0b57cec5SDimitry Andric /// \param __b 2428*0b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 2429*0b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the smaller value of 2430*0b57cec5SDimitry Andric /// each comparison. 
2431*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2432*0b57cec5SDimitry Andric _mm_min_epu8(__m128i __a, __m128i __b) 2433*0b57cec5SDimitry Andric { 2434*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b); 2435*0b57cec5SDimitry Andric } 2436*0b57cec5SDimitry Andric 2437*0b57cec5SDimitry Andric /// Multiplies the corresponding elements of two signed [8 x i16] 2438*0b57cec5SDimitry Andric /// vectors, saving the upper 16 bits of each 32-bit product in the 2439*0b57cec5SDimitry Andric /// corresponding element of a 128-bit signed [8 x i16] result vector. 2440*0b57cec5SDimitry Andric /// 2441*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2442*0b57cec5SDimitry Andric /// 2443*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULHW / PMULHW </c> instruction. 2444*0b57cec5SDimitry Andric /// 2445*0b57cec5SDimitry Andric /// \param __a 2446*0b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 2447*0b57cec5SDimitry Andric /// \param __b 2448*0b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 2449*0b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of 2450*0b57cec5SDimitry Andric /// each of the eight 32-bit products. 2451*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2452*0b57cec5SDimitry Andric _mm_mulhi_epi16(__m128i __a, __m128i __b) 2453*0b57cec5SDimitry Andric { 2454*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b); 2455*0b57cec5SDimitry Andric } 2456*0b57cec5SDimitry Andric 2457*0b57cec5SDimitry Andric /// Multiplies the corresponding elements of two unsigned [8 x i16] 2458*0b57cec5SDimitry Andric /// vectors, saving the upper 16 bits of each 32-bit product in the 2459*0b57cec5SDimitry Andric /// corresponding element of a 128-bit unsigned [8 x i16] result vector. 
2460*0b57cec5SDimitry Andric /// 2461*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2462*0b57cec5SDimitry Andric /// 2463*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULHUW / PMULHUW </c> instruction. 2464*0b57cec5SDimitry Andric /// 2465*0b57cec5SDimitry Andric /// \param __a 2466*0b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 2467*0b57cec5SDimitry Andric /// \param __b 2468*0b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 2469*0b57cec5SDimitry Andric /// \returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits 2470*0b57cec5SDimitry Andric /// of each of the eight 32-bit products. 2471*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2472*0b57cec5SDimitry Andric _mm_mulhi_epu16(__m128i __a, __m128i __b) 2473*0b57cec5SDimitry Andric { 2474*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b); 2475*0b57cec5SDimitry Andric } 2476*0b57cec5SDimitry Andric 2477*0b57cec5SDimitry Andric /// Multiplies the corresponding elements of two signed [8 x i16] 2478*0b57cec5SDimitry Andric /// vectors, saving the lower 16 bits of each 32-bit product in the 2479*0b57cec5SDimitry Andric /// corresponding element of a 128-bit signed [8 x i16] result vector. 2480*0b57cec5SDimitry Andric /// 2481*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2482*0b57cec5SDimitry Andric /// 2483*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULLW / PMULLW </c> instruction. 2484*0b57cec5SDimitry Andric /// 2485*0b57cec5SDimitry Andric /// \param __a 2486*0b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 2487*0b57cec5SDimitry Andric /// \param __b 2488*0b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 2489*0b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of 2490*0b57cec5SDimitry Andric /// each of the eight 32-bit products. 
2491*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2492*0b57cec5SDimitry Andric _mm_mullo_epi16(__m128i __a, __m128i __b) 2493*0b57cec5SDimitry Andric { 2494*0b57cec5SDimitry Andric return (__m128i)((__v8hu)__a * (__v8hu)__b); 2495*0b57cec5SDimitry Andric } 2496*0b57cec5SDimitry Andric 2497*0b57cec5SDimitry Andric /// Multiplies 32-bit unsigned integer values contained in the lower bits 2498*0b57cec5SDimitry Andric /// of the two 64-bit integer vectors and returns the 64-bit unsigned 2499*0b57cec5SDimitry Andric /// product. 2500*0b57cec5SDimitry Andric /// 2501*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2502*0b57cec5SDimitry Andric /// 2503*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PMULUDQ </c> instruction. 2504*0b57cec5SDimitry Andric /// 2505*0b57cec5SDimitry Andric /// \param __a 2506*0b57cec5SDimitry Andric /// A 64-bit integer containing one of the source operands. 2507*0b57cec5SDimitry Andric /// \param __b 2508*0b57cec5SDimitry Andric /// A 64-bit integer containing one of the source operands. 2509*0b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the product of both operands. 2510*0b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 2511*0b57cec5SDimitry Andric _mm_mul_su32(__m64 __a, __m64 __b) 2512*0b57cec5SDimitry Andric { 2513*0b57cec5SDimitry Andric return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b); 2514*0b57cec5SDimitry Andric } 2515*0b57cec5SDimitry Andric 2516*0b57cec5SDimitry Andric /// Multiplies 32-bit unsigned integer values contained in the lower 2517*0b57cec5SDimitry Andric /// bits of the corresponding elements of two [2 x i64] vectors, and returns 2518*0b57cec5SDimitry Andric /// the 64-bit products in the corresponding elements of a [2 x i64] vector. 
2519*0b57cec5SDimitry Andric /// 2520*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2521*0b57cec5SDimitry Andric /// 2522*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULUDQ / PMULUDQ </c> instruction. 2523*0b57cec5SDimitry Andric /// 2524*0b57cec5SDimitry Andric /// \param __a 2525*0b57cec5SDimitry Andric /// A [2 x i64] vector containing one of the source operands. 2526*0b57cec5SDimitry Andric /// \param __b 2527*0b57cec5SDimitry Andric /// A [2 x i64] vector containing one of the source operands. 2528*0b57cec5SDimitry Andric /// \returns A [2 x i64] vector containing the product of both operands. 2529*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2530*0b57cec5SDimitry Andric _mm_mul_epu32(__m128i __a, __m128i __b) 2531*0b57cec5SDimitry Andric { 2532*0b57cec5SDimitry Andric return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b); 2533*0b57cec5SDimitry Andric } 2534*0b57cec5SDimitry Andric 2535*0b57cec5SDimitry Andric /// Computes the absolute differences of corresponding 8-bit integer 2536*0b57cec5SDimitry Andric /// values in two 128-bit vectors. Sums the first 8 absolute differences, and 2537*0b57cec5SDimitry Andric /// separately sums the second 8 absolute differences. Packs these two 2538*0b57cec5SDimitry Andric /// unsigned 16-bit integer sums into the upper and lower elements of a 2539*0b57cec5SDimitry Andric /// [2 x i64] vector. 2540*0b57cec5SDimitry Andric /// 2541*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2542*0b57cec5SDimitry Andric /// 2543*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSADBW / PSADBW </c> instruction. 2544*0b57cec5SDimitry Andric /// 2545*0b57cec5SDimitry Andric /// \param __a 2546*0b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 2547*0b57cec5SDimitry Andric /// \param __b 2548*0b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 
2549*0b57cec5SDimitry Andric /// \returns A [2 x i64] vector containing the sums of the sets of absolute 2550*0b57cec5SDimitry Andric /// differences between both operands. 2551*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2552*0b57cec5SDimitry Andric _mm_sad_epu8(__m128i __a, __m128i __b) 2553*0b57cec5SDimitry Andric { 2554*0b57cec5SDimitry Andric return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b); 2555*0b57cec5SDimitry Andric } 2556*0b57cec5SDimitry Andric 2557*0b57cec5SDimitry Andric /// Subtracts the corresponding 8-bit integer values in the operands. 2558*0b57cec5SDimitry Andric /// 2559*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2560*0b57cec5SDimitry Andric /// 2561*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBB / PSUBB </c> instruction. 2562*0b57cec5SDimitry Andric /// 2563*0b57cec5SDimitry Andric /// \param __a 2564*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 2565*0b57cec5SDimitry Andric /// \param __b 2566*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 2567*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 2568*0b57cec5SDimitry Andric /// in the operands. 2569*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2570*0b57cec5SDimitry Andric _mm_sub_epi8(__m128i __a, __m128i __b) 2571*0b57cec5SDimitry Andric { 2572*0b57cec5SDimitry Andric return (__m128i)((__v16qu)__a - (__v16qu)__b); 2573*0b57cec5SDimitry Andric } 2574*0b57cec5SDimitry Andric 2575*0b57cec5SDimitry Andric /// Subtracts the corresponding 16-bit integer values in the operands. 2576*0b57cec5SDimitry Andric /// 2577*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2578*0b57cec5SDimitry Andric /// 2579*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBW / PSUBW </c> instruction. 
2580*0b57cec5SDimitry Andric /// 2581*0b57cec5SDimitry Andric /// \param __a 2582*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 2583*0b57cec5SDimitry Andric /// \param __b 2584*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 2585*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 2586*0b57cec5SDimitry Andric /// in the operands. 2587*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2588*0b57cec5SDimitry Andric _mm_sub_epi16(__m128i __a, __m128i __b) 2589*0b57cec5SDimitry Andric { 2590*0b57cec5SDimitry Andric return (__m128i)((__v8hu)__a - (__v8hu)__b); 2591*0b57cec5SDimitry Andric } 2592*0b57cec5SDimitry Andric 2593*0b57cec5SDimitry Andric /// Subtracts the corresponding 32-bit integer values in the operands. 2594*0b57cec5SDimitry Andric /// 2595*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2596*0b57cec5SDimitry Andric /// 2597*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBD / PSUBD </c> instruction. 2598*0b57cec5SDimitry Andric /// 2599*0b57cec5SDimitry Andric /// \param __a 2600*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 2601*0b57cec5SDimitry Andric /// \param __b 2602*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 2603*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 2604*0b57cec5SDimitry Andric /// in the operands. 
2605*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2606*0b57cec5SDimitry Andric _mm_sub_epi32(__m128i __a, __m128i __b) 2607*0b57cec5SDimitry Andric { 2608*0b57cec5SDimitry Andric return (__m128i)((__v4su)__a - (__v4su)__b); 2609*0b57cec5SDimitry Andric } 2610*0b57cec5SDimitry Andric 2611*0b57cec5SDimitry Andric /// Subtracts signed or unsigned 64-bit integer values and writes the 2612*0b57cec5SDimitry Andric /// difference to the corresponding bits in the destination. 2613*0b57cec5SDimitry Andric /// 2614*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2615*0b57cec5SDimitry Andric /// 2616*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSUBQ </c> instruction. 2617*0b57cec5SDimitry Andric /// 2618*0b57cec5SDimitry Andric /// \param __a 2619*0b57cec5SDimitry Andric /// A 64-bit integer vector containing the minuend. 2620*0b57cec5SDimitry Andric /// \param __b 2621*0b57cec5SDimitry Andric /// A 64-bit integer vector containing the subtrahend. 2622*0b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the difference of the values in 2623*0b57cec5SDimitry Andric /// the operands. 2624*0b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 2625*0b57cec5SDimitry Andric _mm_sub_si64(__m64 __a, __m64 __b) 2626*0b57cec5SDimitry Andric { 2627*0b57cec5SDimitry Andric return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b); 2628*0b57cec5SDimitry Andric } 2629*0b57cec5SDimitry Andric 2630*0b57cec5SDimitry Andric /// Subtracts the corresponding elements of two [2 x i64] vectors. 2631*0b57cec5SDimitry Andric /// 2632*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2633*0b57cec5SDimitry Andric /// 2634*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBQ / PSUBQ </c> instruction. 2635*0b57cec5SDimitry Andric /// 2636*0b57cec5SDimitry Andric /// \param __a 2637*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 
2638*0b57cec5SDimitry Andric /// \param __b 2639*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 2640*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 2641*0b57cec5SDimitry Andric /// in the operands. 2642*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2643*0b57cec5SDimitry Andric _mm_sub_epi64(__m128i __a, __m128i __b) 2644*0b57cec5SDimitry Andric { 2645*0b57cec5SDimitry Andric return (__m128i)((__v2du)__a - (__v2du)__b); 2646*0b57cec5SDimitry Andric } 2647*0b57cec5SDimitry Andric 2648*0b57cec5SDimitry Andric /// Subtracts corresponding 8-bit signed integer values in the input and 2649*0b57cec5SDimitry Andric /// returns the differences in the corresponding bytes in the destination. 2650*0b57cec5SDimitry Andric /// Differences greater than 0x7F are saturated to 0x7F, and differences less 2651*0b57cec5SDimitry Andric /// than 0x80 are saturated to 0x80. 2652*0b57cec5SDimitry Andric /// 2653*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2654*0b57cec5SDimitry Andric /// 2655*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBSB / PSUBSB </c> instruction. 2656*0b57cec5SDimitry Andric /// 2657*0b57cec5SDimitry Andric /// \param __a 2658*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 2659*0b57cec5SDimitry Andric /// \param __b 2660*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 2661*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 2662*0b57cec5SDimitry Andric /// in the operands. 
2663*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2664*0b57cec5SDimitry Andric _mm_subs_epi8(__m128i __a, __m128i __b) 2665*0b57cec5SDimitry Andric { 2666*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b); 2667*0b57cec5SDimitry Andric } 2668*0b57cec5SDimitry Andric 2669*0b57cec5SDimitry Andric /// Subtracts corresponding 16-bit signed integer values in the input and 2670*0b57cec5SDimitry Andric /// returns the differences in the corresponding bytes in the destination. 2671*0b57cec5SDimitry Andric /// Differences greater than 0x7FFF are saturated to 0x7FFF, and values less 2672*0b57cec5SDimitry Andric /// than 0x8000 are saturated to 0x8000. 2673*0b57cec5SDimitry Andric /// 2674*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2675*0b57cec5SDimitry Andric /// 2676*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBSW / PSUBSW </c> instruction. 2677*0b57cec5SDimitry Andric /// 2678*0b57cec5SDimitry Andric /// \param __a 2679*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 2680*0b57cec5SDimitry Andric /// \param __b 2681*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 2682*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 2683*0b57cec5SDimitry Andric /// in the operands. 2684*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2685*0b57cec5SDimitry Andric _mm_subs_epi16(__m128i __a, __m128i __b) 2686*0b57cec5SDimitry Andric { 2687*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b); 2688*0b57cec5SDimitry Andric } 2689*0b57cec5SDimitry Andric 2690*0b57cec5SDimitry Andric /// Subtracts corresponding 8-bit unsigned integer values in the input 2691*0b57cec5SDimitry Andric /// and returns the differences in the corresponding bytes in the 2692*0b57cec5SDimitry Andric /// destination. 
Differences less than 0x00 are saturated to 0x00. 2693*0b57cec5SDimitry Andric /// 2694*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2695*0b57cec5SDimitry Andric /// 2696*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBUSB / PSUBUSB </c> instruction. 2697*0b57cec5SDimitry Andric /// 2698*0b57cec5SDimitry Andric /// \param __a 2699*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 2700*0b57cec5SDimitry Andric /// \param __b 2701*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 2702*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the unsigned integer 2703*0b57cec5SDimitry Andric /// differences of the values in the operands. 2704*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2705*0b57cec5SDimitry Andric _mm_subs_epu8(__m128i __a, __m128i __b) 2706*0b57cec5SDimitry Andric { 2707*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b); 2708*0b57cec5SDimitry Andric } 2709*0b57cec5SDimitry Andric 2710*0b57cec5SDimitry Andric /// Subtracts corresponding 16-bit unsigned integer values in the input 2711*0b57cec5SDimitry Andric /// and returns the differences in the corresponding bytes in the 2712*0b57cec5SDimitry Andric /// destination. Differences less than 0x0000 are saturated to 0x0000. 2713*0b57cec5SDimitry Andric /// 2714*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2715*0b57cec5SDimitry Andric /// 2716*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBUSW / PSUBUSW </c> instruction. 2717*0b57cec5SDimitry Andric /// 2718*0b57cec5SDimitry Andric /// \param __a 2719*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 2720*0b57cec5SDimitry Andric /// \param __b 2721*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 
2722*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the unsigned integer 2723*0b57cec5SDimitry Andric /// differences of the values in the operands. 2724*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2725*0b57cec5SDimitry Andric _mm_subs_epu16(__m128i __a, __m128i __b) 2726*0b57cec5SDimitry Andric { 2727*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b); 2728*0b57cec5SDimitry Andric } 2729*0b57cec5SDimitry Andric 2730*0b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit integer vectors. 2731*0b57cec5SDimitry Andric /// 2732*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2733*0b57cec5SDimitry Andric /// 2734*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction. 2735*0b57cec5SDimitry Andric /// 2736*0b57cec5SDimitry Andric /// \param __a 2737*0b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 2738*0b57cec5SDimitry Andric /// \param __b 2739*0b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 2740*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise AND of the values 2741*0b57cec5SDimitry Andric /// in both operands. 2742*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2743*0b57cec5SDimitry Andric _mm_and_si128(__m128i __a, __m128i __b) 2744*0b57cec5SDimitry Andric { 2745*0b57cec5SDimitry Andric return (__m128i)((__v2du)__a & (__v2du)__b); 2746*0b57cec5SDimitry Andric } 2747*0b57cec5SDimitry Andric 2748*0b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit integer vectors, using the 2749*0b57cec5SDimitry Andric /// one's complement of the values contained in the first source operand. 
2750*0b57cec5SDimitry Andric /// 2751*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2752*0b57cec5SDimitry Andric /// 2753*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction. 2754*0b57cec5SDimitry Andric /// 2755*0b57cec5SDimitry Andric /// \param __a 2756*0b57cec5SDimitry Andric /// A 128-bit vector containing the left source operand. The one's complement 2757*0b57cec5SDimitry Andric /// of this value is used in the bitwise AND. 2758*0b57cec5SDimitry Andric /// \param __b 2759*0b57cec5SDimitry Andric /// A 128-bit vector containing the right source operand. 2760*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise AND of the one's 2761*0b57cec5SDimitry Andric /// complement of the first operand and the values in the second operand. 2762*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2763*0b57cec5SDimitry Andric _mm_andnot_si128(__m128i __a, __m128i __b) 2764*0b57cec5SDimitry Andric { 2765*0b57cec5SDimitry Andric return (__m128i)(~(__v2du)__a & (__v2du)__b); 2766*0b57cec5SDimitry Andric } 2767*0b57cec5SDimitry Andric /// Performs a bitwise OR of two 128-bit integer vectors. 2768*0b57cec5SDimitry Andric /// 2769*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2770*0b57cec5SDimitry Andric /// 2771*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPOR / POR </c> instruction. 2772*0b57cec5SDimitry Andric /// 2773*0b57cec5SDimitry Andric /// \param __a 2774*0b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 2775*0b57cec5SDimitry Andric /// \param __b 2776*0b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 2777*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise OR of the values 2778*0b57cec5SDimitry Andric /// in both operands. 
2779*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2780*0b57cec5SDimitry Andric _mm_or_si128(__m128i __a, __m128i __b) 2781*0b57cec5SDimitry Andric { 2782*0b57cec5SDimitry Andric return (__m128i)((__v2du)__a | (__v2du)__b); 2783*0b57cec5SDimitry Andric } 2784*0b57cec5SDimitry Andric 2785*0b57cec5SDimitry Andric /// Performs a bitwise exclusive OR of two 128-bit integer vectors. 2786*0b57cec5SDimitry Andric /// 2787*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2788*0b57cec5SDimitry Andric /// 2789*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction. 2790*0b57cec5SDimitry Andric /// 2791*0b57cec5SDimitry Andric /// \param __a 2792*0b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 2793*0b57cec5SDimitry Andric /// \param __b 2794*0b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 2795*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise exclusive OR of the 2796*0b57cec5SDimitry Andric /// values in both operands. 2797*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2798*0b57cec5SDimitry Andric _mm_xor_si128(__m128i __a, __m128i __b) 2799*0b57cec5SDimitry Andric { 2800*0b57cec5SDimitry Andric return (__m128i)((__v2du)__a ^ (__v2du)__b); 2801*0b57cec5SDimitry Andric } 2802*0b57cec5SDimitry Andric 2803*0b57cec5SDimitry Andric /// Left-shifts the 128-bit integer vector operand by the specified 2804*0b57cec5SDimitry Andric /// number of bytes. Low-order bits are cleared. 
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_slli_si128(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPSLLDQ / PSLLDQ </c> instruction.
///
/// \param a
///    A 128-bit integer vector containing the source operand.
/// \param imm
///    An immediate value specifying the number of bytes to left-shift operand
///    \a a.
/// \returns A 128-bit integer vector containing the left-shifted value.
/* The whole expansion is parenthesized so that a postfix operator applied to
 * the macro result (for example a subscript) acts on the cast value rather
 * than binding to the builtin call inside the cast. */
#define _mm_slli_si128(a, imm) \
  ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), \
                                                (int)(imm)))

/// Alternate spelling of _mm_slli_si128; it expands to the same byte-shift
///    builtin with the same arguments.
///
/// \param a
///    A 128-bit integer vector containing the source operand.
/// \param imm
///    An immediate value specifying the number of bytes to left-shift operand
///    \a a.
/// \returns A 128-bit integer vector containing the left-shifted value.
#define _mm_bslli_si128(a, imm) \
  ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), \
                                                (int)(imm)))

/// Left-shifts each 16-bit value in the 128-bit integer vector operand
///    by the specified number of bits. Low-order bits are cleared.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction.
///
/// \param __a
///    A 128-bit integer vector containing the source operand.
2835*0b57cec5SDimitry Andric /// \param __count 2836*0b57cec5SDimitry Andric /// An integer value specifying the number of bits to left-shift each value 2837*0b57cec5SDimitry Andric /// in operand \a __a. 2838*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 2839*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2840*0b57cec5SDimitry Andric _mm_slli_epi16(__m128i __a, int __count) 2841*0b57cec5SDimitry Andric { 2842*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count); 2843*0b57cec5SDimitry Andric } 2844*0b57cec5SDimitry Andric 2845*0b57cec5SDimitry Andric /// Left-shifts each 16-bit value in the 128-bit integer vector operand 2846*0b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 2847*0b57cec5SDimitry Andric /// 2848*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2849*0b57cec5SDimitry Andric /// 2850*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction. 2851*0b57cec5SDimitry Andric /// 2852*0b57cec5SDimitry Andric /// \param __a 2853*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 2854*0b57cec5SDimitry Andric /// \param __count 2855*0b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 2856*0b57cec5SDimitry Andric /// to left-shift each value in operand \a __a. 2857*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 
2858*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2859*0b57cec5SDimitry Andric _mm_sll_epi16(__m128i __a, __m128i __count) 2860*0b57cec5SDimitry Andric { 2861*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count); 2862*0b57cec5SDimitry Andric } 2863*0b57cec5SDimitry Andric 2864*0b57cec5SDimitry Andric /// Left-shifts each 32-bit value in the 128-bit integer vector operand 2865*0b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 2866*0b57cec5SDimitry Andric /// 2867*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2868*0b57cec5SDimitry Andric /// 2869*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction. 2870*0b57cec5SDimitry Andric /// 2871*0b57cec5SDimitry Andric /// \param __a 2872*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 2873*0b57cec5SDimitry Andric /// \param __count 2874*0b57cec5SDimitry Andric /// An integer value specifying the number of bits to left-shift each value 2875*0b57cec5SDimitry Andric /// in operand \a __a. 2876*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 2877*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2878*0b57cec5SDimitry Andric _mm_slli_epi32(__m128i __a, int __count) 2879*0b57cec5SDimitry Andric { 2880*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count); 2881*0b57cec5SDimitry Andric } 2882*0b57cec5SDimitry Andric 2883*0b57cec5SDimitry Andric /// Left-shifts each 32-bit value in the 128-bit integer vector operand 2884*0b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 2885*0b57cec5SDimitry Andric /// 2886*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2887*0b57cec5SDimitry Andric /// 2888*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction. 
2889*0b57cec5SDimitry Andric /// 2890*0b57cec5SDimitry Andric /// \param __a 2891*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 2892*0b57cec5SDimitry Andric /// \param __count 2893*0b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 2894*0b57cec5SDimitry Andric /// to left-shift each value in operand \a __a. 2895*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 2896*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2897*0b57cec5SDimitry Andric _mm_sll_epi32(__m128i __a, __m128i __count) 2898*0b57cec5SDimitry Andric { 2899*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count); 2900*0b57cec5SDimitry Andric } 2901*0b57cec5SDimitry Andric 2902*0b57cec5SDimitry Andric /// Left-shifts each 64-bit value in the 128-bit integer vector operand 2903*0b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 2904*0b57cec5SDimitry Andric /// 2905*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2906*0b57cec5SDimitry Andric /// 2907*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction. 2908*0b57cec5SDimitry Andric /// 2909*0b57cec5SDimitry Andric /// \param __a 2910*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 2911*0b57cec5SDimitry Andric /// \param __count 2912*0b57cec5SDimitry Andric /// An integer value specifying the number of bits to left-shift each value 2913*0b57cec5SDimitry Andric /// in operand \a __a. 2914*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 
2915*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2916*0b57cec5SDimitry Andric _mm_slli_epi64(__m128i __a, int __count) 2917*0b57cec5SDimitry Andric { 2918*0b57cec5SDimitry Andric return __builtin_ia32_psllqi128((__v2di)__a, __count); 2919*0b57cec5SDimitry Andric } 2920*0b57cec5SDimitry Andric 2921*0b57cec5SDimitry Andric /// Left-shifts each 64-bit value in the 128-bit integer vector operand 2922*0b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 2923*0b57cec5SDimitry Andric /// 2924*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2925*0b57cec5SDimitry Andric /// 2926*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction. 2927*0b57cec5SDimitry Andric /// 2928*0b57cec5SDimitry Andric /// \param __a 2929*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 2930*0b57cec5SDimitry Andric /// \param __count 2931*0b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 2932*0b57cec5SDimitry Andric /// to left-shift each value in operand \a __a. 2933*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 2934*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2935*0b57cec5SDimitry Andric _mm_sll_epi64(__m128i __a, __m128i __count) 2936*0b57cec5SDimitry Andric { 2937*0b57cec5SDimitry Andric return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count); 2938*0b57cec5SDimitry Andric } 2939*0b57cec5SDimitry Andric 2940*0b57cec5SDimitry Andric /// Right-shifts each 16-bit value in the 128-bit integer vector operand 2941*0b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 2942*0b57cec5SDimitry Andric /// bit of the initial value. 
2943*0b57cec5SDimitry Andric /// 2944*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2945*0b57cec5SDimitry Andric /// 2946*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction. 2947*0b57cec5SDimitry Andric /// 2948*0b57cec5SDimitry Andric /// \param __a 2949*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 2950*0b57cec5SDimitry Andric /// \param __count 2951*0b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 2952*0b57cec5SDimitry Andric /// in operand \a __a. 2953*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 2954*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2955*0b57cec5SDimitry Andric _mm_srai_epi16(__m128i __a, int __count) 2956*0b57cec5SDimitry Andric { 2957*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count); 2958*0b57cec5SDimitry Andric } 2959*0b57cec5SDimitry Andric 2960*0b57cec5SDimitry Andric /// Right-shifts each 16-bit value in the 128-bit integer vector operand 2961*0b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 2962*0b57cec5SDimitry Andric /// bit of the initial value. 2963*0b57cec5SDimitry Andric /// 2964*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2965*0b57cec5SDimitry Andric /// 2966*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction. 2967*0b57cec5SDimitry Andric /// 2968*0b57cec5SDimitry Andric /// \param __a 2969*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 2970*0b57cec5SDimitry Andric /// \param __count 2971*0b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 2972*0b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 
2973*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 2974*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2975*0b57cec5SDimitry Andric _mm_sra_epi16(__m128i __a, __m128i __count) 2976*0b57cec5SDimitry Andric { 2977*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count); 2978*0b57cec5SDimitry Andric } 2979*0b57cec5SDimitry Andric 2980*0b57cec5SDimitry Andric /// Right-shifts each 32-bit value in the 128-bit integer vector operand 2981*0b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 2982*0b57cec5SDimitry Andric /// bit of the initial value. 2983*0b57cec5SDimitry Andric /// 2984*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2985*0b57cec5SDimitry Andric /// 2986*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction. 2987*0b57cec5SDimitry Andric /// 2988*0b57cec5SDimitry Andric /// \param __a 2989*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 2990*0b57cec5SDimitry Andric /// \param __count 2991*0b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 2992*0b57cec5SDimitry Andric /// in operand \a __a. 2993*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 2994*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2995*0b57cec5SDimitry Andric _mm_srai_epi32(__m128i __a, int __count) 2996*0b57cec5SDimitry Andric { 2997*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count); 2998*0b57cec5SDimitry Andric } 2999*0b57cec5SDimitry Andric 3000*0b57cec5SDimitry Andric /// Right-shifts each 32-bit value in the 128-bit integer vector operand 3001*0b57cec5SDimitry Andric /// by the specified number of bits. 
High-order bits are filled with the sign 3002*0b57cec5SDimitry Andric /// bit of the initial value. 3003*0b57cec5SDimitry Andric /// 3004*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3005*0b57cec5SDimitry Andric /// 3006*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction. 3007*0b57cec5SDimitry Andric /// 3008*0b57cec5SDimitry Andric /// \param __a 3009*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 3010*0b57cec5SDimitry Andric /// \param __count 3011*0b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 3012*0b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 3013*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 3014*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3015*0b57cec5SDimitry Andric _mm_sra_epi32(__m128i __a, __m128i __count) 3016*0b57cec5SDimitry Andric { 3017*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count); 3018*0b57cec5SDimitry Andric } 3019*0b57cec5SDimitry Andric 3020*0b57cec5SDimitry Andric /// Right-shifts the 128-bit integer vector operand by the specified 3021*0b57cec5SDimitry Andric /// number of bytes. High-order bits are cleared. 3022*0b57cec5SDimitry Andric /// 3023*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3024*0b57cec5SDimitry Andric /// 3025*0b57cec5SDimitry Andric /// \code 3026*0b57cec5SDimitry Andric /// __m128i _mm_srli_si128(__m128i a, const int imm); 3027*0b57cec5SDimitry Andric /// \endcode 3028*0b57cec5SDimitry Andric /// 3029*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLDQ / PSRLDQ </c> instruction. 3030*0b57cec5SDimitry Andric /// 3031*0b57cec5SDimitry Andric /// \param a 3032*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 
3033*0b57cec5SDimitry Andric /// \param imm 3034*0b57cec5SDimitry Andric /// An immediate value specifying the number of bytes to right-shift operand 3035*0b57cec5SDimitry Andric /// \a a. 3036*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted value. 3037*0b57cec5SDimitry Andric #define _mm_srli_si128(a, imm) \ 3038*0b57cec5SDimitry Andric (__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)) 3039*0b57cec5SDimitry Andric 3040*0b57cec5SDimitry Andric #define _mm_bsrli_si128(a, imm) \ 3041*0b57cec5SDimitry Andric (__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)) 3042*0b57cec5SDimitry Andric 3043*0b57cec5SDimitry Andric /// Right-shifts each of 16-bit values in the 128-bit integer vector 3044*0b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 3045*0b57cec5SDimitry Andric /// 3046*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3047*0b57cec5SDimitry Andric /// 3048*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction. 3049*0b57cec5SDimitry Andric /// 3050*0b57cec5SDimitry Andric /// \param __a 3051*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 3052*0b57cec5SDimitry Andric /// \param __count 3053*0b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 3054*0b57cec5SDimitry Andric /// in operand \a __a. 3055*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 
3056*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3057*0b57cec5SDimitry Andric _mm_srli_epi16(__m128i __a, int __count) 3058*0b57cec5SDimitry Andric { 3059*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count); 3060*0b57cec5SDimitry Andric } 3061*0b57cec5SDimitry Andric 3062*0b57cec5SDimitry Andric /// Right-shifts each of 16-bit values in the 128-bit integer vector 3063*0b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 3064*0b57cec5SDimitry Andric /// 3065*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3066*0b57cec5SDimitry Andric /// 3067*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction. 3068*0b57cec5SDimitry Andric /// 3069*0b57cec5SDimitry Andric /// \param __a 3070*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 3071*0b57cec5SDimitry Andric /// \param __count 3072*0b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 3073*0b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 3074*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 3075*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3076*0b57cec5SDimitry Andric _mm_srl_epi16(__m128i __a, __m128i __count) 3077*0b57cec5SDimitry Andric { 3078*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count); 3079*0b57cec5SDimitry Andric } 3080*0b57cec5SDimitry Andric 3081*0b57cec5SDimitry Andric /// Right-shifts each of 32-bit values in the 128-bit integer vector 3082*0b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 
3083*0b57cec5SDimitry Andric /// 3084*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3085*0b57cec5SDimitry Andric /// 3086*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction. 3087*0b57cec5SDimitry Andric /// 3088*0b57cec5SDimitry Andric /// \param __a 3089*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 3090*0b57cec5SDimitry Andric /// \param __count 3091*0b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 3092*0b57cec5SDimitry Andric /// in operand \a __a. 3093*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 3094*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3095*0b57cec5SDimitry Andric _mm_srli_epi32(__m128i __a, int __count) 3096*0b57cec5SDimitry Andric { 3097*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count); 3098*0b57cec5SDimitry Andric } 3099*0b57cec5SDimitry Andric 3100*0b57cec5SDimitry Andric /// Right-shifts each of 32-bit values in the 128-bit integer vector 3101*0b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 3102*0b57cec5SDimitry Andric /// 3103*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3104*0b57cec5SDimitry Andric /// 3105*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction. 3106*0b57cec5SDimitry Andric /// 3107*0b57cec5SDimitry Andric /// \param __a 3108*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 3109*0b57cec5SDimitry Andric /// \param __count 3110*0b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 3111*0b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 3112*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 
3113*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3114*0b57cec5SDimitry Andric _mm_srl_epi32(__m128i __a, __m128i __count) 3115*0b57cec5SDimitry Andric { 3116*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count); 3117*0b57cec5SDimitry Andric } 3118*0b57cec5SDimitry Andric 3119*0b57cec5SDimitry Andric /// Right-shifts each of 64-bit values in the 128-bit integer vector 3120*0b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 3121*0b57cec5SDimitry Andric /// 3122*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3123*0b57cec5SDimitry Andric /// 3124*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction. 3125*0b57cec5SDimitry Andric /// 3126*0b57cec5SDimitry Andric /// \param __a 3127*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 3128*0b57cec5SDimitry Andric /// \param __count 3129*0b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 3130*0b57cec5SDimitry Andric /// in operand \a __a. 3131*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 3132*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3133*0b57cec5SDimitry Andric _mm_srli_epi64(__m128i __a, int __count) 3134*0b57cec5SDimitry Andric { 3135*0b57cec5SDimitry Andric return __builtin_ia32_psrlqi128((__v2di)__a, __count); 3136*0b57cec5SDimitry Andric } 3137*0b57cec5SDimitry Andric 3138*0b57cec5SDimitry Andric /// Right-shifts each of 64-bit values in the 128-bit integer vector 3139*0b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 3140*0b57cec5SDimitry Andric /// 3141*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3142*0b57cec5SDimitry Andric /// 3143*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction. 
3144*0b57cec5SDimitry Andric /// 3145*0b57cec5SDimitry Andric /// \param __a 3146*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 3147*0b57cec5SDimitry Andric /// \param __count 3148*0b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 3149*0b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 3150*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 3151*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3152*0b57cec5SDimitry Andric _mm_srl_epi64(__m128i __a, __m128i __count) 3153*0b57cec5SDimitry Andric { 3154*0b57cec5SDimitry Andric return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count); 3155*0b57cec5SDimitry Andric } 3156*0b57cec5SDimitry Andric 3157*0b57cec5SDimitry Andric /// Compares each of the corresponding 8-bit values of the 128-bit 3158*0b57cec5SDimitry Andric /// integer vectors for equality. Each comparison yields 0x0 for false, 0xFF 3159*0b57cec5SDimitry Andric /// for true. 3160*0b57cec5SDimitry Andric /// 3161*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3162*0b57cec5SDimitry Andric /// 3163*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPEQB / PCMPEQB </c> instruction. 3164*0b57cec5SDimitry Andric /// 3165*0b57cec5SDimitry Andric /// \param __a 3166*0b57cec5SDimitry Andric /// A 128-bit integer vector. 3167*0b57cec5SDimitry Andric /// \param __b 3168*0b57cec5SDimitry Andric /// A 128-bit integer vector. 3169*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 
3170*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3171*0b57cec5SDimitry Andric _mm_cmpeq_epi8(__m128i __a, __m128i __b) 3172*0b57cec5SDimitry Andric { 3173*0b57cec5SDimitry Andric return (__m128i)((__v16qi)__a == (__v16qi)__b); 3174*0b57cec5SDimitry Andric } 3175*0b57cec5SDimitry Andric 3176*0b57cec5SDimitry Andric /// Compares each of the corresponding 16-bit values of the 128-bit 3177*0b57cec5SDimitry Andric /// integer vectors for equality. Each comparison yields 0x0 for false, 3178*0b57cec5SDimitry Andric /// 0xFFFF for true. 3179*0b57cec5SDimitry Andric /// 3180*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3181*0b57cec5SDimitry Andric /// 3182*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPEQW / PCMPEQW </c> instruction. 3183*0b57cec5SDimitry Andric /// 3184*0b57cec5SDimitry Andric /// \param __a 3185*0b57cec5SDimitry Andric /// A 128-bit integer vector. 3186*0b57cec5SDimitry Andric /// \param __b 3187*0b57cec5SDimitry Andric /// A 128-bit integer vector. 3188*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 3189*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3190*0b57cec5SDimitry Andric _mm_cmpeq_epi16(__m128i __a, __m128i __b) 3191*0b57cec5SDimitry Andric { 3192*0b57cec5SDimitry Andric return (__m128i)((__v8hi)__a == (__v8hi)__b); 3193*0b57cec5SDimitry Andric } 3194*0b57cec5SDimitry Andric 3195*0b57cec5SDimitry Andric /// Compares each of the corresponding 32-bit values of the 128-bit 3196*0b57cec5SDimitry Andric /// integer vectors for equality. Each comparison yields 0x0 for false, 3197*0b57cec5SDimitry Andric /// 0xFFFFFFFF for true. 3198*0b57cec5SDimitry Andric /// 3199*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3200*0b57cec5SDimitry Andric /// 3201*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPEQD / PCMPEQD </c> instruction. 
3202*0b57cec5SDimitry Andric /// 3203*0b57cec5SDimitry Andric /// \param __a 3204*0b57cec5SDimitry Andric /// A 128-bit integer vector. 3205*0b57cec5SDimitry Andric /// \param __b 3206*0b57cec5SDimitry Andric /// A 128-bit integer vector. 3207*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 3208*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3209*0b57cec5SDimitry Andric _mm_cmpeq_epi32(__m128i __a, __m128i __b) 3210*0b57cec5SDimitry Andric { 3211*0b57cec5SDimitry Andric return (__m128i)((__v4si)__a == (__v4si)__b); 3212*0b57cec5SDimitry Andric } 3213*0b57cec5SDimitry Andric 3214*0b57cec5SDimitry Andric /// Compares each of the corresponding signed 8-bit values of the 128-bit 3215*0b57cec5SDimitry Andric /// integer vectors to determine if the values in the first operand are 3216*0b57cec5SDimitry Andric /// greater than those in the second operand. Each comparison yields 0x0 for 3217*0b57cec5SDimitry Andric /// false, 0xFF for true. 3218*0b57cec5SDimitry Andric /// 3219*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3220*0b57cec5SDimitry Andric /// 3221*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction. 3222*0b57cec5SDimitry Andric /// 3223*0b57cec5SDimitry Andric /// \param __a 3224*0b57cec5SDimitry Andric /// A 128-bit integer vector. 3225*0b57cec5SDimitry Andric /// \param __b 3226*0b57cec5SDimitry Andric /// A 128-bit integer vector. 3227*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 3228*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3229*0b57cec5SDimitry Andric _mm_cmpgt_epi8(__m128i __a, __m128i __b) 3230*0b57cec5SDimitry Andric { 3231*0b57cec5SDimitry Andric /* This function always performs a signed comparison, but __v16qi is a char 3232*0b57cec5SDimitry Andric which may be signed or unsigned, so use __v16qs. 
*/ 3233*0b57cec5SDimitry Andric return (__m128i)((__v16qs)__a > (__v16qs)__b); 3234*0b57cec5SDimitry Andric } 3235*0b57cec5SDimitry Andric 3236*0b57cec5SDimitry Andric /// Compares each of the corresponding signed 16-bit values of the 3237*0b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 3238*0b57cec5SDimitry Andric /// are greater than those in the second operand. 3239*0b57cec5SDimitry Andric /// 3240*0b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFF for true. 3241*0b57cec5SDimitry Andric /// 3242*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3243*0b57cec5SDimitry Andric /// 3244*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction. 3245*0b57cec5SDimitry Andric /// 3246*0b57cec5SDimitry Andric /// \param __a 3247*0b57cec5SDimitry Andric /// A 128-bit integer vector. 3248*0b57cec5SDimitry Andric /// \param __b 3249*0b57cec5SDimitry Andric /// A 128-bit integer vector. 3250*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 3251*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3252*0b57cec5SDimitry Andric _mm_cmpgt_epi16(__m128i __a, __m128i __b) 3253*0b57cec5SDimitry Andric { 3254*0b57cec5SDimitry Andric return (__m128i)((__v8hi)__a > (__v8hi)__b); 3255*0b57cec5SDimitry Andric } 3256*0b57cec5SDimitry Andric 3257*0b57cec5SDimitry Andric /// Compares each of the corresponding signed 32-bit values of the 3258*0b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 3259*0b57cec5SDimitry Andric /// are greater than those in the second operand. 3260*0b57cec5SDimitry Andric /// 3261*0b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. 
3262*0b57cec5SDimitry Andric /// 3263*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3264*0b57cec5SDimitry Andric /// 3265*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction. 3266*0b57cec5SDimitry Andric /// 3267*0b57cec5SDimitry Andric /// \param __a 3268*0b57cec5SDimitry Andric /// A 128-bit integer vector. 3269*0b57cec5SDimitry Andric /// \param __b 3270*0b57cec5SDimitry Andric /// A 128-bit integer vector. 3271*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 3272*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3273*0b57cec5SDimitry Andric _mm_cmpgt_epi32(__m128i __a, __m128i __b) 3274*0b57cec5SDimitry Andric { 3275*0b57cec5SDimitry Andric return (__m128i)((__v4si)__a > (__v4si)__b); 3276*0b57cec5SDimitry Andric } 3277*0b57cec5SDimitry Andric 3278*0b57cec5SDimitry Andric /// Compares each of the corresponding signed 8-bit values of the 128-bit 3279*0b57cec5SDimitry Andric /// integer vectors to determine if the values in the first operand are less 3280*0b57cec5SDimitry Andric /// than those in the second operand. 3281*0b57cec5SDimitry Andric /// 3282*0b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFF for true. 3283*0b57cec5SDimitry Andric /// 3284*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3285*0b57cec5SDimitry Andric /// 3286*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction. 3287*0b57cec5SDimitry Andric /// 3288*0b57cec5SDimitry Andric /// \param __a 3289*0b57cec5SDimitry Andric /// A 128-bit integer vector. 3290*0b57cec5SDimitry Andric /// \param __b 3291*0b57cec5SDimitry Andric /// A 128-bit integer vector. 3292*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 
3293*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3294*0b57cec5SDimitry Andric _mm_cmplt_epi8(__m128i __a, __m128i __b) 3295*0b57cec5SDimitry Andric { 3296*0b57cec5SDimitry Andric return _mm_cmpgt_epi8(__b, __a); 3297*0b57cec5SDimitry Andric } 3298*0b57cec5SDimitry Andric 3299*0b57cec5SDimitry Andric /// Compares each of the corresponding signed 16-bit values of the 3300*0b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 3301*0b57cec5SDimitry Andric /// are less than those in the second operand. 3302*0b57cec5SDimitry Andric /// 3303*0b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFF for true. 3304*0b57cec5SDimitry Andric /// 3305*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3306*0b57cec5SDimitry Andric /// 3307*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction. 3308*0b57cec5SDimitry Andric /// 3309*0b57cec5SDimitry Andric /// \param __a 3310*0b57cec5SDimitry Andric /// A 128-bit integer vector. 3311*0b57cec5SDimitry Andric /// \param __b 3312*0b57cec5SDimitry Andric /// A 128-bit integer vector. 3313*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 3314*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3315*0b57cec5SDimitry Andric _mm_cmplt_epi16(__m128i __a, __m128i __b) 3316*0b57cec5SDimitry Andric { 3317*0b57cec5SDimitry Andric return _mm_cmpgt_epi16(__b, __a); 3318*0b57cec5SDimitry Andric } 3319*0b57cec5SDimitry Andric 3320*0b57cec5SDimitry Andric /// Compares each of the corresponding signed 32-bit values of the 3321*0b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 3322*0b57cec5SDimitry Andric /// are less than those in the second operand. 3323*0b57cec5SDimitry Andric /// 3324*0b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. 
3325*0b57cec5SDimitry Andric /// 3326*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3327*0b57cec5SDimitry Andric /// 3328*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction. 3329*0b57cec5SDimitry Andric /// 3330*0b57cec5SDimitry Andric /// \param __a 3331*0b57cec5SDimitry Andric /// A 128-bit integer vector. 3332*0b57cec5SDimitry Andric /// \param __b 3333*0b57cec5SDimitry Andric /// A 128-bit integer vector. 3334*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 3335*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3336*0b57cec5SDimitry Andric _mm_cmplt_epi32(__m128i __a, __m128i __b) 3337*0b57cec5SDimitry Andric { 3338*0b57cec5SDimitry Andric return _mm_cmpgt_epi32(__b, __a); 3339*0b57cec5SDimitry Andric } 3340*0b57cec5SDimitry Andric 3341*0b57cec5SDimitry Andric #ifdef __x86_64__ 3342*0b57cec5SDimitry Andric /// Converts a 64-bit signed integer value from the second operand into a 3343*0b57cec5SDimitry Andric /// double-precision value and returns it in the lower element of a [2 x 3344*0b57cec5SDimitry Andric /// double] vector; the upper element of the returned vector is copied from 3345*0b57cec5SDimitry Andric /// the upper element of the first operand. 3346*0b57cec5SDimitry Andric /// 3347*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3348*0b57cec5SDimitry Andric /// 3349*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction. 3350*0b57cec5SDimitry Andric /// 3351*0b57cec5SDimitry Andric /// \param __a 3352*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits of this operand are 3353*0b57cec5SDimitry Andric /// copied to the upper 64 bits of the destination. 3354*0b57cec5SDimitry Andric /// \param __b 3355*0b57cec5SDimitry Andric /// A 64-bit signed integer operand containing the value to be converted. 
3356*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 3357*0b57cec5SDimitry Andric /// converted value of the second operand. The upper 64 bits are copied from 3358*0b57cec5SDimitry Andric /// the upper 64 bits of the first operand. 3359*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 3360*0b57cec5SDimitry Andric _mm_cvtsi64_sd(__m128d __a, long long __b) 3361*0b57cec5SDimitry Andric { 3362*0b57cec5SDimitry Andric __a[0] = __b; 3363*0b57cec5SDimitry Andric return __a; 3364*0b57cec5SDimitry Andric } 3365*0b57cec5SDimitry Andric 3366*0b57cec5SDimitry Andric /// Converts the first (lower) element of a vector of [2 x double] into a 3367*0b57cec5SDimitry Andric /// 64-bit signed integer value, according to the current rounding mode. 3368*0b57cec5SDimitry Andric /// 3369*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3370*0b57cec5SDimitry Andric /// 3371*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction. 3372*0b57cec5SDimitry Andric /// 3373*0b57cec5SDimitry Andric /// \param __a 3374*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 3375*0b57cec5SDimitry Andric /// conversion. 3376*0b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the converted value. 3377*0b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS 3378*0b57cec5SDimitry Andric _mm_cvtsd_si64(__m128d __a) 3379*0b57cec5SDimitry Andric { 3380*0b57cec5SDimitry Andric return __builtin_ia32_cvtsd2si64((__v2df)__a); 3381*0b57cec5SDimitry Andric } 3382*0b57cec5SDimitry Andric 3383*0b57cec5SDimitry Andric /// Converts the first (lower) element of a vector of [2 x double] into a 3384*0b57cec5SDimitry Andric /// 64-bit signed integer value, truncating the result when it is inexact. 
3385*0b57cec5SDimitry Andric /// 3386*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3387*0b57cec5SDimitry Andric /// 3388*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c> 3389*0b57cec5SDimitry Andric /// instruction. 3390*0b57cec5SDimitry Andric /// 3391*0b57cec5SDimitry Andric /// \param __a 3392*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 3393*0b57cec5SDimitry Andric /// conversion. 3394*0b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the converted value. 3395*0b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS 3396*0b57cec5SDimitry Andric _mm_cvttsd_si64(__m128d __a) 3397*0b57cec5SDimitry Andric { 3398*0b57cec5SDimitry Andric return __builtin_ia32_cvttsd2si64((__v2df)__a); 3399*0b57cec5SDimitry Andric } 3400*0b57cec5SDimitry Andric #endif 3401*0b57cec5SDimitry Andric 3402*0b57cec5SDimitry Andric /// Converts a vector of [4 x i32] into a vector of [4 x float]. 3403*0b57cec5SDimitry Andric /// 3404*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3405*0b57cec5SDimitry Andric /// 3406*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTDQ2PS / CVTDQ2PS </c> instruction. 3407*0b57cec5SDimitry Andric /// 3408*0b57cec5SDimitry Andric /// \param __a 3409*0b57cec5SDimitry Andric /// A 128-bit integer vector. 3410*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the converted values. 3411*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS 3412*0b57cec5SDimitry Andric _mm_cvtepi32_ps(__m128i __a) 3413*0b57cec5SDimitry Andric { 3414*0b57cec5SDimitry Andric return (__m128)__builtin_convertvector((__v4si)__a, __v4sf); 3415*0b57cec5SDimitry Andric } 3416*0b57cec5SDimitry Andric 3417*0b57cec5SDimitry Andric /// Converts a vector of [4 x float] into a vector of [4 x i32]. 
3418*0b57cec5SDimitry Andric /// 3419*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3420*0b57cec5SDimitry Andric /// 3421*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPS2DQ / CVTPS2DQ </c> instruction. 3422*0b57cec5SDimitry Andric /// 3423*0b57cec5SDimitry Andric /// \param __a 3424*0b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. 3425*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector of [4 x i32] containing the converted 3426*0b57cec5SDimitry Andric /// values. 3427*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3428*0b57cec5SDimitry Andric _mm_cvtps_epi32(__m128 __a) 3429*0b57cec5SDimitry Andric { 3430*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a); 3431*0b57cec5SDimitry Andric } 3432*0b57cec5SDimitry Andric 3433*0b57cec5SDimitry Andric /// Converts a vector of [4 x float] into a vector of [4 x i32], 3434*0b57cec5SDimitry Andric /// truncating the result when it is inexact. 3435*0b57cec5SDimitry Andric /// 3436*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3437*0b57cec5SDimitry Andric /// 3438*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTPS2DQ / CVTTPS2DQ </c> 3439*0b57cec5SDimitry Andric /// instruction. 3440*0b57cec5SDimitry Andric /// 3441*0b57cec5SDimitry Andric /// \param __a 3442*0b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. 3443*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the converted values. 
3444*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3445*0b57cec5SDimitry Andric _mm_cvttps_epi32(__m128 __a) 3446*0b57cec5SDimitry Andric { 3447*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a); 3448*0b57cec5SDimitry Andric } 3449*0b57cec5SDimitry Andric 3450*0b57cec5SDimitry Andric /// Returns a vector of [4 x i32] where the lowest element is the input 3451*0b57cec5SDimitry Andric /// operand and the remaining elements are zero. 3452*0b57cec5SDimitry Andric /// 3453*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3454*0b57cec5SDimitry Andric /// 3455*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 3456*0b57cec5SDimitry Andric /// 3457*0b57cec5SDimitry Andric /// \param __a 3458*0b57cec5SDimitry Andric /// A 32-bit signed integer operand. 3459*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32]. 3460*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3461*0b57cec5SDimitry Andric _mm_cvtsi32_si128(int __a) 3462*0b57cec5SDimitry Andric { 3463*0b57cec5SDimitry Andric return __extension__ (__m128i)(__v4si){ __a, 0, 0, 0 }; 3464*0b57cec5SDimitry Andric } 3465*0b57cec5SDimitry Andric 3466*0b57cec5SDimitry Andric #ifdef __x86_64__ 3467*0b57cec5SDimitry Andric /// Returns a vector of [2 x i64] where the lower element is the input 3468*0b57cec5SDimitry Andric /// operand and the upper element is zero. 3469*0b57cec5SDimitry Andric /// 3470*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3471*0b57cec5SDimitry Andric /// 3472*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 3473*0b57cec5SDimitry Andric /// 3474*0b57cec5SDimitry Andric /// \param __a 3475*0b57cec5SDimitry Andric /// A 64-bit signed integer operand containing the value to be converted. 3476*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the converted value. 
3477*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3478*0b57cec5SDimitry Andric _mm_cvtsi64_si128(long long __a) 3479*0b57cec5SDimitry Andric { 3480*0b57cec5SDimitry Andric return __extension__ (__m128i)(__v2di){ __a, 0 }; 3481*0b57cec5SDimitry Andric } 3482*0b57cec5SDimitry Andric #endif 3483*0b57cec5SDimitry Andric 3484*0b57cec5SDimitry Andric /// Moves the least significant 32 bits of a vector of [4 x i32] to a 3485*0b57cec5SDimitry Andric /// 32-bit signed integer value. 3486*0b57cec5SDimitry Andric /// 3487*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3488*0b57cec5SDimitry Andric /// 3489*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 3490*0b57cec5SDimitry Andric /// 3491*0b57cec5SDimitry Andric /// \param __a 3492*0b57cec5SDimitry Andric /// A vector of [4 x i32]. The least significant 32 bits are moved to the 3493*0b57cec5SDimitry Andric /// destination. 3494*0b57cec5SDimitry Andric /// \returns A 32-bit signed integer containing the moved value. 3495*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 3496*0b57cec5SDimitry Andric _mm_cvtsi128_si32(__m128i __a) 3497*0b57cec5SDimitry Andric { 3498*0b57cec5SDimitry Andric __v4si __b = (__v4si)__a; 3499*0b57cec5SDimitry Andric return __b[0]; 3500*0b57cec5SDimitry Andric } 3501*0b57cec5SDimitry Andric 3502*0b57cec5SDimitry Andric #ifdef __x86_64__ 3503*0b57cec5SDimitry Andric /// Moves the least significant 64 bits of a vector of [2 x i64] to a 3504*0b57cec5SDimitry Andric /// 64-bit signed integer value. 3505*0b57cec5SDimitry Andric /// 3506*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3507*0b57cec5SDimitry Andric /// 3508*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 3509*0b57cec5SDimitry Andric /// 3510*0b57cec5SDimitry Andric /// \param __a 3511*0b57cec5SDimitry Andric /// A vector of [2 x i64]. 
The least significant 64 bits are moved to the 3512*0b57cec5SDimitry Andric /// destination. 3513*0b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the moved value. 3514*0b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS 3515*0b57cec5SDimitry Andric _mm_cvtsi128_si64(__m128i __a) 3516*0b57cec5SDimitry Andric { 3517*0b57cec5SDimitry Andric return __a[0]; 3518*0b57cec5SDimitry Andric } 3519*0b57cec5SDimitry Andric #endif 3520*0b57cec5SDimitry Andric 3521*0b57cec5SDimitry Andric /// Moves packed integer values from an aligned 128-bit memory location 3522*0b57cec5SDimitry Andric /// to elements in a 128-bit integer vector. 3523*0b57cec5SDimitry Andric /// 3524*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3525*0b57cec5SDimitry Andric /// 3526*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDQA / MOVDQA </c> instruction. 3527*0b57cec5SDimitry Andric /// 3528*0b57cec5SDimitry Andric /// \param __p 3529*0b57cec5SDimitry Andric /// An aligned pointer to a memory location containing integer values. 3530*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the moved values. 3531*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3532*0b57cec5SDimitry Andric _mm_load_si128(__m128i const *__p) 3533*0b57cec5SDimitry Andric { 3534*0b57cec5SDimitry Andric return *__p; 3535*0b57cec5SDimitry Andric } 3536*0b57cec5SDimitry Andric 3537*0b57cec5SDimitry Andric /// Moves packed integer values from an unaligned 128-bit memory location 3538*0b57cec5SDimitry Andric /// to elements in a 128-bit integer vector. 3539*0b57cec5SDimitry Andric /// 3540*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3541*0b57cec5SDimitry Andric /// 3542*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDQU / MOVDQU </c> instruction. 
3543*0b57cec5SDimitry Andric /// 3544*0b57cec5SDimitry Andric /// \param __p 3545*0b57cec5SDimitry Andric /// A pointer to a memory location containing integer values. 3546*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the moved values. 3547*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3548*0b57cec5SDimitry Andric _mm_loadu_si128(__m128i_u const *__p) 3549*0b57cec5SDimitry Andric { 3550*0b57cec5SDimitry Andric struct __loadu_si128 { 3551*0b57cec5SDimitry Andric __m128i_u __v; 3552*0b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 3553*0b57cec5SDimitry Andric return ((struct __loadu_si128*)__p)->__v; 3554*0b57cec5SDimitry Andric } 3555*0b57cec5SDimitry Andric 3556*0b57cec5SDimitry Andric /// Returns a vector of [2 x i64] where the lower element is taken from 3557*0b57cec5SDimitry Andric /// the lower element of the operand, and the upper element is zero. 3558*0b57cec5SDimitry Andric /// 3559*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3560*0b57cec5SDimitry Andric /// 3561*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 3562*0b57cec5SDimitry Andric /// 3563*0b57cec5SDimitry Andric /// \param __p 3564*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. Bits [63:0] are written to bits [63:0] of 3565*0b57cec5SDimitry Andric /// the destination. 3566*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64]. The lower order bits contain the 3567*0b57cec5SDimitry Andric /// moved value. The higher order bits are cleared. 
3568*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3569*0b57cec5SDimitry Andric _mm_loadl_epi64(__m128i_u const *__p) 3570*0b57cec5SDimitry Andric { 3571*0b57cec5SDimitry Andric struct __mm_loadl_epi64_struct { 3572*0b57cec5SDimitry Andric long long __u; 3573*0b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 3574*0b57cec5SDimitry Andric return __extension__ (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0}; 3575*0b57cec5SDimitry Andric } 3576*0b57cec5SDimitry Andric 3577*0b57cec5SDimitry Andric /// Generates a 128-bit vector of [4 x i32] with unspecified content. 3578*0b57cec5SDimitry Andric /// This could be used as an argument to another intrinsic function where the 3579*0b57cec5SDimitry Andric /// argument is required but the value is not actually used. 3580*0b57cec5SDimitry Andric /// 3581*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3582*0b57cec5SDimitry Andric /// 3583*0b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 3584*0b57cec5SDimitry Andric /// 3585*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] with unspecified content. 3586*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3587*0b57cec5SDimitry Andric _mm_undefined_si128(void) 3588*0b57cec5SDimitry Andric { 3589*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_undef128(); 3590*0b57cec5SDimitry Andric } 3591*0b57cec5SDimitry Andric 3592*0b57cec5SDimitry Andric /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with 3593*0b57cec5SDimitry Andric /// the specified 64-bit integer values. 3594*0b57cec5SDimitry Andric /// 3595*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3596*0b57cec5SDimitry Andric /// 3597*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 3598*0b57cec5SDimitry Andric /// instruction. 
3599*0b57cec5SDimitry Andric /// 3600*0b57cec5SDimitry Andric /// \param __q1 3601*0b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the upper 64 bits of the 3602*0b57cec5SDimitry Andric /// destination vector of [2 x i64]. 3603*0b57cec5SDimitry Andric /// \param __q0 3604*0b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the lower 64 bits of the 3605*0b57cec5SDimitry Andric /// destination vector of [2 x i64]. 3606*0b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [2 x i64] containing the values 3607*0b57cec5SDimitry Andric /// provided in the operands. 3608*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3609*0b57cec5SDimitry Andric _mm_set_epi64x(long long __q1, long long __q0) 3610*0b57cec5SDimitry Andric { 3611*0b57cec5SDimitry Andric return __extension__ (__m128i)(__v2di){ __q0, __q1 }; 3612*0b57cec5SDimitry Andric } 3613*0b57cec5SDimitry Andric 3614*0b57cec5SDimitry Andric /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with 3615*0b57cec5SDimitry Andric /// the specified 64-bit integer values. 3616*0b57cec5SDimitry Andric /// 3617*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3618*0b57cec5SDimitry Andric /// 3619*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 3620*0b57cec5SDimitry Andric /// instruction. 3621*0b57cec5SDimitry Andric /// 3622*0b57cec5SDimitry Andric /// \param __q1 3623*0b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the upper 64 bits of the 3624*0b57cec5SDimitry Andric /// destination vector of [2 x i64]. 3625*0b57cec5SDimitry Andric /// \param __q0 3626*0b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the lower 64 bits of the 3627*0b57cec5SDimitry Andric /// destination vector of [2 x i64]. 
3628*0b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [2 x i64] containing the values 3629*0b57cec5SDimitry Andric /// provided in the operands. 3630*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3631*0b57cec5SDimitry Andric _mm_set_epi64(__m64 __q1, __m64 __q0) 3632*0b57cec5SDimitry Andric { 3633*0b57cec5SDimitry Andric return _mm_set_epi64x((long long)__q1, (long long)__q0); 3634*0b57cec5SDimitry Andric } 3635*0b57cec5SDimitry Andric 3636*0b57cec5SDimitry Andric /// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with 3637*0b57cec5SDimitry Andric /// the specified 32-bit integer values. 3638*0b57cec5SDimitry Andric /// 3639*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3640*0b57cec5SDimitry Andric /// 3641*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 3642*0b57cec5SDimitry Andric /// instruction. 3643*0b57cec5SDimitry Andric /// 3644*0b57cec5SDimitry Andric /// \param __i3 3645*0b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [127:96] of the 3646*0b57cec5SDimitry Andric /// destination vector. 3647*0b57cec5SDimitry Andric /// \param __i2 3648*0b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [95:64] of the destination 3649*0b57cec5SDimitry Andric /// vector. 3650*0b57cec5SDimitry Andric /// \param __i1 3651*0b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [63:32] of the destination 3652*0b57cec5SDimitry Andric /// vector. 3653*0b57cec5SDimitry Andric /// \param __i0 3654*0b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [31:0] of the destination 3655*0b57cec5SDimitry Andric /// vector. 3656*0b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [4 x i32] containing the values 3657*0b57cec5SDimitry Andric /// provided in the operands. 
3658*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3659*0b57cec5SDimitry Andric _mm_set_epi32(int __i3, int __i2, int __i1, int __i0) 3660*0b57cec5SDimitry Andric { 3661*0b57cec5SDimitry Andric return __extension__ (__m128i)(__v4si){ __i0, __i1, __i2, __i3}; 3662*0b57cec5SDimitry Andric } 3663*0b57cec5SDimitry Andric 3664*0b57cec5SDimitry Andric /// Initializes the 16-bit values in a 128-bit vector of [8 x i16] with 3665*0b57cec5SDimitry Andric /// the specified 16-bit integer values. 3666*0b57cec5SDimitry Andric /// 3667*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3668*0b57cec5SDimitry Andric /// 3669*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 3670*0b57cec5SDimitry Andric /// instruction. 3671*0b57cec5SDimitry Andric /// 3672*0b57cec5SDimitry Andric /// \param __w7 3673*0b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [127:112] of the 3674*0b57cec5SDimitry Andric /// destination vector. 3675*0b57cec5SDimitry Andric /// \param __w6 3676*0b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [111:96] of the 3677*0b57cec5SDimitry Andric /// destination vector. 3678*0b57cec5SDimitry Andric /// \param __w5 3679*0b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [95:80] of the destination 3680*0b57cec5SDimitry Andric /// vector. 3681*0b57cec5SDimitry Andric /// \param __w4 3682*0b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [79:64] of the destination 3683*0b57cec5SDimitry Andric /// vector. 3684*0b57cec5SDimitry Andric /// \param __w3 3685*0b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [63:48] of the destination 3686*0b57cec5SDimitry Andric /// vector. 3687*0b57cec5SDimitry Andric /// \param __w2 3688*0b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [47:32] of the destination 3689*0b57cec5SDimitry Andric /// vector. 
3690*0b57cec5SDimitry Andric /// \param __w1 3691*0b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [31:16] of the destination 3692*0b57cec5SDimitry Andric /// vector. 3693*0b57cec5SDimitry Andric /// \param __w0 3694*0b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [15:0] of the destination 3695*0b57cec5SDimitry Andric /// vector. 3696*0b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [8 x i16] containing the values 3697*0b57cec5SDimitry Andric /// provided in the operands. 3698*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3699*0b57cec5SDimitry Andric _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0) 3700*0b57cec5SDimitry Andric { 3701*0b57cec5SDimitry Andric return __extension__ (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 }; 3702*0b57cec5SDimitry Andric } 3703*0b57cec5SDimitry Andric 3704*0b57cec5SDimitry Andric /// Initializes the 8-bit values in a 128-bit vector of [16 x i8] with 3705*0b57cec5SDimitry Andric /// the specified 8-bit integer values. 3706*0b57cec5SDimitry Andric /// 3707*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3708*0b57cec5SDimitry Andric /// 3709*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 3710*0b57cec5SDimitry Andric /// instruction. 3711*0b57cec5SDimitry Andric /// 3712*0b57cec5SDimitry Andric /// \param __b15 3713*0b57cec5SDimitry Andric /// Initializes bits [127:120] of the destination vector. 3714*0b57cec5SDimitry Andric /// \param __b14 3715*0b57cec5SDimitry Andric /// Initializes bits [119:112] of the destination vector. 3716*0b57cec5SDimitry Andric /// \param __b13 3717*0b57cec5SDimitry Andric /// Initializes bits [111:104] of the destination vector. 3718*0b57cec5SDimitry Andric /// \param __b12 3719*0b57cec5SDimitry Andric /// Initializes bits [103:96] of the destination vector. 
3720*0b57cec5SDimitry Andric /// \param __b11 3721*0b57cec5SDimitry Andric /// Initializes bits [95:88] of the destination vector. 3722*0b57cec5SDimitry Andric /// \param __b10 3723*0b57cec5SDimitry Andric /// Initializes bits [87:80] of the destination vector. 3724*0b57cec5SDimitry Andric /// \param __b9 3725*0b57cec5SDimitry Andric /// Initializes bits [79:72] of the destination vector. 3726*0b57cec5SDimitry Andric /// \param __b8 3727*0b57cec5SDimitry Andric /// Initializes bits [71:64] of the destination vector. 3728*0b57cec5SDimitry Andric /// \param __b7 3729*0b57cec5SDimitry Andric /// Initializes bits [63:56] of the destination vector. 3730*0b57cec5SDimitry Andric /// \param __b6 3731*0b57cec5SDimitry Andric /// Initializes bits [55:48] of the destination vector. 3732*0b57cec5SDimitry Andric /// \param __b5 3733*0b57cec5SDimitry Andric /// Initializes bits [47:40] of the destination vector. 3734*0b57cec5SDimitry Andric /// \param __b4 3735*0b57cec5SDimitry Andric /// Initializes bits [39:32] of the destination vector. 3736*0b57cec5SDimitry Andric /// \param __b3 3737*0b57cec5SDimitry Andric /// Initializes bits [31:24] of the destination vector. 3738*0b57cec5SDimitry Andric /// \param __b2 3739*0b57cec5SDimitry Andric /// Initializes bits [23:16] of the destination vector. 3740*0b57cec5SDimitry Andric /// \param __b1 3741*0b57cec5SDimitry Andric /// Initializes bits [15:8] of the destination vector. 3742*0b57cec5SDimitry Andric /// \param __b0 3743*0b57cec5SDimitry Andric /// Initializes bits [7:0] of the destination vector. 3744*0b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [16 x i8] containing the values 3745*0b57cec5SDimitry Andric /// provided in the operands. 
3746*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3747*0b57cec5SDimitry Andric _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0) 3748*0b57cec5SDimitry Andric { 3749*0b57cec5SDimitry Andric return __extension__ (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 }; 3750*0b57cec5SDimitry Andric } 3751*0b57cec5SDimitry Andric 3752*0b57cec5SDimitry Andric /// Initializes both values in a 128-bit integer vector with the 3753*0b57cec5SDimitry Andric /// specified 64-bit integer value. 3754*0b57cec5SDimitry Andric /// 3755*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3756*0b57cec5SDimitry Andric /// 3757*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 3758*0b57cec5SDimitry Andric /// instruction. 3759*0b57cec5SDimitry Andric /// 3760*0b57cec5SDimitry Andric /// \param __q 3761*0b57cec5SDimitry Andric /// Integer value used to initialize the elements of the destination integer 3762*0b57cec5SDimitry Andric /// vector. 3763*0b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector of [2 x i64] with both 3764*0b57cec5SDimitry Andric /// elements containing the value provided in the operand. 3765*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3766*0b57cec5SDimitry Andric _mm_set1_epi64x(long long __q) 3767*0b57cec5SDimitry Andric { 3768*0b57cec5SDimitry Andric return _mm_set_epi64x(__q, __q); 3769*0b57cec5SDimitry Andric } 3770*0b57cec5SDimitry Andric 3771*0b57cec5SDimitry Andric /// Initializes both values in a 128-bit vector of [2 x i64] with the 3772*0b57cec5SDimitry Andric /// specified 64-bit value. 
3773*0b57cec5SDimitry Andric /// 3774*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3775*0b57cec5SDimitry Andric /// 3776*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 3777*0b57cec5SDimitry Andric /// instruction. 3778*0b57cec5SDimitry Andric /// 3779*0b57cec5SDimitry Andric /// \param __q 3780*0b57cec5SDimitry Andric /// A 64-bit value used to initialize the elements of the destination integer 3781*0b57cec5SDimitry Andric /// vector. 3782*0b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [2 x i64] with all elements 3783*0b57cec5SDimitry Andric /// containing the value provided in the operand. 3784*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3785*0b57cec5SDimitry Andric _mm_set1_epi64(__m64 __q) 3786*0b57cec5SDimitry Andric { 3787*0b57cec5SDimitry Andric return _mm_set_epi64(__q, __q); 3788*0b57cec5SDimitry Andric } 3789*0b57cec5SDimitry Andric 3790*0b57cec5SDimitry Andric /// Initializes all values in a 128-bit vector of [4 x i32] with the 3791*0b57cec5SDimitry Andric /// specified 32-bit value. 3792*0b57cec5SDimitry Andric /// 3793*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3794*0b57cec5SDimitry Andric /// 3795*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 3796*0b57cec5SDimitry Andric /// instruction. 3797*0b57cec5SDimitry Andric /// 3798*0b57cec5SDimitry Andric /// \param __i 3799*0b57cec5SDimitry Andric /// A 32-bit value used to initialize the elements of the destination integer 3800*0b57cec5SDimitry Andric /// vector. 3801*0b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [4 x i32] with all elements 3802*0b57cec5SDimitry Andric /// containing the value provided in the operand. 
3803*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3804*0b57cec5SDimitry Andric _mm_set1_epi32(int __i) 3805*0b57cec5SDimitry Andric { 3806*0b57cec5SDimitry Andric return _mm_set_epi32(__i, __i, __i, __i); 3807*0b57cec5SDimitry Andric } 3808*0b57cec5SDimitry Andric 3809*0b57cec5SDimitry Andric /// Initializes all values in a 128-bit vector of [8 x i16] with the 3810*0b57cec5SDimitry Andric /// specified 16-bit value. 3811*0b57cec5SDimitry Andric /// 3812*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3813*0b57cec5SDimitry Andric /// 3814*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 3815*0b57cec5SDimitry Andric /// instruction. 3816*0b57cec5SDimitry Andric /// 3817*0b57cec5SDimitry Andric /// \param __w 3818*0b57cec5SDimitry Andric /// A 16-bit value used to initialize the elements of the destination integer 3819*0b57cec5SDimitry Andric /// vector. 3820*0b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [8 x i16] with all elements 3821*0b57cec5SDimitry Andric /// containing the value provided in the operand. 3822*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3823*0b57cec5SDimitry Andric _mm_set1_epi16(short __w) 3824*0b57cec5SDimitry Andric { 3825*0b57cec5SDimitry Andric return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w); 3826*0b57cec5SDimitry Andric } 3827*0b57cec5SDimitry Andric 3828*0b57cec5SDimitry Andric /// Initializes all values in a 128-bit vector of [16 x i8] with the 3829*0b57cec5SDimitry Andric /// specified 8-bit value. 3830*0b57cec5SDimitry Andric /// 3831*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3832*0b57cec5SDimitry Andric /// 3833*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 3834*0b57cec5SDimitry Andric /// instruction. 
3835*0b57cec5SDimitry Andric /// 3836*0b57cec5SDimitry Andric /// \param __b 3837*0b57cec5SDimitry Andric /// An 8-bit value used to initialize the elements of the destination integer 3838*0b57cec5SDimitry Andric /// vector. 3839*0b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [16 x i8] with all elements 3840*0b57cec5SDimitry Andric /// containing the value provided in the operand. 3841*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3842*0b57cec5SDimitry Andric _mm_set1_epi8(char __b) 3843*0b57cec5SDimitry Andric { 3844*0b57cec5SDimitry Andric return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); 3845*0b57cec5SDimitry Andric } 3846*0b57cec5SDimitry Andric 3847*0b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 3848*0b57cec5SDimitry Andric /// with the specified 64-bit integral values. 3849*0b57cec5SDimitry Andric /// 3850*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3851*0b57cec5SDimitry Andric /// 3852*0b57cec5SDimitry Andric /// This intrinsic does not correspond to a specific instruction. 3853*0b57cec5SDimitry Andric /// 3854*0b57cec5SDimitry Andric /// \param __q0 3855*0b57cec5SDimitry Andric /// A 64-bit integral value used to initialize the lower 64 bits of the 3856*0b57cec5SDimitry Andric /// result. 3857*0b57cec5SDimitry Andric /// \param __q1 3858*0b57cec5SDimitry Andric /// A 64-bit integral value used to initialize the upper 64 bits of the 3859*0b57cec5SDimitry Andric /// result. 3860*0b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 
3861*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3862*0b57cec5SDimitry Andric _mm_setr_epi64(__m64 __q0, __m64 __q1) 3863*0b57cec5SDimitry Andric { 3864*0b57cec5SDimitry Andric return _mm_set_epi64(__q1, __q0); 3865*0b57cec5SDimitry Andric } 3866*0b57cec5SDimitry Andric 3867*0b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 3868*0b57cec5SDimitry Andric /// with the specified 32-bit integral values. 3869*0b57cec5SDimitry Andric /// 3870*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3871*0b57cec5SDimitry Andric /// 3872*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 3873*0b57cec5SDimitry Andric /// instruction. 3874*0b57cec5SDimitry Andric /// 3875*0b57cec5SDimitry Andric /// \param __i0 3876*0b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [31:0] of the result. 3877*0b57cec5SDimitry Andric /// \param __i1 3878*0b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [63:32] of the result. 3879*0b57cec5SDimitry Andric /// \param __i2 3880*0b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [95:64] of the result. 3881*0b57cec5SDimitry Andric /// \param __i3 3882*0b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [127:96] of the result. 3883*0b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 3884*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3885*0b57cec5SDimitry Andric _mm_setr_epi32(int __i0, int __i1, int __i2, int __i3) 3886*0b57cec5SDimitry Andric { 3887*0b57cec5SDimitry Andric return _mm_set_epi32(__i3, __i2, __i1, __i0); 3888*0b57cec5SDimitry Andric } 3889*0b57cec5SDimitry Andric 3890*0b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 3891*0b57cec5SDimitry Andric /// with the specified 16-bit integral values. 
3892*0b57cec5SDimitry Andric /// 3893*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3894*0b57cec5SDimitry Andric /// 3895*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 3896*0b57cec5SDimitry Andric /// instruction. 3897*0b57cec5SDimitry Andric /// 3898*0b57cec5SDimitry Andric /// \param __w0 3899*0b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [15:0] of the result. 3900*0b57cec5SDimitry Andric /// \param __w1 3901*0b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [31:16] of the result. 3902*0b57cec5SDimitry Andric /// \param __w2 3903*0b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [47:32] of the result. 3904*0b57cec5SDimitry Andric /// \param __w3 3905*0b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [63:48] of the result. 3906*0b57cec5SDimitry Andric /// \param __w4 3907*0b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [79:64] of the result. 3908*0b57cec5SDimitry Andric /// \param __w5 3909*0b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [95:80] of the result. 3910*0b57cec5SDimitry Andric /// \param __w6 3911*0b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [111:96] of the result. 3912*0b57cec5SDimitry Andric /// \param __w7 3913*0b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [127:112] of the result. 3914*0b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 
3915*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3916*0b57cec5SDimitry Andric _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7) 3917*0b57cec5SDimitry Andric { 3918*0b57cec5SDimitry Andric return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0); 3919*0b57cec5SDimitry Andric } 3920*0b57cec5SDimitry Andric 3921*0b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 3922*0b57cec5SDimitry Andric /// with the specified 8-bit integral values. 3923*0b57cec5SDimitry Andric /// 3924*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3925*0b57cec5SDimitry Andric /// 3926*0b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 3927*0b57cec5SDimitry Andric /// instruction. 3928*0b57cec5SDimitry Andric /// 3929*0b57cec5SDimitry Andric /// \param __b0 3930*0b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [7:0] of the result. 3931*0b57cec5SDimitry Andric /// \param __b1 3932*0b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [15:8] of the result. 3933*0b57cec5SDimitry Andric /// \param __b2 3934*0b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [23:16] of the result. 3935*0b57cec5SDimitry Andric /// \param __b3 3936*0b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [31:24] of the result. 3937*0b57cec5SDimitry Andric /// \param __b4 3938*0b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [39:32] of the result. 3939*0b57cec5SDimitry Andric /// \param __b5 3940*0b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [47:40] of the result. 3941*0b57cec5SDimitry Andric /// \param __b6 3942*0b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [55:48] of the result. 
3943*0b57cec5SDimitry Andric /// \param __b7 3944*0b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [63:56] of the result. 3945*0b57cec5SDimitry Andric /// \param __b8 3946*0b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [71:64] of the result. 3947*0b57cec5SDimitry Andric /// \param __b9 3948*0b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [79:72] of the result. 3949*0b57cec5SDimitry Andric /// \param __b10 3950*0b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [87:80] of the result. 3951*0b57cec5SDimitry Andric /// \param __b11 3952*0b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [95:88] of the result. 3953*0b57cec5SDimitry Andric /// \param __b12 3954*0b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [103:96] of the result. 3955*0b57cec5SDimitry Andric /// \param __b13 3956*0b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [111:104] of the result. 3957*0b57cec5SDimitry Andric /// \param __b14 3958*0b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [119:112] of the result. 3959*0b57cec5SDimitry Andric /// \param __b15 3960*0b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [127:120] of the result. 3961*0b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 
3962*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3963*0b57cec5SDimitry Andric _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15) 3964*0b57cec5SDimitry Andric { 3965*0b57cec5SDimitry Andric return _mm_set_epi8(__b15, __b14, __b13, __b12, __b11, __b10, __b9, __b8, __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); 3966*0b57cec5SDimitry Andric } 3967*0b57cec5SDimitry Andric 3968*0b57cec5SDimitry Andric /// Creates a 128-bit integer vector initialized to zero. 3969*0b57cec5SDimitry Andric /// 3970*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3971*0b57cec5SDimitry Andric /// 3972*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction. 3973*0b57cec5SDimitry Andric /// 3974*0b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector with all elements set to 3975*0b57cec5SDimitry Andric /// zero. 3976*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3977*0b57cec5SDimitry Andric _mm_setzero_si128(void) 3978*0b57cec5SDimitry Andric { 3979*0b57cec5SDimitry Andric return __extension__ (__m128i)(__v2di){ 0LL, 0LL }; 3980*0b57cec5SDimitry Andric } 3981*0b57cec5SDimitry Andric 3982*0b57cec5SDimitry Andric /// Stores a 128-bit integer vector to a memory location aligned on a 3983*0b57cec5SDimitry Andric /// 128-bit boundary. 3984*0b57cec5SDimitry Andric /// 3985*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3986*0b57cec5SDimitry Andric /// 3987*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction. 3988*0b57cec5SDimitry Andric /// 3989*0b57cec5SDimitry Andric /// \param __p 3990*0b57cec5SDimitry Andric /// A pointer to an aligned memory location that will receive the integer 3991*0b57cec5SDimitry Andric /// values. 
3992*0b57cec5SDimitry Andric /// \param __b 3993*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be moved. 3994*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 3995*0b57cec5SDimitry Andric _mm_store_si128(__m128i *__p, __m128i __b) 3996*0b57cec5SDimitry Andric { 3997*0b57cec5SDimitry Andric *__p = __b; 3998*0b57cec5SDimitry Andric } 3999*0b57cec5SDimitry Andric 4000*0b57cec5SDimitry Andric /// Stores a 128-bit integer vector to an unaligned memory location. 4001*0b57cec5SDimitry Andric /// 4002*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4003*0b57cec5SDimitry Andric /// 4004*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction. 4005*0b57cec5SDimitry Andric /// 4006*0b57cec5SDimitry Andric /// \param __p 4007*0b57cec5SDimitry Andric /// A pointer to a memory location that will receive the integer values. 4008*0b57cec5SDimitry Andric /// \param __b 4009*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be moved. 4010*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 4011*0b57cec5SDimitry Andric _mm_storeu_si128(__m128i_u *__p, __m128i __b) 4012*0b57cec5SDimitry Andric { 4013*0b57cec5SDimitry Andric struct __storeu_si128 { 4014*0b57cec5SDimitry Andric __m128i_u __v; 4015*0b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 4016*0b57cec5SDimitry Andric ((struct __storeu_si128*)__p)->__v = __b; 4017*0b57cec5SDimitry Andric } 4018*0b57cec5SDimitry Andric 4019*0b57cec5SDimitry Andric /// Stores a 64-bit integer value from the low element of a 128-bit integer 4020*0b57cec5SDimitry Andric /// vector. 4021*0b57cec5SDimitry Andric /// 4022*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4023*0b57cec5SDimitry Andric /// 4024*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 
4025*0b57cec5SDimitry Andric /// 4026*0b57cec5SDimitry Andric /// \param __p 4027*0b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. The address of the memory 4028*0b57cec5SDimitry Andric /// location does not have to be algned. 4029*0b57cec5SDimitry Andric /// \param __b 4030*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the value to be stored. 4031*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 4032*0b57cec5SDimitry Andric _mm_storeu_si64(void *__p, __m128i __b) 4033*0b57cec5SDimitry Andric { 4034*0b57cec5SDimitry Andric struct __storeu_si64 { 4035*0b57cec5SDimitry Andric long long __v; 4036*0b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 4037*0b57cec5SDimitry Andric ((struct __storeu_si64*)__p)->__v = ((__v2di)__b)[0]; 4038*0b57cec5SDimitry Andric } 4039*0b57cec5SDimitry Andric 4040*0b57cec5SDimitry Andric /// Stores a 32-bit integer value from the low element of a 128-bit integer 4041*0b57cec5SDimitry Andric /// vector. 4042*0b57cec5SDimitry Andric /// 4043*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4044*0b57cec5SDimitry Andric /// 4045*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 4046*0b57cec5SDimitry Andric /// 4047*0b57cec5SDimitry Andric /// \param __p 4048*0b57cec5SDimitry Andric /// A pointer to a 32-bit memory location. The address of the memory 4049*0b57cec5SDimitry Andric /// location does not have to be aligned. 4050*0b57cec5SDimitry Andric /// \param __b 4051*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the value to be stored. 
4052*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 4053*0b57cec5SDimitry Andric _mm_storeu_si32(void *__p, __m128i __b) 4054*0b57cec5SDimitry Andric { 4055*0b57cec5SDimitry Andric struct __storeu_si32 { 4056*0b57cec5SDimitry Andric int __v; 4057*0b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 4058*0b57cec5SDimitry Andric ((struct __storeu_si32*)__p)->__v = ((__v4si)__b)[0]; 4059*0b57cec5SDimitry Andric } 4060*0b57cec5SDimitry Andric 4061*0b57cec5SDimitry Andric /// Stores a 16-bit integer value from the low element of a 128-bit integer 4062*0b57cec5SDimitry Andric /// vector. 4063*0b57cec5SDimitry Andric /// 4064*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4065*0b57cec5SDimitry Andric /// 4066*0b57cec5SDimitry Andric /// This intrinsic does not correspond to a specific instruction. 4067*0b57cec5SDimitry Andric /// 4068*0b57cec5SDimitry Andric /// \param __p 4069*0b57cec5SDimitry Andric /// A pointer to a 16-bit memory location. The address of the memory 4070*0b57cec5SDimitry Andric /// location does not have to be aligned. 4071*0b57cec5SDimitry Andric /// \param __b 4072*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the value to be stored. 4073*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 4074*0b57cec5SDimitry Andric _mm_storeu_si16(void *__p, __m128i __b) 4075*0b57cec5SDimitry Andric { 4076*0b57cec5SDimitry Andric struct __storeu_si16 { 4077*0b57cec5SDimitry Andric short __v; 4078*0b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 4079*0b57cec5SDimitry Andric ((struct __storeu_si16*)__p)->__v = ((__v8hi)__b)[0]; 4080*0b57cec5SDimitry Andric } 4081*0b57cec5SDimitry Andric 4082*0b57cec5SDimitry Andric /// Moves bytes selected by the mask from the first operand to the 4083*0b57cec5SDimitry Andric /// specified unaligned memory location. When a mask bit is 1, the 4084*0b57cec5SDimitry Andric /// corresponding byte is written, otherwise it is not written. 
4085*0b57cec5SDimitry Andric /// 4086*0b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 4087*0b57cec5SDimitry Andric /// used again soon). Exception and trap behavior for elements not selected 4088*0b57cec5SDimitry Andric /// for storage to memory are implementation dependent. 4089*0b57cec5SDimitry Andric /// 4090*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4091*0b57cec5SDimitry Andric /// 4092*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMASKMOVDQU / MASKMOVDQU </c> 4093*0b57cec5SDimitry Andric /// instruction. 4094*0b57cec5SDimitry Andric /// 4095*0b57cec5SDimitry Andric /// \param __d 4096*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be moved. 4097*0b57cec5SDimitry Andric /// \param __n 4098*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the mask. The most significant bit of 4099*0b57cec5SDimitry Andric /// each byte represents the mask bits. 4100*0b57cec5SDimitry Andric /// \param __p 4101*0b57cec5SDimitry Andric /// A pointer to an unaligned 128-bit memory location where the specified 4102*0b57cec5SDimitry Andric /// values are moved. 4103*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 4104*0b57cec5SDimitry Andric _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p) 4105*0b57cec5SDimitry Andric { 4106*0b57cec5SDimitry Andric __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p); 4107*0b57cec5SDimitry Andric } 4108*0b57cec5SDimitry Andric 4109*0b57cec5SDimitry Andric /// Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to 4110*0b57cec5SDimitry Andric /// a memory location. 4111*0b57cec5SDimitry Andric /// 4112*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4113*0b57cec5SDimitry Andric /// 4114*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVLPS / MOVLPS </c> instruction. 
4115*0b57cec5SDimitry Andric /// 4116*0b57cec5SDimitry Andric /// \param __p 4117*0b57cec5SDimitry Andric /// A pointer to a 64-bit memory location that will receive the lower 64 bits 4118*0b57cec5SDimitry Andric /// of the integer vector parameter. 4119*0b57cec5SDimitry Andric /// \param __a 4120*0b57cec5SDimitry Andric /// A 128-bit integer vector of [2 x i64]. The lower 64 bits contain the 4121*0b57cec5SDimitry Andric /// value to be stored. 4122*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 4123*0b57cec5SDimitry Andric _mm_storel_epi64(__m128i_u *__p, __m128i __a) 4124*0b57cec5SDimitry Andric { 4125*0b57cec5SDimitry Andric struct __mm_storel_epi64_struct { 4126*0b57cec5SDimitry Andric long long __u; 4127*0b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 4128*0b57cec5SDimitry Andric ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0]; 4129*0b57cec5SDimitry Andric } 4130*0b57cec5SDimitry Andric 4131*0b57cec5SDimitry Andric /// Stores a 128-bit floating point vector of [2 x double] to a 128-bit 4132*0b57cec5SDimitry Andric /// aligned memory location. 4133*0b57cec5SDimitry Andric /// 4134*0b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 4135*0b57cec5SDimitry Andric /// used again soon). 4136*0b57cec5SDimitry Andric /// 4137*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4138*0b57cec5SDimitry Andric /// 4139*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction. 4140*0b57cec5SDimitry Andric /// 4141*0b57cec5SDimitry Andric /// \param __p 4142*0b57cec5SDimitry Andric /// A pointer to the 128-bit aligned memory location used to store the value. 4143*0b57cec5SDimitry Andric /// \param __a 4144*0b57cec5SDimitry Andric /// A vector of [2 x double] containing the 64-bit values to be stored. 
4145*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 4146*0b57cec5SDimitry Andric _mm_stream_pd(double *__p, __m128d __a) 4147*0b57cec5SDimitry Andric { 4148*0b57cec5SDimitry Andric __builtin_nontemporal_store((__v2df)__a, (__v2df*)__p); 4149*0b57cec5SDimitry Andric } 4150*0b57cec5SDimitry Andric 4151*0b57cec5SDimitry Andric /// Stores a 128-bit integer vector to a 128-bit aligned memory location. 4152*0b57cec5SDimitry Andric /// 4153*0b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 4154*0b57cec5SDimitry Andric /// used again soon). 4155*0b57cec5SDimitry Andric /// 4156*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4157*0b57cec5SDimitry Andric /// 4158*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction. 4159*0b57cec5SDimitry Andric /// 4160*0b57cec5SDimitry Andric /// \param __p 4161*0b57cec5SDimitry Andric /// A pointer to the 128-bit aligned memory location used to store the value. 4162*0b57cec5SDimitry Andric /// \param __a 4163*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be stored. 4164*0b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 4165*0b57cec5SDimitry Andric _mm_stream_si128(__m128i *__p, __m128i __a) 4166*0b57cec5SDimitry Andric { 4167*0b57cec5SDimitry Andric __builtin_nontemporal_store((__v2di)__a, (__v2di*)__p); 4168*0b57cec5SDimitry Andric } 4169*0b57cec5SDimitry Andric 4170*0b57cec5SDimitry Andric /// Stores a 32-bit integer value in the specified memory location. 4171*0b57cec5SDimitry Andric /// 4172*0b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 4173*0b57cec5SDimitry Andric /// used again soon). 4174*0b57cec5SDimitry Andric /// 4175*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4176*0b57cec5SDimitry Andric /// 4177*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVNTI </c> instruction. 
4178*0b57cec5SDimitry Andric /// 4179*0b57cec5SDimitry Andric /// \param __p 4180*0b57cec5SDimitry Andric /// A pointer to the 32-bit memory location used to store the value. 4181*0b57cec5SDimitry Andric /// \param __a 4182*0b57cec5SDimitry Andric /// A 32-bit integer containing the value to be stored. 4183*0b57cec5SDimitry Andric static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) 4184*0b57cec5SDimitry Andric _mm_stream_si32(int *__p, int __a) 4185*0b57cec5SDimitry Andric { 4186*0b57cec5SDimitry Andric __builtin_ia32_movnti(__p, __a); 4187*0b57cec5SDimitry Andric } 4188*0b57cec5SDimitry Andric 4189*0b57cec5SDimitry Andric #ifdef __x86_64__ 4190*0b57cec5SDimitry Andric /// Stores a 64-bit integer value in the specified memory location. 4191*0b57cec5SDimitry Andric /// 4192*0b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 4193*0b57cec5SDimitry Andric /// used again soon). 4194*0b57cec5SDimitry Andric /// 4195*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4196*0b57cec5SDimitry Andric /// 4197*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVNTIQ </c> instruction. 4198*0b57cec5SDimitry Andric /// 4199*0b57cec5SDimitry Andric /// \param __p 4200*0b57cec5SDimitry Andric /// A pointer to the 64-bit memory location used to store the value. 4201*0b57cec5SDimitry Andric /// \param __a 4202*0b57cec5SDimitry Andric /// A 64-bit integer containing the value to be stored. 
4203*0b57cec5SDimitry Andric static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) 4204*0b57cec5SDimitry Andric _mm_stream_si64(long long *__p, long long __a) 4205*0b57cec5SDimitry Andric { 4206*0b57cec5SDimitry Andric __builtin_ia32_movnti64(__p, __a); 4207*0b57cec5SDimitry Andric } 4208*0b57cec5SDimitry Andric #endif 4209*0b57cec5SDimitry Andric 4210*0b57cec5SDimitry Andric #if defined(__cplusplus) 4211*0b57cec5SDimitry Andric extern "C" { 4212*0b57cec5SDimitry Andric #endif 4213*0b57cec5SDimitry Andric 4214*0b57cec5SDimitry Andric /// The cache line containing \a __p is flushed and invalidated from all 4215*0b57cec5SDimitry Andric /// caches in the coherency domain. 4216*0b57cec5SDimitry Andric /// 4217*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4218*0b57cec5SDimitry Andric /// 4219*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CLFLUSH </c> instruction. 4220*0b57cec5SDimitry Andric /// 4221*0b57cec5SDimitry Andric /// \param __p 4222*0b57cec5SDimitry Andric /// A pointer to the memory location used to identify the cache line to be 4223*0b57cec5SDimitry Andric /// flushed. 4224*0b57cec5SDimitry Andric void _mm_clflush(void const * __p); 4225*0b57cec5SDimitry Andric 4226*0b57cec5SDimitry Andric /// Forces strong memory ordering (serialization) between load 4227*0b57cec5SDimitry Andric /// instructions preceding this instruction and load instructions following 4228*0b57cec5SDimitry Andric /// this instruction, ensuring the system completes all previous loads before 4229*0b57cec5SDimitry Andric /// executing subsequent loads. 4230*0b57cec5SDimitry Andric /// 4231*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4232*0b57cec5SDimitry Andric /// 4233*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> LFENCE </c> instruction. 
4234*0b57cec5SDimitry Andric /// 4235*0b57cec5SDimitry Andric void _mm_lfence(void); 4236*0b57cec5SDimitry Andric 4237*0b57cec5SDimitry Andric /// Forces strong memory ordering (serialization) between load and store 4238*0b57cec5SDimitry Andric /// instructions preceding this instruction and load and store instructions 4239*0b57cec5SDimitry Andric /// following this instruction, ensuring that the system completes all 4240*0b57cec5SDimitry Andric /// previous memory accesses before executing subsequent memory accesses. 4241*0b57cec5SDimitry Andric /// 4242*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4243*0b57cec5SDimitry Andric /// 4244*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MFENCE </c> instruction. 4245*0b57cec5SDimitry Andric /// 4246*0b57cec5SDimitry Andric void _mm_mfence(void); 4247*0b57cec5SDimitry Andric 4248*0b57cec5SDimitry Andric #if defined(__cplusplus) 4249*0b57cec5SDimitry Andric } // extern "C" 4250*0b57cec5SDimitry Andric #endif 4251*0b57cec5SDimitry Andric 4252*0b57cec5SDimitry Andric /// Converts 16-bit signed integers from both 128-bit integer vector 4253*0b57cec5SDimitry Andric /// operands into 8-bit signed integers, and packs the results into the 4254*0b57cec5SDimitry Andric /// destination. Positive values greater than 0x7F are saturated to 0x7F. 4255*0b57cec5SDimitry Andric /// Negative values less than 0x80 are saturated to 0x80. 4256*0b57cec5SDimitry Andric /// 4257*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4258*0b57cec5SDimitry Andric /// 4259*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPACKSSWB / PACKSSWB </c> instruction. 4260*0b57cec5SDimitry Andric /// 4261*0b57cec5SDimitry Andric /// \param __a 4262*0b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as 4263*0b57cec5SDimitry Andric /// a signed integer and is converted to a 8-bit signed integer with 4264*0b57cec5SDimitry Andric /// saturation. 
Values greater than 0x7F are saturated to 0x7F. Values less 4265*0b57cec5SDimitry Andric /// than 0x80 are saturated to 0x80. The converted [8 x i8] values are 4266*0b57cec5SDimitry Andric /// written to the lower 64 bits of the result. 4267*0b57cec5SDimitry Andric /// \param __b 4268*0b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as 4269*0b57cec5SDimitry Andric /// a signed integer and is converted to a 8-bit signed integer with 4270*0b57cec5SDimitry Andric /// saturation. Values greater than 0x7F are saturated to 0x7F. Values less 4271*0b57cec5SDimitry Andric /// than 0x80 are saturated to 0x80. The converted [8 x i8] values are 4272*0b57cec5SDimitry Andric /// written to the higher 64 bits of the result. 4273*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the converted values. 4274*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 4275*0b57cec5SDimitry Andric _mm_packs_epi16(__m128i __a, __m128i __b) 4276*0b57cec5SDimitry Andric { 4277*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b); 4278*0b57cec5SDimitry Andric } 4279*0b57cec5SDimitry Andric 4280*0b57cec5SDimitry Andric /// Converts 32-bit signed integers from both 128-bit integer vector 4281*0b57cec5SDimitry Andric /// operands into 16-bit signed integers, and packs the results into the 4282*0b57cec5SDimitry Andric /// destination. Positive values greater than 0x7FFF are saturated to 0x7FFF. 4283*0b57cec5SDimitry Andric /// Negative values less than 0x8000 are saturated to 0x8000. 4284*0b57cec5SDimitry Andric /// 4285*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4286*0b57cec5SDimitry Andric /// 4287*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPACKSSDW / PACKSSDW </c> instruction. 4288*0b57cec5SDimitry Andric /// 4289*0b57cec5SDimitry Andric /// \param __a 4290*0b57cec5SDimitry Andric /// A 128-bit integer vector of [4 x i32]. 
Each 32-bit element is treated as 4291*0b57cec5SDimitry Andric /// a signed integer and is converted to a 16-bit signed integer with 4292*0b57cec5SDimitry Andric /// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values 4293*0b57cec5SDimitry Andric /// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values 4294*0b57cec5SDimitry Andric /// are written to the lower 64 bits of the result. 4295*0b57cec5SDimitry Andric /// \param __b 4296*0b57cec5SDimitry Andric /// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as 4297*0b57cec5SDimitry Andric /// a signed integer and is converted to a 16-bit signed integer with 4298*0b57cec5SDimitry Andric /// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values 4299*0b57cec5SDimitry Andric /// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values 4300*0b57cec5SDimitry Andric /// are written to the higher 64 bits of the result. 4301*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the converted values. 4302*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 4303*0b57cec5SDimitry Andric _mm_packs_epi32(__m128i __a, __m128i __b) 4304*0b57cec5SDimitry Andric { 4305*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b); 4306*0b57cec5SDimitry Andric } 4307*0b57cec5SDimitry Andric 4308*0b57cec5SDimitry Andric /// Converts 16-bit signed integers from both 128-bit integer vector 4309*0b57cec5SDimitry Andric /// operands into 8-bit unsigned integers, and packs the results into the 4310*0b57cec5SDimitry Andric /// destination. Values greater than 0xFF are saturated to 0xFF. Values less 4311*0b57cec5SDimitry Andric /// than 0x00 are saturated to 0x00. 
4312*0b57cec5SDimitry Andric /// 4313*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4314*0b57cec5SDimitry Andric /// 4315*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPACKUSWB / PACKUSWB </c> instruction. 4316*0b57cec5SDimitry Andric /// 4317*0b57cec5SDimitry Andric /// \param __a 4318*0b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as 4319*0b57cec5SDimitry Andric /// a signed integer and is converted to an 8-bit unsigned integer with 4320*0b57cec5SDimitry Andric /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 4321*0b57cec5SDimitry Andric /// than 0x00 are saturated to 0x00. The converted [8 x i8] values are 4322*0b57cec5SDimitry Andric /// written to the lower 64 bits of the result. 4323*0b57cec5SDimitry Andric /// \param __b 4324*0b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as 4325*0b57cec5SDimitry Andric /// a signed integer and is converted to an 8-bit unsigned integer with 4326*0b57cec5SDimitry Andric /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 4327*0b57cec5SDimitry Andric /// than 0x00 are saturated to 0x00. The converted [8 x i8] values are 4328*0b57cec5SDimitry Andric /// written to the higher 64 bits of the result. 4329*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the converted values. 4330*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 4331*0b57cec5SDimitry Andric _mm_packus_epi16(__m128i __a, __m128i __b) 4332*0b57cec5SDimitry Andric { 4333*0b57cec5SDimitry Andric return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b); 4334*0b57cec5SDimitry Andric } 4335*0b57cec5SDimitry Andric 4336*0b57cec5SDimitry Andric /// Extracts 16 bits from a 128-bit integer vector of [8 x i16], using 4337*0b57cec5SDimitry Andric /// the immediate-value parameter as a selector. 
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction.
///
/// \param __a
///    A 128-bit integer vector.
/// \param __imm
///    An immediate value. Bits [2:0] select the value from \a __a to be
///    assigned to bits [15:0] of the result. \n
///    000: assign values from bits [15:0] of \a __a. \n
///    001: assign values from bits [31:16] of \a __a. \n
///    010: assign values from bits [47:32] of \a __a. \n
///    011: assign values from bits [63:48] of \a __a. \n
///    100: assign values from bits [79:64] of \a __a. \n
///    101: assign values from bits [95:80] of \a __a. \n
///    110: assign values from bits [111:96] of \a __a. \n
///    111: assign values from bits [127:112] of \a __a.
/// \returns An integer, whose lower 16 bits are selected from the 128-bit
///    integer vector parameter and the remaining bits are assigned zeros.
/* The whole expansion is parenthesized so that both casts still apply when
   the macro is used as a sizeof operand or is followed by a postfix
   operator. */
#define _mm_extract_epi16(a, imm) \
  ((int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \
                                                    (int)(imm)))

/// Constructs a 128-bit integer vector by first making a copy of the
///    128-bit integer vector parameter, and then replacing one of its 16-bit
///    elements, selected by the immediate-value parameter, with the lower
///    16 bits of an integer parameter.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPINSRW / PINSRW </c> instruction.
///
/// \param __a
///    A 128-bit integer vector of [8 x i16]. This vector is copied to the
///    result and then one of the eight elements in the result is replaced by
///    the lower 16 bits of \a __b.
/// \param __b
///    An integer. The lower 16 bits of this parameter are written to the
///    element of the result selected by \a __imm.
/// \param __imm
///    An immediate value selecting which of the eight 16-bit elements of the
///    result is replaced by the lower 16 bits of \a __b.
/// \returns A 128-bit integer vector containing the constructed values.
4382*0b57cec5SDimitry Andric #define _mm_insert_epi16(a, b, imm) \ 4383*0b57cec5SDimitry Andric (__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \ 4384*0b57cec5SDimitry Andric (int)(imm)) 4385*0b57cec5SDimitry Andric 4386*0b57cec5SDimitry Andric /// Copies the values of the most significant bits from each 8-bit 4387*0b57cec5SDimitry Andric /// element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask 4388*0b57cec5SDimitry Andric /// value, zero-extends the value, and writes it to the destination. 4389*0b57cec5SDimitry Andric /// 4390*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4391*0b57cec5SDimitry Andric /// 4392*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMOVMSKB / PMOVMSKB </c> instruction. 4393*0b57cec5SDimitry Andric /// 4394*0b57cec5SDimitry Andric /// \param __a 4395*0b57cec5SDimitry Andric /// A 128-bit integer vector containing the values with bits to be extracted. 4396*0b57cec5SDimitry Andric /// \returns The most significant bits from each 8-bit element in \a __a, 4397*0b57cec5SDimitry Andric /// written to bits [15:0]. The other bits are assigned zeros. 4398*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 4399*0b57cec5SDimitry Andric _mm_movemask_epi8(__m128i __a) 4400*0b57cec5SDimitry Andric { 4401*0b57cec5SDimitry Andric return __builtin_ia32_pmovmskb128((__v16qi)__a); 4402*0b57cec5SDimitry Andric } 4403*0b57cec5SDimitry Andric 4404*0b57cec5SDimitry Andric /// Constructs a 128-bit integer vector by shuffling four 32-bit 4405*0b57cec5SDimitry Andric /// elements of a 128-bit integer vector parameter, using the immediate-value 4406*0b57cec5SDimitry Andric /// parameter as a specifier. 
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_shuffle_epi32(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPSHUFD / PSHUFD </c> instruction.
///
/// \param a
///    A 128-bit integer vector containing the values to be copied.
/// \param imm
///    An immediate value containing an 8-bit value specifying which elements to
///    copy from \a a. The elements of the 128-bit result are assigned
///    values as follows: \n
///    Bits [1:0] are used to assign values to bits [31:0] of the result. \n
///    Bits [3:2] are used to assign values to bits [63:32] of the result. \n
///    Bits [5:4] are used to assign values to bits [95:64] of the result. \n
///    Bits [7:6] are used to assign values to bits [127:96] of the result. \n
///    Bit value assignments: \n
///    00: assign values from bits [31:0] of \a a. \n
///    01: assign values from bits [63:32] of \a a. \n
///    10: assign values from bits [95:64] of \a a. \n
///    11: assign values from bits [127:96] of \a a.
/// \returns A 128-bit integer vector containing the shuffled values.
4432*0b57cec5SDimitry Andric #define _mm_shuffle_epi32(a, imm) \ 4433*0b57cec5SDimitry Andric (__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm)) 4434*0b57cec5SDimitry Andric 4435*0b57cec5SDimitry Andric /// Constructs a 128-bit integer vector by shuffling four lower 16-bit 4436*0b57cec5SDimitry Andric /// elements of a 128-bit integer vector of [8 x i16], using the immediate 4437*0b57cec5SDimitry Andric /// value parameter as a specifier. 4438*0b57cec5SDimitry Andric /// 4439*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4440*0b57cec5SDimitry Andric /// 4441*0b57cec5SDimitry Andric /// \code 4442*0b57cec5SDimitry Andric /// __m128i _mm_shufflelo_epi16(__m128i a, const int imm); 4443*0b57cec5SDimitry Andric /// \endcode 4444*0b57cec5SDimitry Andric /// 4445*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSHUFLW / PSHUFLW </c> instruction. 4446*0b57cec5SDimitry Andric /// 4447*0b57cec5SDimitry Andric /// \param a 4448*0b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Bits [127:64] are copied to bits 4449*0b57cec5SDimitry Andric /// [127:64] of the result. 4450*0b57cec5SDimitry Andric /// \param imm 4451*0b57cec5SDimitry Andric /// An 8-bit immediate value specifying which elements to copy from \a a. \n 4452*0b57cec5SDimitry Andric /// Bits[1:0] are used to assign values to bits [15:0] of the result. \n 4453*0b57cec5SDimitry Andric /// Bits[3:2] are used to assign values to bits [31:16] of the result. \n 4454*0b57cec5SDimitry Andric /// Bits[5:4] are used to assign values to bits [47:32] of the result. \n 4455*0b57cec5SDimitry Andric /// Bits[7:6] are used to assign values to bits [63:48] of the result. \n 4456*0b57cec5SDimitry Andric /// Bit value assignments: \n 4457*0b57cec5SDimitry Andric /// 00: assign values from bits [15:0] of \a a. \n 4458*0b57cec5SDimitry Andric /// 01: assign values from bits [31:16] of \a a. \n 4459*0b57cec5SDimitry Andric /// 10: assign values from bits [47:32] of \a a. 
\n 4460*0b57cec5SDimitry Andric /// 11: assign values from bits [63:48] of \a a. \n 4461*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the shuffled values. 4462*0b57cec5SDimitry Andric #define _mm_shufflelo_epi16(a, imm) \ 4463*0b57cec5SDimitry Andric (__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm)) 4464*0b57cec5SDimitry Andric 4465*0b57cec5SDimitry Andric /// Constructs a 128-bit integer vector by shuffling four upper 16-bit 4466*0b57cec5SDimitry Andric /// elements of a 128-bit integer vector of [8 x i16], using the immediate 4467*0b57cec5SDimitry Andric /// value parameter as a specifier. 4468*0b57cec5SDimitry Andric /// 4469*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4470*0b57cec5SDimitry Andric /// 4471*0b57cec5SDimitry Andric /// \code 4472*0b57cec5SDimitry Andric /// __m128i _mm_shufflehi_epi16(__m128i a, const int imm); 4473*0b57cec5SDimitry Andric /// \endcode 4474*0b57cec5SDimitry Andric /// 4475*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSHUFHW / PSHUFHW </c> instruction. 4476*0b57cec5SDimitry Andric /// 4477*0b57cec5SDimitry Andric /// \param a 4478*0b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Bits [63:0] are copied to bits 4479*0b57cec5SDimitry Andric /// [63:0] of the result. 4480*0b57cec5SDimitry Andric /// \param imm 4481*0b57cec5SDimitry Andric /// An 8-bit immediate value specifying which elements to copy from \a a. \n 4482*0b57cec5SDimitry Andric /// Bits[1:0] are used to assign values to bits [79:64] of the result. \n 4483*0b57cec5SDimitry Andric /// Bits[3:2] are used to assign values to bits [95:80] of the result. \n 4484*0b57cec5SDimitry Andric /// Bits[5:4] are used to assign values to bits [111:96] of the result. \n 4485*0b57cec5SDimitry Andric /// Bits[7:6] are used to assign values to bits [127:112] of the result. 
\n 4486*0b57cec5SDimitry Andric /// Bit value assignments: \n 4487*0b57cec5SDimitry Andric /// 00: assign values from bits [79:64] of \a a. \n 4488*0b57cec5SDimitry Andric /// 01: assign values from bits [95:80] of \a a. \n 4489*0b57cec5SDimitry Andric /// 10: assign values from bits [111:96] of \a a. \n 4490*0b57cec5SDimitry Andric /// 11: assign values from bits [127:112] of \a a. \n 4491*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the shuffled values. 4492*0b57cec5SDimitry Andric #define _mm_shufflehi_epi16(a, imm) \ 4493*0b57cec5SDimitry Andric (__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm)) 4494*0b57cec5SDimitry Andric 4495*0b57cec5SDimitry Andric /// Unpacks the high-order (index 8-15) values from two 128-bit vectors 4496*0b57cec5SDimitry Andric /// of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8]. 4497*0b57cec5SDimitry Andric /// 4498*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4499*0b57cec5SDimitry Andric /// 4500*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHBW / PUNPCKHBW </c> 4501*0b57cec5SDimitry Andric /// instruction. 4502*0b57cec5SDimitry Andric /// 4503*0b57cec5SDimitry Andric /// \param __a 4504*0b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. 4505*0b57cec5SDimitry Andric /// Bits [71:64] are written to bits [7:0] of the result. \n 4506*0b57cec5SDimitry Andric /// Bits [79:72] are written to bits [23:16] of the result. \n 4507*0b57cec5SDimitry Andric /// Bits [87:80] are written to bits [39:32] of the result. \n 4508*0b57cec5SDimitry Andric /// Bits [95:88] are written to bits [55:48] of the result. \n 4509*0b57cec5SDimitry Andric /// Bits [103:96] are written to bits [71:64] of the result. \n 4510*0b57cec5SDimitry Andric /// Bits [111:104] are written to bits [87:80] of the result. \n 4511*0b57cec5SDimitry Andric /// Bits [119:112] are written to bits [103:96] of the result. 
\n 4512*0b57cec5SDimitry Andric /// Bits [127:120] are written to bits [119:112] of the result. 4513*0b57cec5SDimitry Andric /// \param __b 4514*0b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. \n 4515*0b57cec5SDimitry Andric /// Bits [71:64] are written to bits [15:8] of the result. \n 4516*0b57cec5SDimitry Andric /// Bits [79:72] are written to bits [31:24] of the result. \n 4517*0b57cec5SDimitry Andric /// Bits [87:80] are written to bits [47:40] of the result. \n 4518*0b57cec5SDimitry Andric /// Bits [95:88] are written to bits [63:56] of the result. \n 4519*0b57cec5SDimitry Andric /// Bits [103:96] are written to bits [79:72] of the result. \n 4520*0b57cec5SDimitry Andric /// Bits [111:104] are written to bits [95:88] of the result. \n 4521*0b57cec5SDimitry Andric /// Bits [119:112] are written to bits [111:104] of the result. \n 4522*0b57cec5SDimitry Andric /// Bits [127:120] are written to bits [127:120] of the result. 4523*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the interleaved values. 4524*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 4525*0b57cec5SDimitry Andric _mm_unpackhi_epi8(__m128i __a, __m128i __b) 4526*0b57cec5SDimitry Andric { 4527*0b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); 4528*0b57cec5SDimitry Andric } 4529*0b57cec5SDimitry Andric 4530*0b57cec5SDimitry Andric /// Unpacks the high-order (index 4-7) values from two 128-bit vectors of 4531*0b57cec5SDimitry Andric /// [8 x i16] and interleaves them into a 128-bit vector of [8 x i16]. 4532*0b57cec5SDimitry Andric /// 4533*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4534*0b57cec5SDimitry Andric /// 4535*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHWD / PUNPCKHWD </c> 4536*0b57cec5SDimitry Andric /// instruction. 
4537*0b57cec5SDimitry Andric /// 4538*0b57cec5SDimitry Andric /// \param __a 4539*0b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 4540*0b57cec5SDimitry Andric /// Bits [79:64] are written to bits [15:0] of the result. \n 4541*0b57cec5SDimitry Andric /// Bits [95:80] are written to bits [47:32] of the result. \n 4542*0b57cec5SDimitry Andric /// Bits [111:96] are written to bits [79:64] of the result. \n 4543*0b57cec5SDimitry Andric /// Bits [127:112] are written to bits [111:96] of the result. 4544*0b57cec5SDimitry Andric /// \param __b 4545*0b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 4546*0b57cec5SDimitry Andric /// Bits [79:64] are written to bits [31:16] of the result. \n 4547*0b57cec5SDimitry Andric /// Bits [95:80] are written to bits [63:48] of the result. \n 4548*0b57cec5SDimitry Andric /// Bits [111:96] are written to bits [95:80] of the result. \n 4549*0b57cec5SDimitry Andric /// Bits [127:112] are written to bits [127:112] of the result. 4550*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the interleaved values. 4551*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 4552*0b57cec5SDimitry Andric _mm_unpackhi_epi16(__m128i __a, __m128i __b) 4553*0b57cec5SDimitry Andric { 4554*0b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7); 4555*0b57cec5SDimitry Andric } 4556*0b57cec5SDimitry Andric 4557*0b57cec5SDimitry Andric /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of 4558*0b57cec5SDimitry Andric /// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32]. 4559*0b57cec5SDimitry Andric /// 4560*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4561*0b57cec5SDimitry Andric /// 4562*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHDQ / PUNPCKHDQ </c> 4563*0b57cec5SDimitry Andric /// instruction. 
4564*0b57cec5SDimitry Andric /// 4565*0b57cec5SDimitry Andric /// \param __a 4566*0b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 4567*0b57cec5SDimitry Andric /// Bits [95:64] are written to bits [31:0] of the destination. \n 4568*0b57cec5SDimitry Andric /// Bits [127:96] are written to bits [95:64] of the destination. 4569*0b57cec5SDimitry Andric /// \param __b 4570*0b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 4571*0b57cec5SDimitry Andric /// Bits [95:64] are written to bits [64:32] of the destination. \n 4572*0b57cec5SDimitry Andric /// Bits [127:96] are written to bits [127:96] of the destination. 4573*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the interleaved values. 4574*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 4575*0b57cec5SDimitry Andric _mm_unpackhi_epi32(__m128i __a, __m128i __b) 4576*0b57cec5SDimitry Andric { 4577*0b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3); 4578*0b57cec5SDimitry Andric } 4579*0b57cec5SDimitry Andric 4580*0b57cec5SDimitry Andric /// Unpacks the high-order 64-bit elements from two 128-bit vectors of 4581*0b57cec5SDimitry Andric /// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64]. 4582*0b57cec5SDimitry Andric /// 4583*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4584*0b57cec5SDimitry Andric /// 4585*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHQDQ / PUNPCKHQDQ </c> 4586*0b57cec5SDimitry Andric /// instruction. 4587*0b57cec5SDimitry Andric /// 4588*0b57cec5SDimitry Andric /// \param __a 4589*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. \n 4590*0b57cec5SDimitry Andric /// Bits [127:64] are written to bits [63:0] of the destination. 4591*0b57cec5SDimitry Andric /// \param __b 4592*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. 
\n 4593*0b57cec5SDimitry Andric /// Bits [127:64] are written to bits [127:64] of the destination. 4594*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the interleaved values. 4595*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 4596*0b57cec5SDimitry Andric _mm_unpackhi_epi64(__m128i __a, __m128i __b) 4597*0b57cec5SDimitry Andric { 4598*0b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2+1); 4599*0b57cec5SDimitry Andric } 4600*0b57cec5SDimitry Andric 4601*0b57cec5SDimitry Andric /// Unpacks the low-order (index 0-7) values from two 128-bit vectors of 4602*0b57cec5SDimitry Andric /// [16 x i8] and interleaves them into a 128-bit vector of [16 x i8]. 4603*0b57cec5SDimitry Andric /// 4604*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4605*0b57cec5SDimitry Andric /// 4606*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLBW / PUNPCKLBW </c> 4607*0b57cec5SDimitry Andric /// instruction. 4608*0b57cec5SDimitry Andric /// 4609*0b57cec5SDimitry Andric /// \param __a 4610*0b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. \n 4611*0b57cec5SDimitry Andric /// Bits [7:0] are written to bits [7:0] of the result. \n 4612*0b57cec5SDimitry Andric /// Bits [15:8] are written to bits [23:16] of the result. \n 4613*0b57cec5SDimitry Andric /// Bits [23:16] are written to bits [39:32] of the result. \n 4614*0b57cec5SDimitry Andric /// Bits [31:24] are written to bits [55:48] of the result. \n 4615*0b57cec5SDimitry Andric /// Bits [39:32] are written to bits [71:64] of the result. \n 4616*0b57cec5SDimitry Andric /// Bits [47:40] are written to bits [87:80] of the result. \n 4617*0b57cec5SDimitry Andric /// Bits [55:48] are written to bits [103:96] of the result. \n 4618*0b57cec5SDimitry Andric /// Bits [63:56] are written to bits [119:112] of the result. 
4619*0b57cec5SDimitry Andric /// \param __b 4620*0b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. 4621*0b57cec5SDimitry Andric /// Bits [7:0] are written to bits [15:8] of the result. \n 4622*0b57cec5SDimitry Andric /// Bits [15:8] are written to bits [31:24] of the result. \n 4623*0b57cec5SDimitry Andric /// Bits [23:16] are written to bits [47:40] of the result. \n 4624*0b57cec5SDimitry Andric /// Bits [31:24] are written to bits [63:56] of the result. \n 4625*0b57cec5SDimitry Andric /// Bits [39:32] are written to bits [79:72] of the result. \n 4626*0b57cec5SDimitry Andric /// Bits [47:40] are written to bits [95:88] of the result. \n 4627*0b57cec5SDimitry Andric /// Bits [55:48] are written to bits [111:104] of the result. \n 4628*0b57cec5SDimitry Andric /// Bits [63:56] are written to bits [127:120] of the result. 4629*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the interleaved values. 4630*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 4631*0b57cec5SDimitry Andric _mm_unpacklo_epi8(__m128i __a, __m128i __b) 4632*0b57cec5SDimitry Andric { 4633*0b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7); 4634*0b57cec5SDimitry Andric } 4635*0b57cec5SDimitry Andric 4636*0b57cec5SDimitry Andric /// Unpacks the low-order (index 0-3) values from each of the two 128-bit 4637*0b57cec5SDimitry Andric /// vectors of [8 x i16] and interleaves them into a 128-bit vector of 4638*0b57cec5SDimitry Andric /// [8 x i16]. 4639*0b57cec5SDimitry Andric /// 4640*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4641*0b57cec5SDimitry Andric /// 4642*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLWD / PUNPCKLWD </c> 4643*0b57cec5SDimitry Andric /// instruction. 
4644*0b57cec5SDimitry Andric /// 4645*0b57cec5SDimitry Andric /// \param __a 4646*0b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 4647*0b57cec5SDimitry Andric /// Bits [15:0] are written to bits [15:0] of the result. \n 4648*0b57cec5SDimitry Andric /// Bits [31:16] are written to bits [47:32] of the result. \n 4649*0b57cec5SDimitry Andric /// Bits [47:32] are written to bits [79:64] of the result. \n 4650*0b57cec5SDimitry Andric /// Bits [63:48] are written to bits [111:96] of the result. 4651*0b57cec5SDimitry Andric /// \param __b 4652*0b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 4653*0b57cec5SDimitry Andric /// Bits [15:0] are written to bits [31:16] of the result. \n 4654*0b57cec5SDimitry Andric /// Bits [31:16] are written to bits [63:48] of the result. \n 4655*0b57cec5SDimitry Andric /// Bits [47:32] are written to bits [95:80] of the result. \n 4656*0b57cec5SDimitry Andric /// Bits [63:48] are written to bits [127:112] of the result. 4657*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the interleaved values. 4658*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 4659*0b57cec5SDimitry Andric _mm_unpacklo_epi16(__m128i __a, __m128i __b) 4660*0b57cec5SDimitry Andric { 4661*0b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3); 4662*0b57cec5SDimitry Andric } 4663*0b57cec5SDimitry Andric 4664*0b57cec5SDimitry Andric /// Unpacks the low-order (index 0,1) values from two 128-bit vectors of 4665*0b57cec5SDimitry Andric /// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32]. 4666*0b57cec5SDimitry Andric /// 4667*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4668*0b57cec5SDimitry Andric /// 4669*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLDQ / PUNPCKLDQ </c> 4670*0b57cec5SDimitry Andric /// instruction. 
4671*0b57cec5SDimitry Andric /// 4672*0b57cec5SDimitry Andric /// \param __a 4673*0b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 4674*0b57cec5SDimitry Andric /// Bits [31:0] are written to bits [31:0] of the destination. \n 4675*0b57cec5SDimitry Andric /// Bits [63:32] are written to bits [95:64] of the destination. 4676*0b57cec5SDimitry Andric /// \param __b 4677*0b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 4678*0b57cec5SDimitry Andric /// Bits [31:0] are written to bits [64:32] of the destination. \n 4679*0b57cec5SDimitry Andric /// Bits [63:32] are written to bits [127:96] of the destination. 4680*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the interleaved values. 4681*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 4682*0b57cec5SDimitry Andric _mm_unpacklo_epi32(__m128i __a, __m128i __b) 4683*0b57cec5SDimitry Andric { 4684*0b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1); 4685*0b57cec5SDimitry Andric } 4686*0b57cec5SDimitry Andric 4687*0b57cec5SDimitry Andric /// Unpacks the low-order 64-bit elements from two 128-bit vectors of 4688*0b57cec5SDimitry Andric /// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64]. 4689*0b57cec5SDimitry Andric /// 4690*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4691*0b57cec5SDimitry Andric /// 4692*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c> 4693*0b57cec5SDimitry Andric /// instruction. 4694*0b57cec5SDimitry Andric /// 4695*0b57cec5SDimitry Andric /// \param __a 4696*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. \n 4697*0b57cec5SDimitry Andric /// Bits [63:0] are written to bits [63:0] of the destination. \n 4698*0b57cec5SDimitry Andric /// \param __b 4699*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. 
\n 4700*0b57cec5SDimitry Andric /// Bits [63:0] are written to bits [127:64] of the destination. \n 4701*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the interleaved values. 4702*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 4703*0b57cec5SDimitry Andric _mm_unpacklo_epi64(__m128i __a, __m128i __b) 4704*0b57cec5SDimitry Andric { 4705*0b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2+0); 4706*0b57cec5SDimitry Andric } 4707*0b57cec5SDimitry Andric 4708*0b57cec5SDimitry Andric /// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit 4709*0b57cec5SDimitry Andric /// integer. 4710*0b57cec5SDimitry Andric /// 4711*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4712*0b57cec5SDimitry Andric /// 4713*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVDQ2Q </c> instruction. 4714*0b57cec5SDimitry Andric /// 4715*0b57cec5SDimitry Andric /// \param __a 4716*0b57cec5SDimitry Andric /// A 128-bit integer vector operand. The lower 64 bits are moved to the 4717*0b57cec5SDimitry Andric /// destination. 4718*0b57cec5SDimitry Andric /// \returns A 64-bit integer containing the lower 64 bits of the parameter. 4719*0b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 4720*0b57cec5SDimitry Andric _mm_movepi64_pi64(__m128i __a) 4721*0b57cec5SDimitry Andric { 4722*0b57cec5SDimitry Andric return (__m64)__a[0]; 4723*0b57cec5SDimitry Andric } 4724*0b57cec5SDimitry Andric 4725*0b57cec5SDimitry Andric /// Moves the 64-bit operand to a 128-bit integer vector, zeroing the 4726*0b57cec5SDimitry Andric /// upper bits. 4727*0b57cec5SDimitry Andric /// 4728*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4729*0b57cec5SDimitry Andric /// 4730*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVD+VMOVQ </c> instruction. 
4731*0b57cec5SDimitry Andric /// 4732*0b57cec5SDimitry Andric /// \param __a 4733*0b57cec5SDimitry Andric /// A 64-bit value. 4734*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector. The lower 64 bits contain the value from 4735*0b57cec5SDimitry Andric /// the operand. The upper 64 bits are assigned zeros. 4736*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 4737*0b57cec5SDimitry Andric _mm_movpi64_epi64(__m64 __a) 4738*0b57cec5SDimitry Andric { 4739*0b57cec5SDimitry Andric return __extension__ (__m128i)(__v2di){ (long long)__a, 0 }; 4740*0b57cec5SDimitry Andric } 4741*0b57cec5SDimitry Andric 4742*0b57cec5SDimitry Andric /// Moves the lower 64 bits of a 128-bit integer vector to a 128-bit 4743*0b57cec5SDimitry Andric /// integer vector, zeroing the upper bits. 4744*0b57cec5SDimitry Andric /// 4745*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4746*0b57cec5SDimitry Andric /// 4747*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 4748*0b57cec5SDimitry Andric /// 4749*0b57cec5SDimitry Andric /// \param __a 4750*0b57cec5SDimitry Andric /// A 128-bit integer vector operand. The lower 64 bits are moved to the 4751*0b57cec5SDimitry Andric /// destination. 4752*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector. The lower 64 bits contain the value from 4753*0b57cec5SDimitry Andric /// the operand. The upper 64 bits are assigned zeros. 
4754*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 4755*0b57cec5SDimitry Andric _mm_move_epi64(__m128i __a) 4756*0b57cec5SDimitry Andric { 4757*0b57cec5SDimitry Andric return __builtin_shufflevector((__v2di)__a, _mm_setzero_si128(), 0, 2); 4758*0b57cec5SDimitry Andric } 4759*0b57cec5SDimitry Andric 4760*0b57cec5SDimitry Andric /// Unpacks the high-order 64-bit elements from two 128-bit vectors of 4761*0b57cec5SDimitry Andric /// [2 x double] and interleaves them into a 128-bit vector of [2 x 4762*0b57cec5SDimitry Andric /// double]. 4763*0b57cec5SDimitry Andric /// 4764*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4765*0b57cec5SDimitry Andric /// 4766*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction. 4767*0b57cec5SDimitry Andric /// 4768*0b57cec5SDimitry Andric /// \param __a 4769*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 4770*0b57cec5SDimitry Andric /// Bits [127:64] are written to bits [63:0] of the destination. 4771*0b57cec5SDimitry Andric /// \param __b 4772*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 4773*0b57cec5SDimitry Andric /// Bits [127:64] are written to bits [127:64] of the destination. 4774*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the interleaved values. 4775*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 4776*0b57cec5SDimitry Andric _mm_unpackhi_pd(__m128d __a, __m128d __b) 4777*0b57cec5SDimitry Andric { 4778*0b57cec5SDimitry Andric return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2+1); 4779*0b57cec5SDimitry Andric } 4780*0b57cec5SDimitry Andric 4781*0b57cec5SDimitry Andric /// Unpacks the low-order 64-bit elements from two 128-bit vectors 4782*0b57cec5SDimitry Andric /// of [2 x double] and interleaves them into a 128-bit vector of [2 x 4783*0b57cec5SDimitry Andric /// double]. 
4784*0b57cec5SDimitry Andric /// 4785*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4786*0b57cec5SDimitry Andric /// 4787*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction. 4788*0b57cec5SDimitry Andric /// 4789*0b57cec5SDimitry Andric /// \param __a 4790*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 4791*0b57cec5SDimitry Andric /// Bits [63:0] are written to bits [63:0] of the destination. 4792*0b57cec5SDimitry Andric /// \param __b 4793*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 4794*0b57cec5SDimitry Andric /// Bits [63:0] are written to bits [127:64] of the destination. 4795*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the interleaved values. 4796*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 4797*0b57cec5SDimitry Andric _mm_unpacklo_pd(__m128d __a, __m128d __b) 4798*0b57cec5SDimitry Andric { 4799*0b57cec5SDimitry Andric return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2+0); 4800*0b57cec5SDimitry Andric } 4801*0b57cec5SDimitry Andric 4802*0b57cec5SDimitry Andric /// Extracts the sign bits of the double-precision values in the 128-bit 4803*0b57cec5SDimitry Andric /// vector of [2 x double], zero-extends the value, and writes it to the 4804*0b57cec5SDimitry Andric /// low-order bits of the destination. 4805*0b57cec5SDimitry Andric /// 4806*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4807*0b57cec5SDimitry Andric /// 4808*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVMSKPD / MOVMSKPD </c> instruction. 4809*0b57cec5SDimitry Andric /// 4810*0b57cec5SDimitry Andric /// \param __a 4811*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the values with sign bits to 4812*0b57cec5SDimitry Andric /// be extracted. 
4813*0b57cec5SDimitry Andric /// \returns The sign bits from each of the double-precision elements in \a __a, 4814*0b57cec5SDimitry Andric /// written to bits [1:0]. The remaining bits are assigned values of zero. 4815*0b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 4816*0b57cec5SDimitry Andric _mm_movemask_pd(__m128d __a) 4817*0b57cec5SDimitry Andric { 4818*0b57cec5SDimitry Andric return __builtin_ia32_movmskpd((__v2df)__a); 4819*0b57cec5SDimitry Andric } 4820*0b57cec5SDimitry Andric 4821*0b57cec5SDimitry Andric 4822*0b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] from two 4823*0b57cec5SDimitry Andric /// 128-bit vector parameters of [2 x double], using the immediate-value 4824*0b57cec5SDimitry Andric /// parameter as a specifier. 4825*0b57cec5SDimitry Andric /// 4826*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4827*0b57cec5SDimitry Andric /// 4828*0b57cec5SDimitry Andric /// \code 4829*0b57cec5SDimitry Andric /// __m128d _mm_shuffle_pd(__m128d a, __m128d b, const int i); 4830*0b57cec5SDimitry Andric /// \endcode 4831*0b57cec5SDimitry Andric /// 4832*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSHUFPD / SHUFPD </c> instruction. 4833*0b57cec5SDimitry Andric /// 4834*0b57cec5SDimitry Andric /// \param a 4835*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4836*0b57cec5SDimitry Andric /// \param b 4837*0b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4838*0b57cec5SDimitry Andric /// \param i 4839*0b57cec5SDimitry Andric /// An 8-bit immediate value. The least significant two bits specify which 4840*0b57cec5SDimitry Andric /// elements to copy from \a a and \a b: \n 4841*0b57cec5SDimitry Andric /// Bit[0] = 0: lower element of \a a copied to lower element of result. \n 4842*0b57cec5SDimitry Andric /// Bit[0] = 1: upper element of \a a copied to lower element of result. 
\n 4843*0b57cec5SDimitry Andric /// Bit[1] = 0: lower element of \a b copied to upper element of result. \n 4844*0b57cec5SDimitry Andric /// Bit[1] = 1: upper element of \a b copied to upper element of result. \n 4845*0b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the shuffled values. 4846*0b57cec5SDimitry Andric #define _mm_shuffle_pd(a, b, i) \ 4847*0b57cec5SDimitry Andric (__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ 4848*0b57cec5SDimitry Andric (int)(i)) 4849*0b57cec5SDimitry Andric 4850*0b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit 4851*0b57cec5SDimitry Andric /// floating-point vector of [4 x float]. 4852*0b57cec5SDimitry Andric /// 4853*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4854*0b57cec5SDimitry Andric /// 4855*0b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 4856*0b57cec5SDimitry Andric /// 4857*0b57cec5SDimitry Andric /// \param __a 4858*0b57cec5SDimitry Andric /// A 128-bit floating-point vector of [2 x double]. 4859*0b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [4 x float] containing the same 4860*0b57cec5SDimitry Andric /// bitwise pattern as the parameter. 4861*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS 4862*0b57cec5SDimitry Andric _mm_castpd_ps(__m128d __a) 4863*0b57cec5SDimitry Andric { 4864*0b57cec5SDimitry Andric return (__m128)__a; 4865*0b57cec5SDimitry Andric } 4866*0b57cec5SDimitry Andric 4867*0b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit 4868*0b57cec5SDimitry Andric /// integer vector. 4869*0b57cec5SDimitry Andric /// 4870*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4871*0b57cec5SDimitry Andric /// 4872*0b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 
4873*0b57cec5SDimitry Andric /// 4874*0b57cec5SDimitry Andric /// \param __a 4875*0b57cec5SDimitry Andric /// A 128-bit floating-point vector of [2 x double]. 4876*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the same bitwise pattern as the 4877*0b57cec5SDimitry Andric /// parameter. 4878*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 4879*0b57cec5SDimitry Andric _mm_castpd_si128(__m128d __a) 4880*0b57cec5SDimitry Andric { 4881*0b57cec5SDimitry Andric return (__m128i)__a; 4882*0b57cec5SDimitry Andric } 4883*0b57cec5SDimitry Andric 4884*0b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit 4885*0b57cec5SDimitry Andric /// floating-point vector of [2 x double]. 4886*0b57cec5SDimitry Andric /// 4887*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4888*0b57cec5SDimitry Andric /// 4889*0b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 4890*0b57cec5SDimitry Andric /// 4891*0b57cec5SDimitry Andric /// \param __a 4892*0b57cec5SDimitry Andric /// A 128-bit floating-point vector of [4 x float]. 4893*0b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [2 x double] containing the same 4894*0b57cec5SDimitry Andric /// bitwise pattern as the parameter. 4895*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 4896*0b57cec5SDimitry Andric _mm_castps_pd(__m128 __a) 4897*0b57cec5SDimitry Andric { 4898*0b57cec5SDimitry Andric return (__m128d)__a; 4899*0b57cec5SDimitry Andric } 4900*0b57cec5SDimitry Andric 4901*0b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit 4902*0b57cec5SDimitry Andric /// integer vector. 4903*0b57cec5SDimitry Andric /// 4904*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4905*0b57cec5SDimitry Andric /// 4906*0b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 
4907*0b57cec5SDimitry Andric /// 4908*0b57cec5SDimitry Andric /// \param __a 4909*0b57cec5SDimitry Andric /// A 128-bit floating-point vector of [4 x float]. 4910*0b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the same bitwise pattern as the 4911*0b57cec5SDimitry Andric /// parameter. 4912*0b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 4913*0b57cec5SDimitry Andric _mm_castps_si128(__m128 __a) 4914*0b57cec5SDimitry Andric { 4915*0b57cec5SDimitry Andric return (__m128i)__a; 4916*0b57cec5SDimitry Andric } 4917*0b57cec5SDimitry Andric 4918*0b57cec5SDimitry Andric /// Casts a 128-bit integer vector into a 128-bit floating-point vector 4919*0b57cec5SDimitry Andric /// of [4 x float]. 4920*0b57cec5SDimitry Andric /// 4921*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4922*0b57cec5SDimitry Andric /// 4923*0b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 4924*0b57cec5SDimitry Andric /// 4925*0b57cec5SDimitry Andric /// \param __a 4926*0b57cec5SDimitry Andric /// A 128-bit integer vector. 4927*0b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [4 x float] containing the same 4928*0b57cec5SDimitry Andric /// bitwise pattern as the parameter. 4929*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS 4930*0b57cec5SDimitry Andric _mm_castsi128_ps(__m128i __a) 4931*0b57cec5SDimitry Andric { 4932*0b57cec5SDimitry Andric return (__m128)__a; 4933*0b57cec5SDimitry Andric } 4934*0b57cec5SDimitry Andric 4935*0b57cec5SDimitry Andric /// Casts a 128-bit integer vector into a 128-bit floating-point vector 4936*0b57cec5SDimitry Andric /// of [2 x double]. 4937*0b57cec5SDimitry Andric /// 4938*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4939*0b57cec5SDimitry Andric /// 4940*0b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 
4941*0b57cec5SDimitry Andric /// 4942*0b57cec5SDimitry Andric /// \param __a 4943*0b57cec5SDimitry Andric /// A 128-bit integer vector. 4944*0b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [2 x double] containing the same 4945*0b57cec5SDimitry Andric /// bitwise pattern as the parameter. 4946*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 4947*0b57cec5SDimitry Andric _mm_castsi128_pd(__m128i __a) 4948*0b57cec5SDimitry Andric { 4949*0b57cec5SDimitry Andric return (__m128d)__a; 4950*0b57cec5SDimitry Andric } 4951*0b57cec5SDimitry Andric 4952*0b57cec5SDimitry Andric #if defined(__cplusplus) 4953*0b57cec5SDimitry Andric extern "C" { 4954*0b57cec5SDimitry Andric #endif 4955*0b57cec5SDimitry Andric 4956*0b57cec5SDimitry Andric /// Indicates that a spin loop is being executed for the purposes of 4957*0b57cec5SDimitry Andric /// optimizing power consumption during the loop. 4958*0b57cec5SDimitry Andric /// 4959*0b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4960*0b57cec5SDimitry Andric /// 4961*0b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PAUSE </c> instruction. 
4962*0b57cec5SDimitry Andric /// 4963*0b57cec5SDimitry Andric void _mm_pause(void); 4964*0b57cec5SDimitry Andric 4965*0b57cec5SDimitry Andric #if defined(__cplusplus) 4966*0b57cec5SDimitry Andric } // extern "C" 4967*0b57cec5SDimitry Andric #endif 4968*0b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS 4969*0b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS_MMX 4970*0b57cec5SDimitry Andric 4971*0b57cec5SDimitry Andric #define _MM_SHUFFLE2(x, y) (((x) << 1) | (y)) 4972*0b57cec5SDimitry Andric 4973*0b57cec5SDimitry Andric #define _MM_DENORMALS_ZERO_ON (0x0040) 4974*0b57cec5SDimitry Andric #define _MM_DENORMALS_ZERO_OFF (0x0000) 4975*0b57cec5SDimitry Andric 4976*0b57cec5SDimitry Andric #define _MM_DENORMALS_ZERO_MASK (0x0040) 4977*0b57cec5SDimitry Andric 4978*0b57cec5SDimitry Andric #define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK) 4979*0b57cec5SDimitry Andric #define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x))) 4980*0b57cec5SDimitry Andric 4981*0b57cec5SDimitry Andric #endif /* __EMMINTRIN_H */ 4982