10b57cec5SDimitry Andric /*===---- emmintrin.h - SSE2 intrinsics ------------------------------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric * 70b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 80b57cec5SDimitry Andric */ 90b57cec5SDimitry Andric 100b57cec5SDimitry Andric #ifndef __EMMINTRIN_H 110b57cec5SDimitry Andric #define __EMMINTRIN_H 120b57cec5SDimitry Andric 13349cc55cSDimitry Andric #if !defined(__i386__) && !defined(__x86_64__) 14349cc55cSDimitry Andric #error "This header is only meant to be used on x86 and x64 architecture" 15349cc55cSDimitry Andric #endif 16349cc55cSDimitry Andric 170b57cec5SDimitry Andric #include <xmmintrin.h> 180b57cec5SDimitry Andric 190b57cec5SDimitry Andric typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16))); 200b57cec5SDimitry Andric typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16))); 210b57cec5SDimitry Andric 220b57cec5SDimitry Andric typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1))); 23*81ad6265SDimitry Andric typedef long long __m128i_u 24*81ad6265SDimitry Andric __attribute__((__vector_size__(16), __aligned__(1))); 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric /* Type defines. 
*/ 270b57cec5SDimitry Andric typedef double __v2df __attribute__((__vector_size__(16))); 280b57cec5SDimitry Andric typedef long long __v2di __attribute__((__vector_size__(16))); 290b57cec5SDimitry Andric typedef short __v8hi __attribute__((__vector_size__(16))); 300b57cec5SDimitry Andric typedef char __v16qi __attribute__((__vector_size__(16))); 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric /* Unsigned types */ 330b57cec5SDimitry Andric typedef unsigned long long __v2du __attribute__((__vector_size__(16))); 340b57cec5SDimitry Andric typedef unsigned short __v8hu __attribute__((__vector_size__(16))); 350b57cec5SDimitry Andric typedef unsigned char __v16qu __attribute__((__vector_size__(16))); 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric /* We need an explicitly signed variant for char. Note that this shouldn't 380b57cec5SDimitry Andric * appear in the interface though. */ 390b57cec5SDimitry Andric typedef signed char __v16qs __attribute__((__vector_size__(16))); 400b57cec5SDimitry Andric 410b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. */ 42*81ad6265SDimitry Andric #define __DEFAULT_FN_ATTRS \ 43*81ad6265SDimitry Andric __attribute__((__always_inline__, __nodebug__, __target__("sse2"), \ 44*81ad6265SDimitry Andric __min_vector_width__(128))) 45*81ad6265SDimitry Andric #define __DEFAULT_FN_ATTRS_MMX \ 46*81ad6265SDimitry Andric __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2"), \ 47*81ad6265SDimitry Andric __min_vector_width__(64))) 480b57cec5SDimitry Andric 490b57cec5SDimitry Andric /// Adds lower double-precision values in both operands and returns the 500b57cec5SDimitry Andric /// sum in the lower 64 bits of the result. The upper 64 bits of the result 510b57cec5SDimitry Andric /// are copied from the upper double-precision value of the first operand. 
520b57cec5SDimitry Andric /// 530b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 540b57cec5SDimitry Andric /// 550b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VADDSD / ADDSD </c> instruction. 560b57cec5SDimitry Andric /// 570b57cec5SDimitry Andric /// \param __a 580b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 590b57cec5SDimitry Andric /// \param __b 600b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 610b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 620b57cec5SDimitry Andric /// sum of the lower 64 bits of both operands. The upper 64 bits are copied 630b57cec5SDimitry Andric /// from the upper 64 bits of the first source operand. 64*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a, 65*81ad6265SDimitry Andric __m128d __b) { 660b57cec5SDimitry Andric __a[0] += __b[0]; 670b57cec5SDimitry Andric return __a; 680b57cec5SDimitry Andric } 690b57cec5SDimitry Andric 700b57cec5SDimitry Andric /// Adds two 128-bit vectors of [2 x double]. 710b57cec5SDimitry Andric /// 720b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 730b57cec5SDimitry Andric /// 740b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VADDPD / ADDPD </c> instruction. 750b57cec5SDimitry Andric /// 760b57cec5SDimitry Andric /// \param __a 770b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 780b57cec5SDimitry Andric /// \param __b 790b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 800b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the sums of both 810b57cec5SDimitry Andric /// operands. 
82*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_pd(__m128d __a, 83*81ad6265SDimitry Andric __m128d __b) { 840b57cec5SDimitry Andric return (__m128d)((__v2df)__a + (__v2df)__b); 850b57cec5SDimitry Andric } 860b57cec5SDimitry Andric 870b57cec5SDimitry Andric /// Subtracts the lower double-precision value of the second operand 880b57cec5SDimitry Andric /// from the lower double-precision value of the first operand and returns 890b57cec5SDimitry Andric /// the difference in the lower 64 bits of the result. The upper 64 bits of 900b57cec5SDimitry Andric /// the result are copied from the upper double-precision value of the first 910b57cec5SDimitry Andric /// operand. 920b57cec5SDimitry Andric /// 930b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 940b57cec5SDimitry Andric /// 950b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSUBSD / SUBSD </c> instruction. 960b57cec5SDimitry Andric /// 970b57cec5SDimitry Andric /// \param __a 980b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the minuend. 990b57cec5SDimitry Andric /// \param __b 1000b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the subtrahend. 1010b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 1020b57cec5SDimitry Andric /// difference of the lower 64 bits of both operands. The upper 64 bits are 1030b57cec5SDimitry Andric /// copied from the upper 64 bits of the first source operand. 104*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, 105*81ad6265SDimitry Andric __m128d __b) { 1060b57cec5SDimitry Andric __a[0] -= __b[0]; 1070b57cec5SDimitry Andric return __a; 1080b57cec5SDimitry Andric } 1090b57cec5SDimitry Andric 1100b57cec5SDimitry Andric /// Subtracts two 128-bit vectors of [2 x double]. 
1110b57cec5SDimitry Andric /// 1120b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1130b57cec5SDimitry Andric /// 1140b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSUBPD / SUBPD </c> instruction. 1150b57cec5SDimitry Andric /// 1160b57cec5SDimitry Andric /// \param __a 1170b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the minuend. 1180b57cec5SDimitry Andric /// \param __b 1190b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the subtrahend. 1200b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the differences between 1210b57cec5SDimitry Andric /// both operands. 122*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_pd(__m128d __a, 123*81ad6265SDimitry Andric __m128d __b) { 1240b57cec5SDimitry Andric return (__m128d)((__v2df)__a - (__v2df)__b); 1250b57cec5SDimitry Andric } 1260b57cec5SDimitry Andric 1270b57cec5SDimitry Andric /// Multiplies lower double-precision values in both operands and returns 1280b57cec5SDimitry Andric /// the product in the lower 64 bits of the result. The upper 64 bits of the 1290b57cec5SDimitry Andric /// result are copied from the upper double-precision value of the first 1300b57cec5SDimitry Andric /// operand. 1310b57cec5SDimitry Andric /// 1320b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1330b57cec5SDimitry Andric /// 1340b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMULSD / MULSD </c> instruction. 1350b57cec5SDimitry Andric /// 1360b57cec5SDimitry Andric /// \param __a 1370b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 1380b57cec5SDimitry Andric /// \param __b 1390b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 
1400b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 1410b57cec5SDimitry Andric /// product of the lower 64 bits of both operands. The upper 64 bits are 1420b57cec5SDimitry Andric /// copied from the upper 64 bits of the first source operand. 143*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, 144*81ad6265SDimitry Andric __m128d __b) { 1450b57cec5SDimitry Andric __a[0] *= __b[0]; 1460b57cec5SDimitry Andric return __a; 1470b57cec5SDimitry Andric } 1480b57cec5SDimitry Andric 1490b57cec5SDimitry Andric /// Multiplies two 128-bit vectors of [2 x double]. 1500b57cec5SDimitry Andric /// 1510b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1520b57cec5SDimitry Andric /// 1530b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMULPD / MULPD </c> instruction. 1540b57cec5SDimitry Andric /// 1550b57cec5SDimitry Andric /// \param __a 1560b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 1570b57cec5SDimitry Andric /// \param __b 1580b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 1590b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the products of both 1600b57cec5SDimitry Andric /// operands. 161*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_pd(__m128d __a, 162*81ad6265SDimitry Andric __m128d __b) { 1630b57cec5SDimitry Andric return (__m128d)((__v2df)__a * (__v2df)__b); 1640b57cec5SDimitry Andric } 1650b57cec5SDimitry Andric 1660b57cec5SDimitry Andric /// Divides the lower double-precision value of the first operand by the 1670b57cec5SDimitry Andric /// lower double-precision value of the second operand and returns the 1680b57cec5SDimitry Andric /// quotient in the lower 64 bits of the result. 
The upper 64 bits of the 1690b57cec5SDimitry Andric /// result are copied from the upper double-precision value of the first 1700b57cec5SDimitry Andric /// operand. 1710b57cec5SDimitry Andric /// 1720b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1730b57cec5SDimitry Andric /// 1740b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VDIVSD / DIVSD </c> instruction. 1750b57cec5SDimitry Andric /// 1760b57cec5SDimitry Andric /// \param __a 1770b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the dividend. 1780b57cec5SDimitry Andric /// \param __b 1790b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing divisor. 1800b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 1810b57cec5SDimitry Andric /// quotient of the lower 64 bits of both operands. The upper 64 bits are 1820b57cec5SDimitry Andric /// copied from the upper 64 bits of the first source operand. 183*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, 184*81ad6265SDimitry Andric __m128d __b) { 1850b57cec5SDimitry Andric __a[0] /= __b[0]; 1860b57cec5SDimitry Andric return __a; 1870b57cec5SDimitry Andric } 1880b57cec5SDimitry Andric 1890b57cec5SDimitry Andric /// Performs an element-by-element division of two 128-bit vectors of 1900b57cec5SDimitry Andric /// [2 x double]. 1910b57cec5SDimitry Andric /// 1920b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1930b57cec5SDimitry Andric /// 1940b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VDIVPD / DIVPD </c> instruction. 1950b57cec5SDimitry Andric /// 1960b57cec5SDimitry Andric /// \param __a 1970b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the dividend. 1980b57cec5SDimitry Andric /// \param __b 1990b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the divisor. 
2000b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the quotients of both 2010b57cec5SDimitry Andric /// operands. 202*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_pd(__m128d __a, 203*81ad6265SDimitry Andric __m128d __b) { 2040b57cec5SDimitry Andric return (__m128d)((__v2df)__a / (__v2df)__b); 2050b57cec5SDimitry Andric } 2060b57cec5SDimitry Andric 2070b57cec5SDimitry Andric /// Calculates the square root of the lower double-precision value of 2080b57cec5SDimitry Andric /// the second operand and returns it in the lower 64 bits of the result. 2090b57cec5SDimitry Andric /// The upper 64 bits of the result are copied from the upper 2100b57cec5SDimitry Andric /// double-precision value of the first operand. 2110b57cec5SDimitry Andric /// 2120b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2130b57cec5SDimitry Andric /// 2140b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSQRTSD / SQRTSD </c> instruction. 2150b57cec5SDimitry Andric /// 2160b57cec5SDimitry Andric /// \param __a 2170b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 2180b57cec5SDimitry Andric /// upper 64 bits of this operand are copied to the upper 64 bits of the 2190b57cec5SDimitry Andric /// result. 2200b57cec5SDimitry Andric /// \param __b 2210b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 2220b57cec5SDimitry Andric /// square root is calculated using the lower 64 bits of this operand. 2230b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 2240b57cec5SDimitry Andric /// square root of the lower 64 bits of operand \a __b, and whose upper 64 2250b57cec5SDimitry Andric /// bits are copied from the upper 64 bits of operand \a __a. 
226*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a, 227*81ad6265SDimitry Andric __m128d __b) { 2280b57cec5SDimitry Andric __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b); 2290b57cec5SDimitry Andric return __extension__(__m128d){__c[0], __a[1]}; 2300b57cec5SDimitry Andric } 2310b57cec5SDimitry Andric 2320b57cec5SDimitry Andric /// Calculates the square root of the each of two values stored in a 2330b57cec5SDimitry Andric /// 128-bit vector of [2 x double]. 2340b57cec5SDimitry Andric /// 2350b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2360b57cec5SDimitry Andric /// 2370b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSQRTPD / SQRTPD </c> instruction. 2380b57cec5SDimitry Andric /// 2390b57cec5SDimitry Andric /// \param __a 2400b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 2410b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the square roots of the 2420b57cec5SDimitry Andric /// values in the operand. 243*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a) { 2440b57cec5SDimitry Andric return __builtin_ia32_sqrtpd((__v2df)__a); 2450b57cec5SDimitry Andric } 2460b57cec5SDimitry Andric 2470b57cec5SDimitry Andric /// Compares lower 64-bit double-precision values of both operands, and 2480b57cec5SDimitry Andric /// returns the lesser of the pair of values in the lower 64-bits of the 2490b57cec5SDimitry Andric /// result. The upper 64 bits of the result are copied from the upper 2500b57cec5SDimitry Andric /// double-precision value of the first operand. 2510b57cec5SDimitry Andric /// 2520b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2530b57cec5SDimitry Andric /// 2540b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMINSD / MINSD </c> instruction. 
2550b57cec5SDimitry Andric /// 2560b57cec5SDimitry Andric /// \param __a 2570b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 2580b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 2590b57cec5SDimitry Andric /// \param __b 2600b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 2610b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 2620b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 2630b57cec5SDimitry Andric /// minimum value between both operands. The upper 64 bits are copied from 2640b57cec5SDimitry Andric /// the upper 64 bits of the first source operand. 265*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_sd(__m128d __a, 266*81ad6265SDimitry Andric __m128d __b) { 2670b57cec5SDimitry Andric return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b); 2680b57cec5SDimitry Andric } 2690b57cec5SDimitry Andric 2700b57cec5SDimitry Andric /// Performs element-by-element comparison of the two 128-bit vectors of 2710b57cec5SDimitry Andric /// [2 x double] and returns the vector containing the lesser of each pair of 2720b57cec5SDimitry Andric /// values. 2730b57cec5SDimitry Andric /// 2740b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2750b57cec5SDimitry Andric /// 2760b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMINPD / MINPD </c> instruction. 2770b57cec5SDimitry Andric /// 2780b57cec5SDimitry Andric /// \param __a 2790b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 2800b57cec5SDimitry Andric /// \param __b 2810b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 2820b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the minimum values 2830b57cec5SDimitry Andric /// between both operands. 
284*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, 285*81ad6265SDimitry Andric __m128d __b) { 2860b57cec5SDimitry Andric return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b); 2870b57cec5SDimitry Andric } 2880b57cec5SDimitry Andric 2890b57cec5SDimitry Andric /// Compares lower 64-bit double-precision values of both operands, and 2900b57cec5SDimitry Andric /// returns the greater of the pair of values in the lower 64-bits of the 2910b57cec5SDimitry Andric /// result. The upper 64 bits of the result are copied from the upper 2920b57cec5SDimitry Andric /// double-precision value of the first operand. 2930b57cec5SDimitry Andric /// 2940b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2950b57cec5SDimitry Andric /// 2960b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMAXSD / MAXSD </c> instruction. 2970b57cec5SDimitry Andric /// 2980b57cec5SDimitry Andric /// \param __a 2990b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 3000b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 3010b57cec5SDimitry Andric /// \param __b 3020b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 3030b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 3040b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 3050b57cec5SDimitry Andric /// maximum value between both operands. The upper 64 bits are copied from 3060b57cec5SDimitry Andric /// the upper 64 bits of the first source operand. 
307*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_sd(__m128d __a, 308*81ad6265SDimitry Andric __m128d __b) { 3090b57cec5SDimitry Andric return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b); 3100b57cec5SDimitry Andric } 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric /// Performs element-by-element comparison of the two 128-bit vectors of 3130b57cec5SDimitry Andric /// [2 x double] and returns the vector containing the greater of each pair 3140b57cec5SDimitry Andric /// of values. 3150b57cec5SDimitry Andric /// 3160b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3170b57cec5SDimitry Andric /// 3180b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMAXPD / MAXPD </c> instruction. 3190b57cec5SDimitry Andric /// 3200b57cec5SDimitry Andric /// \param __a 3210b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 3220b57cec5SDimitry Andric /// \param __b 3230b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 3240b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the maximum values 3250b57cec5SDimitry Andric /// between both operands. 326*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, 327*81ad6265SDimitry Andric __m128d __b) { 3280b57cec5SDimitry Andric return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b); 3290b57cec5SDimitry Andric } 3300b57cec5SDimitry Andric 3310b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit vectors of [2 x double]. 3320b57cec5SDimitry Andric /// 3330b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3340b57cec5SDimitry Andric /// 3350b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction. 3360b57cec5SDimitry Andric /// 3370b57cec5SDimitry Andric /// \param __a 3380b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 
3390b57cec5SDimitry Andric /// \param __b 3400b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 3410b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the 3420b57cec5SDimitry Andric /// values between both operands. 343*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_and_pd(__m128d __a, 344*81ad6265SDimitry Andric __m128d __b) { 3450b57cec5SDimitry Andric return (__m128d)((__v2du)__a & (__v2du)__b); 3460b57cec5SDimitry Andric } 3470b57cec5SDimitry Andric 3480b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit vectors of [2 x double], using 3490b57cec5SDimitry Andric /// the one's complement of the values contained in the first source operand. 3500b57cec5SDimitry Andric /// 3510b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3520b57cec5SDimitry Andric /// 3530b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction. 3540b57cec5SDimitry Andric /// 3550b57cec5SDimitry Andric /// \param __a 3560b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the left source operand. The 3570b57cec5SDimitry Andric /// one's complement of this value is used in the bitwise AND. 3580b57cec5SDimitry Andric /// \param __b 3590b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the right source operand. 3600b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the 3610b57cec5SDimitry Andric /// values in the second operand and the one's complement of the first 3620b57cec5SDimitry Andric /// operand. 
363*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_andnot_pd(__m128d __a, 364*81ad6265SDimitry Andric __m128d __b) { 3650b57cec5SDimitry Andric return (__m128d)(~(__v2du)__a & (__v2du)__b); 3660b57cec5SDimitry Andric } 3670b57cec5SDimitry Andric 3680b57cec5SDimitry Andric /// Performs a bitwise OR of two 128-bit vectors of [2 x double]. 3690b57cec5SDimitry Andric /// 3700b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3710b57cec5SDimitry Andric /// 3720b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPOR / POR </c> instruction. 3730b57cec5SDimitry Andric /// 3740b57cec5SDimitry Andric /// \param __a 3750b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 3760b57cec5SDimitry Andric /// \param __b 3770b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 3780b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise OR of the 3790b57cec5SDimitry Andric /// values between both operands. 380*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_or_pd(__m128d __a, 381*81ad6265SDimitry Andric __m128d __b) { 3820b57cec5SDimitry Andric return (__m128d)((__v2du)__a | (__v2du)__b); 3830b57cec5SDimitry Andric } 3840b57cec5SDimitry Andric 3850b57cec5SDimitry Andric /// Performs a bitwise XOR of two 128-bit vectors of [2 x double]. 3860b57cec5SDimitry Andric /// 3870b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3880b57cec5SDimitry Andric /// 3890b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction. 3900b57cec5SDimitry Andric /// 3910b57cec5SDimitry Andric /// \param __a 3920b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 3930b57cec5SDimitry Andric /// \param __b 3940b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 
3950b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise XOR of the 3960b57cec5SDimitry Andric /// values between both operands. 397*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_xor_pd(__m128d __a, 398*81ad6265SDimitry Andric __m128d __b) { 3990b57cec5SDimitry Andric return (__m128d)((__v2du)__a ^ (__v2du)__b); 4000b57cec5SDimitry Andric } 4010b57cec5SDimitry Andric 4020b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4030b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] for equality. Each comparison yields 0x0 4040b57cec5SDimitry Andric /// for false, 0xFFFFFFFFFFFFFFFF for true. 4050b57cec5SDimitry Andric /// 4060b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4070b57cec5SDimitry Andric /// 4080b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPEQPD / CMPEQPD </c> instruction. 4090b57cec5SDimitry Andric /// 4100b57cec5SDimitry Andric /// \param __a 4110b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4120b57cec5SDimitry Andric /// \param __b 4130b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4140b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 415*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_pd(__m128d __a, 416*81ad6265SDimitry Andric __m128d __b) { 4170b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b); 4180b57cec5SDimitry Andric } 4190b57cec5SDimitry Andric 4200b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4210b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 4220b57cec5SDimitry Andric /// operand are less than those in the second operand. Each comparison 4230b57cec5SDimitry Andric /// yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 
4240b57cec5SDimitry Andric /// 4250b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4260b57cec5SDimitry Andric /// 4270b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction. 4280b57cec5SDimitry Andric /// 4290b57cec5SDimitry Andric /// \param __a 4300b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4310b57cec5SDimitry Andric /// \param __b 4320b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4330b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 434*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_pd(__m128d __a, 435*81ad6265SDimitry Andric __m128d __b) { 4360b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b); 4370b57cec5SDimitry Andric } 4380b57cec5SDimitry Andric 4390b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4400b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 4410b57cec5SDimitry Andric /// operand are less than or equal to those in the second operand. 4420b57cec5SDimitry Andric /// 4430b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 4440b57cec5SDimitry Andric /// 4450b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4460b57cec5SDimitry Andric /// 4470b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction. 4480b57cec5SDimitry Andric /// 4490b57cec5SDimitry Andric /// \param __a 4500b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4510b57cec5SDimitry Andric /// \param __b 4520b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4530b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
454*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_pd(__m128d __a, 455*81ad6265SDimitry Andric __m128d __b) { 4560b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b); 4570b57cec5SDimitry Andric } 4580b57cec5SDimitry Andric 4590b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4600b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 4610b57cec5SDimitry Andric /// operand are greater than those in the second operand. 4620b57cec5SDimitry Andric /// 4630b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 4640b57cec5SDimitry Andric /// 4650b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4660b57cec5SDimitry Andric /// 4670b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction. 4680b57cec5SDimitry Andric /// 4690b57cec5SDimitry Andric /// \param __a 4700b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4710b57cec5SDimitry Andric /// \param __b 4720b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4730b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 474*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_pd(__m128d __a, 475*81ad6265SDimitry Andric __m128d __b) { 4760b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a); 4770b57cec5SDimitry Andric } 4780b57cec5SDimitry Andric 4790b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4800b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 4810b57cec5SDimitry Andric /// operand are greater than or equal to those in the second operand. 4820b57cec5SDimitry Andric /// 4830b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 
4840b57cec5SDimitry Andric /// 4850b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4860b57cec5SDimitry Andric /// 4870b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction. 4880b57cec5SDimitry Andric /// 4890b57cec5SDimitry Andric /// \param __a 4900b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4910b57cec5SDimitry Andric /// \param __b 4920b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4930b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 494*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_pd(__m128d __a, 495*81ad6265SDimitry Andric __m128d __b) { 4960b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a); 4970b57cec5SDimitry Andric } 4980b57cec5SDimitry Andric 4990b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5000b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5010b57cec5SDimitry Andric /// operand are ordered with respect to those in the second operand. 5020b57cec5SDimitry Andric /// 5030b57cec5SDimitry Andric /// A pair of double-precision values are "ordered" with respect to each 5040b57cec5SDimitry Andric /// other if neither value is a NaN. Each comparison yields 0x0 for false, 5050b57cec5SDimitry Andric /// 0xFFFFFFFFFFFFFFFF for true. 5060b57cec5SDimitry Andric /// 5070b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5080b57cec5SDimitry Andric /// 5090b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPORDPD / CMPORDPD </c> instruction. 5100b57cec5SDimitry Andric /// 5110b57cec5SDimitry Andric /// \param __a 5120b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5130b57cec5SDimitry Andric /// \param __b 5140b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 
5150b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 516*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_pd(__m128d __a, 517*81ad6265SDimitry Andric __m128d __b) { 5180b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b); 5190b57cec5SDimitry Andric } 5200b57cec5SDimitry Andric 5210b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5220b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5230b57cec5SDimitry Andric /// operand are unordered with respect to those in the second operand. 5240b57cec5SDimitry Andric /// 5250b57cec5SDimitry Andric /// A pair of double-precision values are "unordered" with respect to each 5260b57cec5SDimitry Andric /// other if one or both values are NaN. Each comparison yields 0x0 for 5270b57cec5SDimitry Andric /// false, 0xFFFFFFFFFFFFFFFF for true. 5280b57cec5SDimitry Andric /// 5290b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5300b57cec5SDimitry Andric /// 5310b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPUNORDPD / CMPUNORDPD </c> 5320b57cec5SDimitry Andric /// instruction. 5330b57cec5SDimitry Andric /// 5340b57cec5SDimitry Andric /// \param __a 5350b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5360b57cec5SDimitry Andric /// \param __b 5370b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5380b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
539*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_pd(__m128d __a, 540*81ad6265SDimitry Andric __m128d __b) { 5410b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b); 5420b57cec5SDimitry Andric } 5430b57cec5SDimitry Andric 5440b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5450b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5460b57cec5SDimitry Andric /// operand are unequal to those in the second operand. 5470b57cec5SDimitry Andric /// 5480b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 5490b57cec5SDimitry Andric /// 5500b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5510b57cec5SDimitry Andric /// 5520b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNEQPD / CMPNEQPD </c> instruction. 5530b57cec5SDimitry Andric /// 5540b57cec5SDimitry Andric /// \param __a 5550b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5560b57cec5SDimitry Andric /// \param __b 5570b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5580b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 559*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_pd(__m128d __a, 560*81ad6265SDimitry Andric __m128d __b) { 5610b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b); 5620b57cec5SDimitry Andric } 5630b57cec5SDimitry Andric 5640b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5650b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5660b57cec5SDimitry Andric /// operand are not less than those in the second operand. 5670b57cec5SDimitry Andric /// 5680b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 
5690b57cec5SDimitry Andric /// 5700b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5710b57cec5SDimitry Andric /// 5720b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction. 5730b57cec5SDimitry Andric /// 5740b57cec5SDimitry Andric /// \param __a 5750b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5760b57cec5SDimitry Andric /// \param __b 5770b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5780b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 579*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_pd(__m128d __a, 580*81ad6265SDimitry Andric __m128d __b) { 5810b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b); 5820b57cec5SDimitry Andric } 5830b57cec5SDimitry Andric 5840b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5850b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5860b57cec5SDimitry Andric /// operand are not less than or equal to those in the second operand. 5870b57cec5SDimitry Andric /// 5880b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 5890b57cec5SDimitry Andric /// 5900b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5910b57cec5SDimitry Andric /// 5920b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction. 5930b57cec5SDimitry Andric /// 5940b57cec5SDimitry Andric /// \param __a 5950b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5960b57cec5SDimitry Andric /// \param __b 5970b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5980b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
599*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_pd(__m128d __a, 600*81ad6265SDimitry Andric __m128d __b) { 6010b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b); 6020b57cec5SDimitry Andric } 6030b57cec5SDimitry Andric 6040b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 6050b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 6060b57cec5SDimitry Andric /// operand are not greater than those in the second operand. 6070b57cec5SDimitry Andric /// 6080b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 6090b57cec5SDimitry Andric /// 6100b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6110b57cec5SDimitry Andric /// 6120b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction. 6130b57cec5SDimitry Andric /// 6140b57cec5SDimitry Andric /// \param __a 6150b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6160b57cec5SDimitry Andric /// \param __b 6170b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6180b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 619*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_pd(__m128d __a, 620*81ad6265SDimitry Andric __m128d __b) { 6210b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a); 6220b57cec5SDimitry Andric } 6230b57cec5SDimitry Andric 6240b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 6250b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 6260b57cec5SDimitry Andric /// operand are not greater than or equal to those in the second operand. 6270b57cec5SDimitry Andric /// 6280b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 
6290b57cec5SDimitry Andric /// 6300b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6310b57cec5SDimitry Andric /// 6320b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction. 6330b57cec5SDimitry Andric /// 6340b57cec5SDimitry Andric /// \param __a 6350b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6360b57cec5SDimitry Andric /// \param __b 6370b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6380b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 639*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_pd(__m128d __a, 640*81ad6265SDimitry Andric __m128d __b) { 6410b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a); 6420b57cec5SDimitry Andric } 6430b57cec5SDimitry Andric 6440b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 6450b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] for equality. 6460b57cec5SDimitry Andric /// 6470b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 6480b57cec5SDimitry Andric /// 6490b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6500b57cec5SDimitry Andric /// 6510b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPEQSD / CMPEQSD </c> instruction. 6520b57cec5SDimitry Andric /// 6530b57cec5SDimitry Andric /// \param __a 6540b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 6550b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 6560b57cec5SDimitry Andric /// \param __b 6570b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 6580b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 6590b57cec5SDimitry Andric /// \returns A 128-bit vector. 
The lower 64 bits contains the comparison 6600b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 661*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_sd(__m128d __a, 662*81ad6265SDimitry Andric __m128d __b) { 6630b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b); 6640b57cec5SDimitry Andric } 6650b57cec5SDimitry Andric 6660b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 6670b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 6680b57cec5SDimitry Andric /// the value in the first parameter is less than the corresponding value in 6690b57cec5SDimitry Andric /// the second parameter. 6700b57cec5SDimitry Andric /// 6710b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 6720b57cec5SDimitry Andric /// 6730b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6740b57cec5SDimitry Andric /// 6750b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction. 6760b57cec5SDimitry Andric /// 6770b57cec5SDimitry Andric /// \param __a 6780b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 6790b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 6800b57cec5SDimitry Andric /// \param __b 6810b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 6820b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 6830b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 6840b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
685*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_sd(__m128d __a, 686*81ad6265SDimitry Andric __m128d __b) { 6870b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b); 6880b57cec5SDimitry Andric } 6890b57cec5SDimitry Andric 6900b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 6910b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 6920b57cec5SDimitry Andric /// the value in the first parameter is less than or equal to the 6930b57cec5SDimitry Andric /// corresponding value in the second parameter. 6940b57cec5SDimitry Andric /// 6950b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 6960b57cec5SDimitry Andric /// 6970b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6980b57cec5SDimitry Andric /// 6990b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction. 7000b57cec5SDimitry Andric /// 7010b57cec5SDimitry Andric /// \param __a 7020b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7030b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 7040b57cec5SDimitry Andric /// \param __b 7050b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7060b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 7070b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 7080b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
709*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_sd(__m128d __a, 710*81ad6265SDimitry Andric __m128d __b) { 7110b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b); 7120b57cec5SDimitry Andric } 7130b57cec5SDimitry Andric 7140b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 7150b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 7160b57cec5SDimitry Andric /// the value in the first parameter is greater than the corresponding value 7170b57cec5SDimitry Andric /// in the second parameter. 7180b57cec5SDimitry Andric /// 7190b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 7200b57cec5SDimitry Andric /// 7210b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7220b57cec5SDimitry Andric /// 7230b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction. 7240b57cec5SDimitry Andric /// 7250b57cec5SDimitry Andric /// \param __a 7260b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7270b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 7280b57cec5SDimitry Andric /// \param __b 7290b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7300b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 7310b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 7320b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
733*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_sd(__m128d __a, 734*81ad6265SDimitry Andric __m128d __b) { 7350b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a); 7360b57cec5SDimitry Andric return __extension__(__m128d){__c[0], __a[1]}; 7370b57cec5SDimitry Andric } 7380b57cec5SDimitry Andric 7390b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 7400b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 7410b57cec5SDimitry Andric /// the value in the first parameter is greater than or equal to the 7420b57cec5SDimitry Andric /// corresponding value in the second parameter. 7430b57cec5SDimitry Andric /// 7440b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 7450b57cec5SDimitry Andric /// 7460b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7470b57cec5SDimitry Andric /// 7480b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction. 7490b57cec5SDimitry Andric /// 7500b57cec5SDimitry Andric /// \param __a 7510b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7520b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 7530b57cec5SDimitry Andric /// \param __b 7540b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7550b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 7560b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 7570b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
758*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_sd(__m128d __a, 759*81ad6265SDimitry Andric __m128d __b) { 7600b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a); 7610b57cec5SDimitry Andric return __extension__(__m128d){__c[0], __a[1]}; 7620b57cec5SDimitry Andric } 7630b57cec5SDimitry Andric 7640b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 7650b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 7660b57cec5SDimitry Andric /// the value in the first parameter is "ordered" with respect to the 7670b57cec5SDimitry Andric /// corresponding value in the second parameter. 7680b57cec5SDimitry Andric /// 7690b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair 7700b57cec5SDimitry Andric /// of double-precision values are "ordered" with respect to each other if 7710b57cec5SDimitry Andric /// neither value is a NaN. 7720b57cec5SDimitry Andric /// 7730b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7740b57cec5SDimitry Andric /// 7750b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPORDSD / CMPORDSD </c> instruction. 7760b57cec5SDimitry Andric /// 7770b57cec5SDimitry Andric /// \param __a 7780b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7790b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 7800b57cec5SDimitry Andric /// \param __b 7810b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7820b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 7830b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 7840b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
785*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_sd(__m128d __a, 786*81ad6265SDimitry Andric __m128d __b) { 7870b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b); 7880b57cec5SDimitry Andric } 7890b57cec5SDimitry Andric 7900b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 7910b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 7920b57cec5SDimitry Andric /// the value in the first parameter is "unordered" with respect to the 7930b57cec5SDimitry Andric /// corresponding value in the second parameter. 7940b57cec5SDimitry Andric /// 7950b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair 7960b57cec5SDimitry Andric /// of double-precision values are "unordered" with respect to each other if 7970b57cec5SDimitry Andric /// one or both values are NaN. 7980b57cec5SDimitry Andric /// 7990b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8000b57cec5SDimitry Andric /// 8010b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPUNORDSD / CMPUNORDSD </c> 8020b57cec5SDimitry Andric /// instruction. 8030b57cec5SDimitry Andric /// 8040b57cec5SDimitry Andric /// \param __a 8050b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8060b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 8070b57cec5SDimitry Andric /// \param __b 8080b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8090b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 8100b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 8110b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
812*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_sd(__m128d __a, 813*81ad6265SDimitry Andric __m128d __b) { 8140b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b); 8150b57cec5SDimitry Andric } 8160b57cec5SDimitry Andric 8170b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 8180b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 8190b57cec5SDimitry Andric /// the value in the first parameter is unequal to the corresponding value in 8200b57cec5SDimitry Andric /// the second parameter. 8210b57cec5SDimitry Andric /// 8220b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 8230b57cec5SDimitry Andric /// 8240b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8250b57cec5SDimitry Andric /// 8260b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNEQSD / CMPNEQSD </c> instruction. 8270b57cec5SDimitry Andric /// 8280b57cec5SDimitry Andric /// \param __a 8290b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8300b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 8310b57cec5SDimitry Andric /// \param __b 8320b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8330b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 8340b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 8350b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
836*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_sd(__m128d __a, 837*81ad6265SDimitry Andric __m128d __b) { 8380b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b); 8390b57cec5SDimitry Andric } 8400b57cec5SDimitry Andric 8410b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 8420b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 8430b57cec5SDimitry Andric /// the value in the first parameter is not less than the corresponding 8440b57cec5SDimitry Andric /// value in the second parameter. 8450b57cec5SDimitry Andric /// 8460b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 8470b57cec5SDimitry Andric /// 8480b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8490b57cec5SDimitry Andric /// 8500b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction. 8510b57cec5SDimitry Andric /// 8520b57cec5SDimitry Andric /// \param __a 8530b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8540b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 8550b57cec5SDimitry Andric /// \param __b 8560b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8570b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 8580b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 8590b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
860*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_sd(__m128d __a, 861*81ad6265SDimitry Andric __m128d __b) { 8620b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b); 8630b57cec5SDimitry Andric } 8640b57cec5SDimitry Andric 8650b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 8660b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 8670b57cec5SDimitry Andric /// the value in the first parameter is not less than or equal to the 8680b57cec5SDimitry Andric /// corresponding value in the second parameter. 8690b57cec5SDimitry Andric /// 8700b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 8710b57cec5SDimitry Andric /// 8720b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8730b57cec5SDimitry Andric /// 8740b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction. 8750b57cec5SDimitry Andric /// 8760b57cec5SDimitry Andric /// \param __a 8770b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8780b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 8790b57cec5SDimitry Andric /// \param __b 8800b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8810b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 8820b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 8830b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
884*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_sd(__m128d __a, 885*81ad6265SDimitry Andric __m128d __b) { 8860b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b); 8870b57cec5SDimitry Andric } 8880b57cec5SDimitry Andric 8890b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 8900b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 8910b57cec5SDimitry Andric /// the value in the first parameter is not greater than the corresponding 8920b57cec5SDimitry Andric /// value in the second parameter. 8930b57cec5SDimitry Andric /// 8940b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 8950b57cec5SDimitry Andric /// 8960b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8970b57cec5SDimitry Andric /// 8980b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction. 8990b57cec5SDimitry Andric /// 9000b57cec5SDimitry Andric /// \param __a 9010b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9020b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 9030b57cec5SDimitry Andric /// \param __b 9040b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9050b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 9060b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 9070b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
908*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_sd(__m128d __a, 909*81ad6265SDimitry Andric __m128d __b) { 9100b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a); 9110b57cec5SDimitry Andric return __extension__(__m128d){__c[0], __a[1]}; 9120b57cec5SDimitry Andric } 9130b57cec5SDimitry Andric 9140b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 9150b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 9160b57cec5SDimitry Andric /// the value in the first parameter is not greater than or equal to the 9170b57cec5SDimitry Andric /// corresponding value in the second parameter. 9180b57cec5SDimitry Andric /// 9190b57cec5SDimitry Andric /// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 9200b57cec5SDimitry Andric /// 9210b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9220b57cec5SDimitry Andric /// 9230b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction. 9240b57cec5SDimitry Andric /// 9250b57cec5SDimitry Andric /// \param __a 9260b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9270b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 9280b57cec5SDimitry Andric /// \param __b 9290b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9300b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 9310b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 9320b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 
933*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_sd(__m128d __a, 934*81ad6265SDimitry Andric __m128d __b) { 9350b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a); 9360b57cec5SDimitry Andric return __extension__(__m128d){__c[0], __a[1]}; 9370b57cec5SDimitry Andric } 9380b57cec5SDimitry Andric 9390b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 9400b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] for equality. 9410b57cec5SDimitry Andric /// 9420b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 9430b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 9440b57cec5SDimitry Andric /// 9450b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9460b57cec5SDimitry Andric /// 9470b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 9480b57cec5SDimitry Andric /// 9490b57cec5SDimitry Andric /// \param __a 9500b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9510b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 9520b57cec5SDimitry Andric /// \param __b 9530b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9540b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 9550b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 9560b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
957*81ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_sd(__m128d __a, 958*81ad6265SDimitry Andric __m128d __b) { 9590b57cec5SDimitry Andric return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b); 9600b57cec5SDimitry Andric } 9610b57cec5SDimitry Andric 9620b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 9630b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 9640b57cec5SDimitry Andric /// the value in the first parameter is less than the corresponding value in 9650b57cec5SDimitry Andric /// the second parameter. 9660b57cec5SDimitry Andric /// 9670b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 9680b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 9690b57cec5SDimitry Andric /// 9700b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9710b57cec5SDimitry Andric /// 9720b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 9730b57cec5SDimitry Andric /// 9740b57cec5SDimitry Andric /// \param __a 9750b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9760b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 9770b57cec5SDimitry Andric /// \param __b 9780b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9790b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 9800b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 9810b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
982*81ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_sd(__m128d __a, 983*81ad6265SDimitry Andric __m128d __b) { 9840b57cec5SDimitry Andric return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b); 9850b57cec5SDimitry Andric } 9860b57cec5SDimitry Andric 9870b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 9880b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 9890b57cec5SDimitry Andric /// the value in the first parameter is less than or equal to the 9900b57cec5SDimitry Andric /// corresponding value in the second parameter. 9910b57cec5SDimitry Andric /// 9920b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 9930b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 9940b57cec5SDimitry Andric /// 9950b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9960b57cec5SDimitry Andric /// 9970b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 9980b57cec5SDimitry Andric /// 9990b57cec5SDimitry Andric /// \param __a 10000b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10010b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 10020b57cec5SDimitry Andric /// \param __b 10030b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10040b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 10050b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 10060b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
1007*81ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_sd(__m128d __a, 1008*81ad6265SDimitry Andric __m128d __b) { 10090b57cec5SDimitry Andric return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b); 10100b57cec5SDimitry Andric } 10110b57cec5SDimitry Andric 10120b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 10130b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 10140b57cec5SDimitry Andric /// the value in the first parameter is greater than the corresponding value 10150b57cec5SDimitry Andric /// in the second parameter. 10160b57cec5SDimitry Andric /// 10170b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 10180b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 10190b57cec5SDimitry Andric /// 10200b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10210b57cec5SDimitry Andric /// 10220b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 10230b57cec5SDimitry Andric /// 10240b57cec5SDimitry Andric /// \param __a 10250b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10260b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 10270b57cec5SDimitry Andric /// \param __b 10280b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10290b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 10300b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 10310b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
1032*81ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_sd(__m128d __a, 1033*81ad6265SDimitry Andric __m128d __b) { 10340b57cec5SDimitry Andric return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b); 10350b57cec5SDimitry Andric } 10360b57cec5SDimitry Andric 10370b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 10380b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 10390b57cec5SDimitry Andric /// the value in the first parameter is greater than or equal to the 10400b57cec5SDimitry Andric /// corresponding value in the second parameter. 10410b57cec5SDimitry Andric /// 10420b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 10430b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 10440b57cec5SDimitry Andric /// 10450b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10460b57cec5SDimitry Andric /// 10470b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 10480b57cec5SDimitry Andric /// 10490b57cec5SDimitry Andric /// \param __a 10500b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10510b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 10520b57cec5SDimitry Andric /// \param __b 10530b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10540b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 10550b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 10560b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
1057*81ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_sd(__m128d __a, 1058*81ad6265SDimitry Andric __m128d __b) { 10590b57cec5SDimitry Andric return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b); 10600b57cec5SDimitry Andric } 10610b57cec5SDimitry Andric 10620b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 10630b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 10640b57cec5SDimitry Andric /// the value in the first parameter is unequal to the corresponding value in 10650b57cec5SDimitry Andric /// the second parameter. 10660b57cec5SDimitry Andric /// 10670b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 10680b57cec5SDimitry Andric /// lower double-precision values is NaN, 1 is returned. 10690b57cec5SDimitry Andric /// 10700b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10710b57cec5SDimitry Andric /// 10720b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 10730b57cec5SDimitry Andric /// 10740b57cec5SDimitry Andric /// \param __a 10750b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10760b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 10770b57cec5SDimitry Andric /// \param __b 10780b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10790b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 10800b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 10810b57cec5SDimitry Andric /// lower double-precision values is NaN, 1 is returned. 
1082*81ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_sd(__m128d __a, 1083*81ad6265SDimitry Andric __m128d __b) { 10840b57cec5SDimitry Andric return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b); 10850b57cec5SDimitry Andric } 10860b57cec5SDimitry Andric 10870b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 10880b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] for equality. The 10890b57cec5SDimitry Andric /// comparison yields 0 for false, 1 for true. 10900b57cec5SDimitry Andric /// 10910b57cec5SDimitry Andric /// If either of the two lower double-precision values is NaN, 0 is returned. 10920b57cec5SDimitry Andric /// 10930b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10940b57cec5SDimitry Andric /// 10950b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 10960b57cec5SDimitry Andric /// 10970b57cec5SDimitry Andric /// \param __a 10980b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10990b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 11000b57cec5SDimitry Andric /// \param __b 11010b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11020b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 11030b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 11040b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
1105*81ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_sd(__m128d __a, 1106*81ad6265SDimitry Andric __m128d __b) { 11070b57cec5SDimitry Andric return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b); 11080b57cec5SDimitry Andric } 11090b57cec5SDimitry Andric 11100b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 11110b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 11120b57cec5SDimitry Andric /// the value in the first parameter is less than the corresponding value in 11130b57cec5SDimitry Andric /// the second parameter. 11140b57cec5SDimitry Andric /// 11150b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two lower 11160b57cec5SDimitry Andric /// double-precision values is NaN, 0 is returned. 11170b57cec5SDimitry Andric /// 11180b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11190b57cec5SDimitry Andric /// 11200b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 11210b57cec5SDimitry Andric /// 11220b57cec5SDimitry Andric /// \param __a 11230b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11240b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 11250b57cec5SDimitry Andric /// \param __b 11260b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11270b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 11280b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 11290b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
1130*81ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_sd(__m128d __a, 1131*81ad6265SDimitry Andric __m128d __b) { 11320b57cec5SDimitry Andric return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b); 11330b57cec5SDimitry Andric } 11340b57cec5SDimitry Andric 11350b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 11360b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 11370b57cec5SDimitry Andric /// the value in the first parameter is less than or equal to the 11380b57cec5SDimitry Andric /// corresponding value in the second parameter. 11390b57cec5SDimitry Andric /// 11400b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two lower 11410b57cec5SDimitry Andric /// double-precision values is NaN, 0 is returned. 11420b57cec5SDimitry Andric /// 11430b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11440b57cec5SDimitry Andric /// 11450b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 11460b57cec5SDimitry Andric /// 11470b57cec5SDimitry Andric /// \param __a 11480b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11490b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 11500b57cec5SDimitry Andric /// \param __b 11510b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11520b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 11530b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 11540b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
1155*81ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_sd(__m128d __a, 1156*81ad6265SDimitry Andric __m128d __b) { 11570b57cec5SDimitry Andric return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b); 11580b57cec5SDimitry Andric } 11590b57cec5SDimitry Andric 11600b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 11610b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 11620b57cec5SDimitry Andric /// the value in the first parameter is greater than the corresponding value 11630b57cec5SDimitry Andric /// in the second parameter. 11640b57cec5SDimitry Andric /// 11650b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two lower 11660b57cec5SDimitry Andric /// double-precision values is NaN, 0 is returned. 11670b57cec5SDimitry Andric /// 11680b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11690b57cec5SDimitry Andric /// 11700b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 11710b57cec5SDimitry Andric /// 11720b57cec5SDimitry Andric /// \param __a 11730b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11740b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 11750b57cec5SDimitry Andric /// \param __b 11760b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11770b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 11780b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 11790b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
1180*81ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_sd(__m128d __a, 1181*81ad6265SDimitry Andric __m128d __b) { 11820b57cec5SDimitry Andric return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b); 11830b57cec5SDimitry Andric } 11840b57cec5SDimitry Andric 11850b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 11860b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 11870b57cec5SDimitry Andric /// the value in the first parameter is greater than or equal to the 11880b57cec5SDimitry Andric /// corresponding value in the second parameter. 11890b57cec5SDimitry Andric /// 11900b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two 11910b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 11920b57cec5SDimitry Andric /// 11930b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11940b57cec5SDimitry Andric /// 11950b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 11960b57cec5SDimitry Andric /// 11970b57cec5SDimitry Andric /// \param __a 11980b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11990b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 12000b57cec5SDimitry Andric /// \param __b 12010b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12020b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 12030b57cec5SDimitry Andric /// \returns An integer containing the comparison results. If either of the two 12040b57cec5SDimitry Andric /// lower double-precision values is NaN, 0 is returned. 
1205*81ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_sd(__m128d __a, 1206*81ad6265SDimitry Andric __m128d __b) { 12070b57cec5SDimitry Andric return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b); 12080b57cec5SDimitry Andric } 12090b57cec5SDimitry Andric 12100b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 12110b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 12120b57cec5SDimitry Andric /// the value in the first parameter is unequal to the corresponding value in 12130b57cec5SDimitry Andric /// the second parameter. 12140b57cec5SDimitry Andric /// 12150b57cec5SDimitry Andric /// The comparison yields 0 for false, 1 for true. If either of the two lower 12160b57cec5SDimitry Andric /// double-precision values is NaN, 1 is returned. 12170b57cec5SDimitry Andric /// 12180b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12190b57cec5SDimitry Andric /// 12200b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 12210b57cec5SDimitry Andric /// 12220b57cec5SDimitry Andric /// \param __a 12230b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12240b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 12250b57cec5SDimitry Andric /// \param __b 12260b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12270b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 12280b57cec5SDimitry Andric /// \returns An integer containing the comparison result. If either of the two 12290b57cec5SDimitry Andric /// lower double-precision values is NaN, 1 is returned. 
1230*81ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a, 1231*81ad6265SDimitry Andric __m128d __b) { 12320b57cec5SDimitry Andric return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b); 12330b57cec5SDimitry Andric } 12340b57cec5SDimitry Andric 12350b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 12360b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two single-precision floating-point 12370b57cec5SDimitry Andric /// values, returned in the lower 64 bits of a 128-bit vector of [4 x float]. 12380b57cec5SDimitry Andric /// The upper 64 bits of the result vector are set to zero. 12390b57cec5SDimitry Andric /// 12400b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12410b57cec5SDimitry Andric /// 12420b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPD2PS / CVTPD2PS </c> instruction. 12430b57cec5SDimitry Andric /// 12440b57cec5SDimitry Andric /// \param __a 12450b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 12460b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the 12470b57cec5SDimitry Andric /// converted values. The upper 64 bits are set to zero. 1248*81ad6265SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a) { 12490b57cec5SDimitry Andric return __builtin_ia32_cvtpd2ps((__v2df)__a); 12500b57cec5SDimitry Andric } 12510b57cec5SDimitry Andric 12520b57cec5SDimitry Andric /// Converts the lower two single-precision floating-point elements of a 12530b57cec5SDimitry Andric /// 128-bit vector of [4 x float] into two double-precision floating-point 12540b57cec5SDimitry Andric /// values, returned in a 128-bit vector of [2 x double]. The upper two 12550b57cec5SDimitry Andric /// elements of the input vector are unused. 
12560b57cec5SDimitry Andric /// 12570b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12580b57cec5SDimitry Andric /// 12590b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPS2PD / CVTPS2PD </c> instruction. 12600b57cec5SDimitry Andric /// 12610b57cec5SDimitry Andric /// \param __a 12620b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. The lower two single-precision 12630b57cec5SDimitry Andric /// floating-point elements are converted to double-precision values. The 12640b57cec5SDimitry Andric /// upper two elements are unused. 12650b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the converted values. 1266*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a) { 12670b57cec5SDimitry Andric return (__m128d) __builtin_convertvector( 12680b57cec5SDimitry Andric __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df); 12690b57cec5SDimitry Andric } 12700b57cec5SDimitry Andric 12710b57cec5SDimitry Andric /// Converts the lower two integer elements of a 128-bit vector of 12720b57cec5SDimitry Andric /// [4 x i32] into two double-precision floating-point values, returned in a 12730b57cec5SDimitry Andric /// 128-bit vector of [2 x double]. 12740b57cec5SDimitry Andric /// 12750b57cec5SDimitry Andric /// The upper two elements of the input vector are unused. 12760b57cec5SDimitry Andric /// 12770b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12780b57cec5SDimitry Andric /// 12790b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTDQ2PD / CVTDQ2PD </c> instruction. 12800b57cec5SDimitry Andric /// 12810b57cec5SDimitry Andric /// \param __a 12820b57cec5SDimitry Andric /// A 128-bit integer vector of [4 x i32]. The lower two integer elements are 12830b57cec5SDimitry Andric /// converted to double-precision values. 12840b57cec5SDimitry Andric /// 12850b57cec5SDimitry Andric /// The upper two elements are unused. 
12860b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the converted values. 1287*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a) { 12880b57cec5SDimitry Andric return (__m128d) __builtin_convertvector( 12890b57cec5SDimitry Andric __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df); 12900b57cec5SDimitry Andric } 12910b57cec5SDimitry Andric 12920b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 12930b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two signed 32-bit integer values, 12940b57cec5SDimitry Andric /// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. The upper 12950b57cec5SDimitry Andric /// 64 bits of the result vector are set to zero. 12960b57cec5SDimitry Andric /// 12970b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12980b57cec5SDimitry Andric /// 12990b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPD2DQ / CVTPD2DQ </c> instruction. 13000b57cec5SDimitry Andric /// 13010b57cec5SDimitry Andric /// \param __a 13020b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 13030b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the 13040b57cec5SDimitry Andric /// converted values. The upper 64 bits are set to zero. 1305*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtpd_epi32(__m128d __a) { 13060b57cec5SDimitry Andric return __builtin_ia32_cvtpd2dq((__v2df)__a); 13070b57cec5SDimitry Andric } 13080b57cec5SDimitry Andric 13090b57cec5SDimitry Andric /// Converts the low-order element of a 128-bit vector of [2 x double] 13100b57cec5SDimitry Andric /// into a 32-bit signed integer value. 
13110b57cec5SDimitry Andric /// 13120b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13130b57cec5SDimitry Andric /// 13140b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction. 13150b57cec5SDimitry Andric /// 13160b57cec5SDimitry Andric /// \param __a 13170b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 13180b57cec5SDimitry Andric /// conversion. 13190b57cec5SDimitry Andric /// \returns A 32-bit signed integer containing the converted value. 1320*81ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a) { 13210b57cec5SDimitry Andric return __builtin_ia32_cvtsd2si((__v2df)__a); 13220b57cec5SDimitry Andric } 13230b57cec5SDimitry Andric 13240b57cec5SDimitry Andric /// Converts the lower double-precision floating-point element of a 13250b57cec5SDimitry Andric /// 128-bit vector of [2 x double], in the second parameter, into a 13260b57cec5SDimitry Andric /// single-precision floating-point value, returned in the lower 32 bits of a 13270b57cec5SDimitry Andric /// 128-bit vector of [4 x float]. The upper 96 bits of the result vector are 13280b57cec5SDimitry Andric /// copied from the upper 96 bits of the first parameter. 13290b57cec5SDimitry Andric /// 13300b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13310b57cec5SDimitry Andric /// 13320b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSD2SS / CVTSD2SS </c> instruction. 13330b57cec5SDimitry Andric /// 13340b57cec5SDimitry Andric /// \param __a 13350b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. The upper 96 bits of this parameter are 13360b57cec5SDimitry Andric /// copied to the upper 96 bits of the result. 13370b57cec5SDimitry Andric /// \param __b 13380b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision 13390b57cec5SDimitry Andric /// floating-point element is used in the conversion. 
13400b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float]. The lower 32 bits contain the 13410b57cec5SDimitry Andric /// converted value from the second parameter. The upper 96 bits are copied 13420b57cec5SDimitry Andric /// from the upper 96 bits of the first parameter. 1343*81ad6265SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a, 1344*81ad6265SDimitry Andric __m128d __b) { 13450b57cec5SDimitry Andric return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b); 13460b57cec5SDimitry Andric } 13470b57cec5SDimitry Andric 13480b57cec5SDimitry Andric /// Converts a 32-bit signed integer value, in the second parameter, into 13490b57cec5SDimitry Andric /// a double-precision floating-point value, returned in the lower 64 bits of 13500b57cec5SDimitry Andric /// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector 13510b57cec5SDimitry Andric /// are copied from the upper 64 bits of the first parameter. 13520b57cec5SDimitry Andric /// 13530b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13540b57cec5SDimitry Andric /// 13550b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction. 13560b57cec5SDimitry Andric /// 13570b57cec5SDimitry Andric /// \param __a 13580b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are 13590b57cec5SDimitry Andric /// copied to the upper 64 bits of the result. 13600b57cec5SDimitry Andric /// \param __b 13610b57cec5SDimitry Andric /// A 32-bit signed integer containing the value to be converted. 13620b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the 13630b57cec5SDimitry Andric /// converted value from the second parameter. The upper 64 bits are copied 13640b57cec5SDimitry Andric /// from the upper 64 bits of the first parameter. 
1365*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi32_sd(__m128d __a, 1366*81ad6265SDimitry Andric int __b) { 13670b57cec5SDimitry Andric __a[0] = __b; 13680b57cec5SDimitry Andric return __a; 13690b57cec5SDimitry Andric } 13700b57cec5SDimitry Andric 13710b57cec5SDimitry Andric /// Converts the lower single-precision floating-point element of a 13720b57cec5SDimitry Andric /// 128-bit vector of [4 x float], in the second parameter, into a 13730b57cec5SDimitry Andric /// double-precision floating-point value, returned in the lower 64 bits of 13740b57cec5SDimitry Andric /// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector 13750b57cec5SDimitry Andric /// are copied from the upper 64 bits of the first parameter. 13760b57cec5SDimitry Andric /// 13770b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13780b57cec5SDimitry Andric /// 13790b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSS2SD / CVTSS2SD </c> instruction. 13800b57cec5SDimitry Andric /// 13810b57cec5SDimitry Andric /// \param __a 13820b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are 13830b57cec5SDimitry Andric /// copied to the upper 64 bits of the result. 13840b57cec5SDimitry Andric /// \param __b 13850b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. The lower single-precision 13860b57cec5SDimitry Andric /// floating-point element is used in the conversion. 13870b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the 13880b57cec5SDimitry Andric /// converted value from the second parameter. The upper 64 bits are copied 13890b57cec5SDimitry Andric /// from the upper 64 bits of the first parameter. 
1390*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtss_sd(__m128d __a, 1391*81ad6265SDimitry Andric __m128 __b) { 13920b57cec5SDimitry Andric __a[0] = __b[0]; 13930b57cec5SDimitry Andric return __a; 13940b57cec5SDimitry Andric } 13950b57cec5SDimitry Andric 13960b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 13970b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two signed 32-bit integer values, 13980b57cec5SDimitry Andric /// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. 13990b57cec5SDimitry Andric /// 14000b57cec5SDimitry Andric /// If the result of either conversion is inexact, the result is truncated 14010b57cec5SDimitry Andric /// (rounded towards zero) regardless of the current MXCSR setting. The upper 14020b57cec5SDimitry Andric /// 64 bits of the result vector are set to zero. 14030b57cec5SDimitry Andric /// 14040b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14050b57cec5SDimitry Andric /// 14060b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTPD2DQ / CVTTPD2DQ </c> 14070b57cec5SDimitry Andric /// instruction. 14080b57cec5SDimitry Andric /// 14090b57cec5SDimitry Andric /// \param __a 14100b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 14110b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the 14120b57cec5SDimitry Andric /// converted values. The upper 64 bits are set to zero. 1413*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttpd_epi32(__m128d __a) { 14140b57cec5SDimitry Andric return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a); 14150b57cec5SDimitry Andric } 14160b57cec5SDimitry Andric 14170b57cec5SDimitry Andric /// Converts the low-order element of a [2 x double] vector into a 32-bit 14180b57cec5SDimitry Andric /// signed integer value, truncating the result when it is inexact. 
14190b57cec5SDimitry Andric /// 14200b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14210b57cec5SDimitry Andric /// 14220b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c> 14230b57cec5SDimitry Andric /// instruction. 14240b57cec5SDimitry Andric /// 14250b57cec5SDimitry Andric /// \param __a 14260b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 14270b57cec5SDimitry Andric /// conversion. 14280b57cec5SDimitry Andric /// \returns A 32-bit signed integer containing the converted value. 1429*81ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttsd_si32(__m128d __a) { 14300b57cec5SDimitry Andric return __builtin_ia32_cvttsd2si((__v2df)__a); 14310b57cec5SDimitry Andric } 14320b57cec5SDimitry Andric 14330b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 14340b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two signed 32-bit integer values, 14350b57cec5SDimitry Andric /// returned in a 64-bit vector of [2 x i32]. 14360b57cec5SDimitry Andric /// 14370b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14380b57cec5SDimitry Andric /// 14390b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CVTPD2PI </c> instruction. 14400b57cec5SDimitry Andric /// 14410b57cec5SDimitry Andric /// \param __a 14420b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 14430b57cec5SDimitry Andric /// \returns A 64-bit vector of [2 x i32] containing the converted values. 
1444*81ad6265SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtpd_pi32(__m128d __a) { 14450b57cec5SDimitry Andric return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a); 14460b57cec5SDimitry Andric } 14470b57cec5SDimitry Andric 14480b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 14490b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two signed 32-bit integer values, 14500b57cec5SDimitry Andric /// returned in a 64-bit vector of [2 x i32]. 14510b57cec5SDimitry Andric /// 14520b57cec5SDimitry Andric /// If the result of either conversion is inexact, the result is truncated 14530b57cec5SDimitry Andric /// (rounded towards zero) regardless of the current MXCSR setting. 14540b57cec5SDimitry Andric /// 14550b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14560b57cec5SDimitry Andric /// 14570b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CVTTPD2PI </c> instruction. 14580b57cec5SDimitry Andric /// 14590b57cec5SDimitry Andric /// \param __a 14600b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 14610b57cec5SDimitry Andric /// \returns A 64-bit vector of [2 x i32] containing the converted values. 1462*81ad6265SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvttpd_pi32(__m128d __a) { 14630b57cec5SDimitry Andric return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a); 14640b57cec5SDimitry Andric } 14650b57cec5SDimitry Andric 14660b57cec5SDimitry Andric /// Converts the two signed 32-bit integer elements of a 64-bit vector of 14670b57cec5SDimitry Andric /// [2 x i32] into two double-precision floating-point values, returned in a 14680b57cec5SDimitry Andric /// 128-bit vector of [2 x double]. 14690b57cec5SDimitry Andric /// 14700b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14710b57cec5SDimitry Andric /// 14720b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CVTPI2PD </c> instruction. 
14730b57cec5SDimitry Andric /// 14740b57cec5SDimitry Andric /// \param __a 14750b57cec5SDimitry Andric /// A 64-bit vector of [2 x i32]. 14760b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the converted values. 1477*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32_pd(__m64 __a) { 14780b57cec5SDimitry Andric return __builtin_ia32_cvtpi2pd((__v2si)__a); 14790b57cec5SDimitry Andric } 14800b57cec5SDimitry Andric 14810b57cec5SDimitry Andric /// Returns the low-order element of a 128-bit vector of [2 x double] as 14820b57cec5SDimitry Andric /// a double-precision floating-point value. 14830b57cec5SDimitry Andric /// 14840b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14850b57cec5SDimitry Andric /// 14860b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 14870b57cec5SDimitry Andric /// 14880b57cec5SDimitry Andric /// \param __a 14890b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are returned. 14900b57cec5SDimitry Andric /// \returns A double-precision floating-point value copied from the lower 64 14910b57cec5SDimitry Andric /// bits of \a __a. 1492*81ad6265SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS _mm_cvtsd_f64(__m128d __a) { 14930b57cec5SDimitry Andric return __a[0]; 14940b57cec5SDimitry Andric } 14950b57cec5SDimitry Andric 14960b57cec5SDimitry Andric /// Loads a 128-bit floating-point vector of [2 x double] from an aligned 14970b57cec5SDimitry Andric /// memory location. 14980b57cec5SDimitry Andric /// 14990b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15000b57cec5SDimitry Andric /// 15010b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction. 15020b57cec5SDimitry Andric /// 15030b57cec5SDimitry Andric /// \param __dp 15040b57cec5SDimitry Andric /// A pointer to a 128-bit memory location. 
The address of the memory 15050b57cec5SDimitry Andric /// location has to be 16-byte aligned. 15060b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded values. 1507*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_pd(double const *__dp) { 1508480093f4SDimitry Andric return *(const __m128d *)__dp; 15090b57cec5SDimitry Andric } 15100b57cec5SDimitry Andric 15110b57cec5SDimitry Andric /// Loads a double-precision floating-point value from a specified memory 15120b57cec5SDimitry Andric /// location and duplicates it to both vector elements of a 128-bit vector of 15130b57cec5SDimitry Andric /// [2 x double]. 15140b57cec5SDimitry Andric /// 15150b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15160b57cec5SDimitry Andric /// 15170b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP / MOVDDUP </c> instruction. 15180b57cec5SDimitry Andric /// 15190b57cec5SDimitry Andric /// \param __dp 15200b57cec5SDimitry Andric /// A pointer to a memory location containing a double-precision value. 15210b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded and 15220b57cec5SDimitry Andric /// duplicated values. 
1523*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load1_pd(double const *__dp) { 15240b57cec5SDimitry Andric struct __mm_load1_pd_struct { 15250b57cec5SDimitry Andric double __u; 15260b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1527480093f4SDimitry Andric double __u = ((const struct __mm_load1_pd_struct *)__dp)->__u; 15280b57cec5SDimitry Andric return __extension__(__m128d){__u, __u}; 15290b57cec5SDimitry Andric } 15300b57cec5SDimitry Andric 15310b57cec5SDimitry Andric #define _mm_load_pd1(dp) _mm_load1_pd(dp) 15320b57cec5SDimitry Andric 15330b57cec5SDimitry Andric /// Loads two double-precision values, in reverse order, from an aligned 15340b57cec5SDimitry Andric /// memory location into a 128-bit vector of [2 x double]. 15350b57cec5SDimitry Andric /// 15360b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15370b57cec5SDimitry Andric /// 15380b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction + 15390b57cec5SDimitry Andric /// needed shuffling instructions. In AVX mode, the shuffling may be combined 15400b57cec5SDimitry Andric /// with the \c VMOVAPD, resulting in only a \c VPERMILPD instruction. 15410b57cec5SDimitry Andric /// 15420b57cec5SDimitry Andric /// \param __dp 15430b57cec5SDimitry Andric /// A 16-byte aligned pointer to an array of double-precision values to be 15440b57cec5SDimitry Andric /// loaded in reverse order. 15450b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the reversed loaded 15460b57cec5SDimitry Andric /// values. 
1547*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadr_pd(double const *__dp) { 1548480093f4SDimitry Andric __m128d __u = *(const __m128d *)__dp; 15490b57cec5SDimitry Andric return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0); 15500b57cec5SDimitry Andric } 15510b57cec5SDimitry Andric 15520b57cec5SDimitry Andric /// Loads a 128-bit floating-point vector of [2 x double] from an 15530b57cec5SDimitry Andric /// unaligned memory location. 15540b57cec5SDimitry Andric /// 15550b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15560b57cec5SDimitry Andric /// 15570b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction. 15580b57cec5SDimitry Andric /// 15590b57cec5SDimitry Andric /// \param __dp 15600b57cec5SDimitry Andric /// A pointer to a 128-bit memory location. The address of the memory 15610b57cec5SDimitry Andric /// location does not have to be aligned. 15620b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded values. 1563*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadu_pd(double const *__dp) { 15640b57cec5SDimitry Andric struct __loadu_pd { 15650b57cec5SDimitry Andric __m128d_u __v; 15660b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1567480093f4SDimitry Andric return ((const struct __loadu_pd *)__dp)->__v; 15680b57cec5SDimitry Andric } 15690b57cec5SDimitry Andric 15700b57cec5SDimitry Andric /// Loads a 64-bit integer value to the low element of a 128-bit integer 15710b57cec5SDimitry Andric /// vector and clears the upper element. 15720b57cec5SDimitry Andric /// 15730b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15740b57cec5SDimitry Andric /// 15750b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 15760b57cec5SDimitry Andric /// 15770b57cec5SDimitry Andric /// \param __a 15780b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. 
The address of the memory 15790b57cec5SDimitry Andric /// location does not have to be aligned. 15800b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the loaded value. 1581*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si64(void const *__a) { 15820b57cec5SDimitry Andric struct __loadu_si64 { 15830b57cec5SDimitry Andric long long __v; 15840b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1585480093f4SDimitry Andric long long __u = ((const struct __loadu_si64 *)__a)->__v; 15860b57cec5SDimitry Andric return __extension__(__m128i)(__v2di){__u, 0LL}; 15870b57cec5SDimitry Andric } 15880b57cec5SDimitry Andric 15890b57cec5SDimitry Andric /// Loads a 32-bit integer value to the low element of a 128-bit integer 15900b57cec5SDimitry Andric /// vector and clears the upper element. 15910b57cec5SDimitry Andric /// 15920b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15930b57cec5SDimitry Andric /// 15940b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 15950b57cec5SDimitry Andric /// 15960b57cec5SDimitry Andric /// \param __a 15970b57cec5SDimitry Andric /// A pointer to a 32-bit memory location. The address of the memory 15980b57cec5SDimitry Andric /// location does not have to be aligned. 15990b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the loaded value. 
1600*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si32(void const *__a) { 16010b57cec5SDimitry Andric struct __loadu_si32 { 16020b57cec5SDimitry Andric int __v; 16030b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1604480093f4SDimitry Andric int __u = ((const struct __loadu_si32 *)__a)->__v; 16050b57cec5SDimitry Andric return __extension__(__m128i)(__v4si){__u, 0, 0, 0}; 16060b57cec5SDimitry Andric } 16070b57cec5SDimitry Andric 16080b57cec5SDimitry Andric /// Loads a 16-bit integer value to the low element of a 128-bit integer 16090b57cec5SDimitry Andric /// vector and clears the upper element. 16100b57cec5SDimitry Andric /// 16110b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16120b57cec5SDimitry Andric /// 16130b57cec5SDimitry Andric /// This intrinsic does not correspond to a specific instruction. 16140b57cec5SDimitry Andric /// 16150b57cec5SDimitry Andric /// \param __a 16160b57cec5SDimitry Andric /// A pointer to a 16-bit memory location. The address of the memory 16170b57cec5SDimitry Andric /// location does not have to be aligned. 16180b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the loaded value. 1619*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si16(void const *__a) { 16200b57cec5SDimitry Andric struct __loadu_si16 { 16210b57cec5SDimitry Andric short __v; 16220b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1623480093f4SDimitry Andric short __u = ((const struct __loadu_si16 *)__a)->__v; 16240b57cec5SDimitry Andric return __extension__(__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0}; 16250b57cec5SDimitry Andric } 16260b57cec5SDimitry Andric 16270b57cec5SDimitry Andric /// Loads a 64-bit double-precision value to the low element of a 16280b57cec5SDimitry Andric /// 128-bit integer vector and clears the upper element. 
16290b57cec5SDimitry Andric /// 16300b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16310b57cec5SDimitry Andric /// 16320b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction. 16330b57cec5SDimitry Andric /// 16340b57cec5SDimitry Andric /// \param __dp 16350b57cec5SDimitry Andric /// A pointer to a memory location containing a double-precision value. 16360b57cec5SDimitry Andric /// The address of the memory location does not have to be aligned. 16370b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded value. 1638*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_sd(double const *__dp) { 16390b57cec5SDimitry Andric struct __mm_load_sd_struct { 16400b57cec5SDimitry Andric double __u; 16410b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1642480093f4SDimitry Andric double __u = ((const struct __mm_load_sd_struct *)__dp)->__u; 16430b57cec5SDimitry Andric return __extension__(__m128d){__u, 0}; 16440b57cec5SDimitry Andric } 16450b57cec5SDimitry Andric 16460b57cec5SDimitry Andric /// Loads a double-precision value into the high-order bits of a 128-bit 16470b57cec5SDimitry Andric /// vector of [2 x double]. The low-order bits are copied from the low-order 16480b57cec5SDimitry Andric /// bits of the first operand. 16490b57cec5SDimitry Andric /// 16500b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16510b57cec5SDimitry Andric /// 16520b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction. 16530b57cec5SDimitry Andric /// 16540b57cec5SDimitry Andric /// \param __a 16550b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 16560b57cec5SDimitry Andric /// Bits [63:0] are written to bits [63:0] of the result. 
16570b57cec5SDimitry Andric /// \param __dp 16580b57cec5SDimitry Andric /// A pointer to a 64-bit memory location containing a double-precision 16590b57cec5SDimitry Andric /// floating-point value that is loaded. The loaded value is written to bits 16600b57cec5SDimitry Andric /// [127:64] of the result. The address of the memory location does not have 16610b57cec5SDimitry Andric /// to be aligned. 16620b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved values. 1663*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadh_pd(__m128d __a, 1664*81ad6265SDimitry Andric double const *__dp) { 16650b57cec5SDimitry Andric struct __mm_loadh_pd_struct { 16660b57cec5SDimitry Andric double __u; 16670b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1668480093f4SDimitry Andric double __u = ((const struct __mm_loadh_pd_struct *)__dp)->__u; 16690b57cec5SDimitry Andric return __extension__(__m128d){__a[0], __u}; 16700b57cec5SDimitry Andric } 16710b57cec5SDimitry Andric 16720b57cec5SDimitry Andric /// Loads a double-precision value into the low-order bits of a 128-bit 16730b57cec5SDimitry Andric /// vector of [2 x double]. The high-order bits are copied from the 16740b57cec5SDimitry Andric /// high-order bits of the first operand. 16750b57cec5SDimitry Andric /// 16760b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16770b57cec5SDimitry Andric /// 16780b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction. 16790b57cec5SDimitry Andric /// 16800b57cec5SDimitry Andric /// \param __a 16810b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 16820b57cec5SDimitry Andric /// Bits [127:64] are written to bits [127:64] of the result. 16830b57cec5SDimitry Andric /// \param __dp 16840b57cec5SDimitry Andric /// A pointer to a 64-bit memory location containing a double-precision 16850b57cec5SDimitry Andric /// floating-point value that is loaded. 
The loaded value is written to bits 16860b57cec5SDimitry Andric /// [63:0] of the result. The address of the memory location does not have to 16870b57cec5SDimitry Andric /// be aligned. 16880b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved values. 1689*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadl_pd(__m128d __a, 1690*81ad6265SDimitry Andric double const *__dp) { 16910b57cec5SDimitry Andric struct __mm_loadl_pd_struct { 16920b57cec5SDimitry Andric double __u; 16930b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1694480093f4SDimitry Andric double __u = ((const struct __mm_loadl_pd_struct *)__dp)->__u; 16950b57cec5SDimitry Andric return __extension__(__m128d){__u, __a[1]}; 16960b57cec5SDimitry Andric } 16970b57cec5SDimitry Andric 16980b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] with 16990b57cec5SDimitry Andric /// unspecified content. This could be used as an argument to another 17000b57cec5SDimitry Andric /// intrinsic function where the argument is required but the value is not 17010b57cec5SDimitry Andric /// actually used. 17020b57cec5SDimitry Andric /// 17030b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17040b57cec5SDimitry Andric /// 17050b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 17060b57cec5SDimitry Andric /// 17070b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [2 x double] with unspecified 17080b57cec5SDimitry Andric /// content. 1709*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void) { 17100b57cec5SDimitry Andric return (__m128d)__builtin_ia32_undef128(); 17110b57cec5SDimitry Andric } 17120b57cec5SDimitry Andric 17130b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double]. 
The lower 17140b57cec5SDimitry Andric /// 64 bits of the vector are initialized with the specified double-precision 17150b57cec5SDimitry Andric /// floating-point value. The upper 64 bits are set to zero. 17160b57cec5SDimitry Andric /// 17170b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17180b57cec5SDimitry Andric /// 17190b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 17200b57cec5SDimitry Andric /// 17210b57cec5SDimitry Andric /// \param __w 17220b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the lower 64 17230b57cec5SDimitry Andric /// bits of the result. 17240b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. The 17250b57cec5SDimitry Andric /// lower 64 bits contain the value of the parameter. The upper 64 bits are 17260b57cec5SDimitry Andric /// set to zero. 1727*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_sd(double __w) { 17280b57cec5SDimitry Andric return __extension__(__m128d){__w, 0}; 17290b57cec5SDimitry Andric } 17300b57cec5SDimitry Andric 17310b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double], with each 17320b57cec5SDimitry Andric /// of the two double-precision floating-point vector elements set to the 17330b57cec5SDimitry Andric /// specified double-precision floating-point value. 17340b57cec5SDimitry Andric /// 17350b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17360b57cec5SDimitry Andric /// 17370b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction. 17380b57cec5SDimitry Andric /// 17390b57cec5SDimitry Andric /// \param __w 17400b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize each vector 17410b57cec5SDimitry Andric /// element of the result. 17420b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 
1743*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set1_pd(double __w) { 17440b57cec5SDimitry Andric return __extension__(__m128d){__w, __w}; 17450b57cec5SDimitry Andric } 17460b57cec5SDimitry Andric 17470b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double], with each 17480b57cec5SDimitry Andric /// of the two double-precision floating-point vector elements set to the 17490b57cec5SDimitry Andric /// specified double-precision floating-point value. 17500b57cec5SDimitry Andric /// 17510b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17520b57cec5SDimitry Andric /// 17530b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction. 17540b57cec5SDimitry Andric /// 17550b57cec5SDimitry Andric /// \param __w 17560b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize each vector 17570b57cec5SDimitry Andric /// element of the result. 17580b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 1759*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd1(double __w) { 17600b57cec5SDimitry Andric return _mm_set1_pd(__w); 17610b57cec5SDimitry Andric } 17620b57cec5SDimitry Andric 17630b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] 17640b57cec5SDimitry Andric /// initialized with the specified double-precision floating-point values. 17650b57cec5SDimitry Andric /// 17660b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17670b57cec5SDimitry Andric /// 17680b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction. 17690b57cec5SDimitry Andric /// 17700b57cec5SDimitry Andric /// \param __w 17710b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the upper 64 17720b57cec5SDimitry Andric /// bits of the result. 
17730b57cec5SDimitry Andric /// \param __x 17740b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the lower 64 17750b57cec5SDimitry Andric /// bits of the result. 17760b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 1777*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd(double __w, 1778*81ad6265SDimitry Andric double __x) { 17790b57cec5SDimitry Andric return __extension__(__m128d){__x, __w}; 17800b57cec5SDimitry Andric } 17810b57cec5SDimitry Andric 17820b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double], 17830b57cec5SDimitry Andric /// initialized in reverse order with the specified double-precision 17840b57cec5SDimitry Andric /// floating-point values. 17850b57cec5SDimitry Andric /// 17860b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17870b57cec5SDimitry Andric /// 17880b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction. 17890b57cec5SDimitry Andric /// 17900b57cec5SDimitry Andric /// \param __w 17910b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the lower 64 17920b57cec5SDimitry Andric /// bits of the result. 17930b57cec5SDimitry Andric /// \param __x 17940b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the upper 64 17950b57cec5SDimitry Andric /// bits of the result. 17960b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 1797*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setr_pd(double __w, 1798*81ad6265SDimitry Andric double __x) { 17990b57cec5SDimitry Andric return __extension__(__m128d){__w, __x}; 18000b57cec5SDimitry Andric } 18010b57cec5SDimitry Andric 18020b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] 18030b57cec5SDimitry Andric /// initialized to zero. 
18040b57cec5SDimitry Andric /// 18050b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18060b57cec5SDimitry Andric /// 18070b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction. 18080b57cec5SDimitry Andric /// 18090b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double] with 18100b57cec5SDimitry Andric /// all elements set to zero. 1811*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void) { 18120b57cec5SDimitry Andric return __extension__(__m128d){0, 0}; 18130b57cec5SDimitry Andric } 18140b57cec5SDimitry Andric 18150b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double]. The lower 18160b57cec5SDimitry Andric /// 64 bits are set to the lower 64 bits of the second parameter. The upper 18170b57cec5SDimitry Andric /// 64 bits are set to the upper 64 bits of the first parameter. 18180b57cec5SDimitry Andric /// 18190b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18200b57cec5SDimitry Andric /// 18210b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction. 18220b57cec5SDimitry Andric /// 18230b57cec5SDimitry Andric /// \param __a 18240b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits are written to the 18250b57cec5SDimitry Andric /// upper 64 bits of the result. 18260b57cec5SDimitry Andric /// \param __b 18270b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are written to the 18280b57cec5SDimitry Andric /// lower 64 bits of the result. 18290b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved values. 
1830*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a, 1831*81ad6265SDimitry Andric __m128d __b) { 18320b57cec5SDimitry Andric __a[0] = __b[0]; 18330b57cec5SDimitry Andric return __a; 18340b57cec5SDimitry Andric } 18350b57cec5SDimitry Andric 18360b57cec5SDimitry Andric /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a 18370b57cec5SDimitry Andric /// memory location. 18380b57cec5SDimitry Andric /// 18390b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18400b57cec5SDimitry Andric /// 18410b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction. 18420b57cec5SDimitry Andric /// 18430b57cec5SDimitry Andric /// \param __dp 18440b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. 18450b57cec5SDimitry Andric /// \param __a 18460b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the value to be stored. 1847*81ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_store_sd(double *__dp, 1848*81ad6265SDimitry Andric __m128d __a) { 18490b57cec5SDimitry Andric struct __mm_store_sd_struct { 18500b57cec5SDimitry Andric double __u; 18510b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 18520b57cec5SDimitry Andric ((struct __mm_store_sd_struct *)__dp)->__u = __a[0]; 18530b57cec5SDimitry Andric } 18540b57cec5SDimitry Andric 18550b57cec5SDimitry Andric /// Moves packed double-precision values from a 128-bit vector of 18560b57cec5SDimitry Andric /// [2 x double] to a memory location. 18570b57cec5SDimitry Andric /// 18580b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18590b57cec5SDimitry Andric /// 18600b57cec5SDimitry Andric /// This intrinsic corresponds to the <c>VMOVAPD / MOVAPS</c> instruction. 
18610b57cec5SDimitry Andric /// 18620b57cec5SDimitry Andric /// \param __dp 18630b57cec5SDimitry Andric /// A pointer to an aligned memory location that can store two 18640b57cec5SDimitry Andric /// double-precision values. 18650b57cec5SDimitry Andric /// \param __a 18660b57cec5SDimitry Andric /// A packed 128-bit vector of [2 x double] containing the values to be 18670b57cec5SDimitry Andric /// moved. 1868*81ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd(double *__dp, 1869*81ad6265SDimitry Andric __m128d __a) { 18700b57cec5SDimitry Andric *(__m128d *)__dp = __a; 18710b57cec5SDimitry Andric } 18720b57cec5SDimitry Andric 18730b57cec5SDimitry Andric /// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to 18740b57cec5SDimitry Andric /// the upper and lower 64 bits of a memory location. 18750b57cec5SDimitry Andric /// 18760b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18770b57cec5SDimitry Andric /// 18780b57cec5SDimitry Andric /// This intrinsic corresponds to the 18790b57cec5SDimitry Andric /// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction. 18800b57cec5SDimitry Andric /// 18810b57cec5SDimitry Andric /// \param __dp 18820b57cec5SDimitry Andric /// A pointer to a memory location that can store two double-precision 18830b57cec5SDimitry Andric /// values. 18840b57cec5SDimitry Andric /// \param __a 18850b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each 18860b57cec5SDimitry Andric /// of the values in \a __dp. 
1887*81ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_pd(double *__dp, 1888*81ad6265SDimitry Andric __m128d __a) { 18890b57cec5SDimitry Andric __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); 18900b57cec5SDimitry Andric _mm_store_pd(__dp, __a); 18910b57cec5SDimitry Andric } 18920b57cec5SDimitry Andric 18930b57cec5SDimitry Andric /// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to 18940b57cec5SDimitry Andric /// the upper and lower 64 bits of a memory location. 18950b57cec5SDimitry Andric /// 18960b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18970b57cec5SDimitry Andric /// 18980b57cec5SDimitry Andric /// This intrinsic corresponds to the 18990b57cec5SDimitry Andric /// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction. 19000b57cec5SDimitry Andric /// 19010b57cec5SDimitry Andric /// \param __dp 19020b57cec5SDimitry Andric /// A pointer to a memory location that can store two double-precision 19030b57cec5SDimitry Andric /// values. 19040b57cec5SDimitry Andric /// \param __a 19050b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each 19060b57cec5SDimitry Andric /// of the values in \a __dp. 1907*81ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd1(double *__dp, 1908*81ad6265SDimitry Andric __m128d __a) { 19090b57cec5SDimitry Andric _mm_store1_pd(__dp, __a); 19100b57cec5SDimitry Andric } 19110b57cec5SDimitry Andric 19120b57cec5SDimitry Andric /// Stores a 128-bit vector of [2 x double] into an unaligned memory 19130b57cec5SDimitry Andric /// location. 19140b57cec5SDimitry Andric /// 19150b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19160b57cec5SDimitry Andric /// 19170b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction. 19180b57cec5SDimitry Andric /// 19190b57cec5SDimitry Andric /// \param __dp 19200b57cec5SDimitry Andric /// A pointer to a 128-bit memory location. 
The address of the memory 19210b57cec5SDimitry Andric /// location does not have to be aligned. 19220b57cec5SDimitry Andric /// \param __a 19230b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the values to be stored. 1924*81ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_pd(double *__dp, 1925*81ad6265SDimitry Andric __m128d __a) { 19260b57cec5SDimitry Andric struct __storeu_pd { 19270b57cec5SDimitry Andric __m128d_u __v; 19280b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 19290b57cec5SDimitry Andric ((struct __storeu_pd *)__dp)->__v = __a; 19300b57cec5SDimitry Andric } 19310b57cec5SDimitry Andric 19320b57cec5SDimitry Andric /// Stores two double-precision values, in reverse order, from a 128-bit 19330b57cec5SDimitry Andric /// vector of [2 x double] to a 16-byte aligned memory location. 19340b57cec5SDimitry Andric /// 19350b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19360b57cec5SDimitry Andric /// 19370b57cec5SDimitry Andric /// This intrinsic corresponds to a shuffling instruction followed by a 19380b57cec5SDimitry Andric /// <c> VMOVAPD / MOVAPD </c> instruction. 19390b57cec5SDimitry Andric /// 19400b57cec5SDimitry Andric /// \param __dp 19410b57cec5SDimitry Andric /// A pointer to a 16-byte aligned memory location that can store two 19420b57cec5SDimitry Andric /// double-precision values. 19430b57cec5SDimitry Andric /// \param __a 19440b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the values to be reversed and 19450b57cec5SDimitry Andric /// stored. 
1946*81ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storer_pd(double *__dp, 1947*81ad6265SDimitry Andric __m128d __a) { 19480b57cec5SDimitry Andric __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0); 19490b57cec5SDimitry Andric *(__m128d *)__dp = __a; 19500b57cec5SDimitry Andric } 19510b57cec5SDimitry Andric 19520b57cec5SDimitry Andric /// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a 19530b57cec5SDimitry Andric /// memory location. 19540b57cec5SDimitry Andric /// 19550b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19560b57cec5SDimitry Andric /// 19570b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction. 19580b57cec5SDimitry Andric /// 19590b57cec5SDimitry Andric /// \param __dp 19600b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. 19610b57cec5SDimitry Andric /// \param __a 19620b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the value to be stored. 1963*81ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pd(double *__dp, 1964*81ad6265SDimitry Andric __m128d __a) { 19650b57cec5SDimitry Andric struct __mm_storeh_pd_struct { 19660b57cec5SDimitry Andric double __u; 19670b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 19680b57cec5SDimitry Andric ((struct __mm_storeh_pd_struct *)__dp)->__u = __a[1]; 19690b57cec5SDimitry Andric } 19700b57cec5SDimitry Andric 19710b57cec5SDimitry Andric /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a 19720b57cec5SDimitry Andric /// memory location. 19730b57cec5SDimitry Andric /// 19740b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19750b57cec5SDimitry Andric /// 19760b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction. 19770b57cec5SDimitry Andric /// 19780b57cec5SDimitry Andric /// \param __dp 19790b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. 
19800b57cec5SDimitry Andric /// \param __a 19810b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the value to be stored. 1982*81ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp, 1983*81ad6265SDimitry Andric __m128d __a) { 19840b57cec5SDimitry Andric struct __mm_storeh_pd_struct { 19850b57cec5SDimitry Andric double __u; 19860b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 19870b57cec5SDimitry Andric ((struct __mm_storeh_pd_struct *)__dp)->__u = __a[0]; 19880b57cec5SDimitry Andric } 19890b57cec5SDimitry Andric 19900b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [16 x i8], 19910b57cec5SDimitry Andric /// saving the lower 8 bits of each sum in the corresponding element of a 19920b57cec5SDimitry Andric /// 128-bit result vector of [16 x i8]. 19930b57cec5SDimitry Andric /// 19940b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 19950b57cec5SDimitry Andric /// 19960b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19970b57cec5SDimitry Andric /// 19980b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDB / PADDB </c> instruction. 19990b57cec5SDimitry Andric /// 20000b57cec5SDimitry Andric /// \param __a 20010b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. 20020b57cec5SDimitry Andric /// \param __b 20030b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. 20040b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the sums of both 20050b57cec5SDimitry Andric /// parameters. 
2006*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, 2007*81ad6265SDimitry Andric __m128i __b) { 20080b57cec5SDimitry Andric return (__m128i)((__v16qu)__a + (__v16qu)__b); 20090b57cec5SDimitry Andric } 20100b57cec5SDimitry Andric 20110b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [8 x i16], 20120b57cec5SDimitry Andric /// saving the lower 16 bits of each sum in the corresponding element of a 20130b57cec5SDimitry Andric /// 128-bit result vector of [8 x i16]. 20140b57cec5SDimitry Andric /// 20150b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 20160b57cec5SDimitry Andric /// 20170b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20180b57cec5SDimitry Andric /// 20190b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDW / PADDW </c> instruction. 20200b57cec5SDimitry Andric /// 20210b57cec5SDimitry Andric /// \param __a 20220b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 20230b57cec5SDimitry Andric /// \param __b 20240b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 20250b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the sums of both 20260b57cec5SDimitry Andric /// parameters. 2027*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, 2028*81ad6265SDimitry Andric __m128i __b) { 20290b57cec5SDimitry Andric return (__m128i)((__v8hu)__a + (__v8hu)__b); 20300b57cec5SDimitry Andric } 20310b57cec5SDimitry Andric 20320b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [4 x i32], 20330b57cec5SDimitry Andric /// saving the lower 32 bits of each sum in the corresponding element of a 20340b57cec5SDimitry Andric /// 128-bit result vector of [4 x i32]. 20350b57cec5SDimitry Andric /// 20360b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 
20370b57cec5SDimitry Andric /// 20380b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20390b57cec5SDimitry Andric /// 20400b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDD / PADDD </c> instruction. 20410b57cec5SDimitry Andric /// 20420b57cec5SDimitry Andric /// \param __a 20430b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. 20440b57cec5SDimitry Andric /// \param __b 20450b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. 20460b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the sums of both 20470b57cec5SDimitry Andric /// parameters. 2048*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a, 2049*81ad6265SDimitry Andric __m128i __b) { 20500b57cec5SDimitry Andric return (__m128i)((__v4su)__a + (__v4su)__b); 20510b57cec5SDimitry Andric } 20520b57cec5SDimitry Andric 20530b57cec5SDimitry Andric /// Adds two signed or unsigned 64-bit integer values, returning the 20540b57cec5SDimitry Andric /// lower 64 bits of the sum. 20550b57cec5SDimitry Andric /// 20560b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20570b57cec5SDimitry Andric /// 20580b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PADDQ </c> instruction. 20590b57cec5SDimitry Andric /// 20600b57cec5SDimitry Andric /// \param __a 20610b57cec5SDimitry Andric /// A 64-bit integer. 20620b57cec5SDimitry Andric /// \param __b 20630b57cec5SDimitry Andric /// A 64-bit integer. 20640b57cec5SDimitry Andric /// \returns A 64-bit integer containing the sum of both parameters. 
2065*81ad6265SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_add_si64(__m64 __a, 2066*81ad6265SDimitry Andric __m64 __b) { 20670b57cec5SDimitry Andric return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b); 20680b57cec5SDimitry Andric } 20690b57cec5SDimitry Andric 20700b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [2 x i64], 20710b57cec5SDimitry Andric /// saving the lower 64 bits of each sum in the corresponding element of a 20720b57cec5SDimitry Andric /// 128-bit result vector of [2 x i64]. 20730b57cec5SDimitry Andric /// 20740b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 20750b57cec5SDimitry Andric /// 20760b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20770b57cec5SDimitry Andric /// 20780b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDQ / PADDQ </c> instruction. 20790b57cec5SDimitry Andric /// 20800b57cec5SDimitry Andric /// \param __a 20810b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. 20820b57cec5SDimitry Andric /// \param __b 20830b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. 20840b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the sums of both 20850b57cec5SDimitry Andric /// parameters. 2086*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a, 2087*81ad6265SDimitry Andric __m128i __b) { 20880b57cec5SDimitry Andric return (__m128i)((__v2du)__a + (__v2du)__b); 20890b57cec5SDimitry Andric } 20900b57cec5SDimitry Andric 20910b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 20920b57cec5SDimitry Andric /// signed [16 x i8] vectors, saving each sum in the corresponding element of 20930b57cec5SDimitry Andric /// a 128-bit result vector of [16 x i8]. Positive sums greater than 0x7F are 20940b57cec5SDimitry Andric /// saturated to 0x7F. Negative sums less than 0x80 are saturated to 0x80. 
20950b57cec5SDimitry Andric /// 20960b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20970b57cec5SDimitry Andric /// 20980b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDSB / PADDSB </c> instruction. 20990b57cec5SDimitry Andric /// 21000b57cec5SDimitry Andric /// \param __a 21010b57cec5SDimitry Andric /// A 128-bit signed [16 x i8] vector. 21020b57cec5SDimitry Andric /// \param __b 21030b57cec5SDimitry Andric /// A 128-bit signed [16 x i8] vector. 21040b57cec5SDimitry Andric /// \returns A 128-bit signed [16 x i8] vector containing the saturated sums of 21050b57cec5SDimitry Andric /// both parameters. 2106*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a, 2107*81ad6265SDimitry Andric __m128i __b) { 2108*81ad6265SDimitry Andric return (__m128i)__builtin_elementwise_add_sat((__v16qs)__a, (__v16qs)__b); 21090b57cec5SDimitry Andric } 21100b57cec5SDimitry Andric 21110b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 21120b57cec5SDimitry Andric /// signed [8 x i16] vectors, saving each sum in the corresponding element of 21130b57cec5SDimitry Andric /// a 128-bit result vector of [8 x i16]. Positive sums greater than 0x7FFF 21140b57cec5SDimitry Andric /// are saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to 21150b57cec5SDimitry Andric /// 0x8000. 21160b57cec5SDimitry Andric /// 21170b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21180b57cec5SDimitry Andric /// 21190b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDSW / PADDSW </c> instruction. 21200b57cec5SDimitry Andric /// 21210b57cec5SDimitry Andric /// \param __a 21220b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 21230b57cec5SDimitry Andric /// \param __b 21240b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 
21250b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the saturated sums of 21260b57cec5SDimitry Andric /// both parameters. 2127*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a, 2128*81ad6265SDimitry Andric __m128i __b) { 2129*81ad6265SDimitry Andric return (__m128i)__builtin_elementwise_add_sat((__v8hi)__a, (__v8hi)__b); 21300b57cec5SDimitry Andric } 21310b57cec5SDimitry Andric 21320b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 21330b57cec5SDimitry Andric /// unsigned [16 x i8] vectors, saving each sum in the corresponding element 21340b57cec5SDimitry Andric /// of a 128-bit result vector of [16 x i8]. Positive sums greater than 0xFF 21350b57cec5SDimitry Andric /// are saturated to 0xFF. Negative sums are saturated to 0x00. 21360b57cec5SDimitry Andric /// 21370b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21380b57cec5SDimitry Andric /// 21390b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction. 21400b57cec5SDimitry Andric /// 21410b57cec5SDimitry Andric /// \param __a 21420b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 21430b57cec5SDimitry Andric /// \param __b 21440b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 21450b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the saturated sums 21460b57cec5SDimitry Andric /// of both parameters. 
2147*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a, 2148*81ad6265SDimitry Andric __m128i __b) { 2149*81ad6265SDimitry Andric return (__m128i)__builtin_elementwise_add_sat((__v16qu)__a, (__v16qu)__b); 21500b57cec5SDimitry Andric } 21510b57cec5SDimitry Andric 21520b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 21530b57cec5SDimitry Andric /// unsigned [8 x i16] vectors, saving each sum in the corresponding element 21540b57cec5SDimitry Andric /// of a 128-bit result vector of [8 x i16]. Positive sums greater than 21550b57cec5SDimitry Andric /// 0xFFFF are saturated to 0xFFFF. Negative sums are saturated to 0x0000. 21560b57cec5SDimitry Andric /// 21570b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21580b57cec5SDimitry Andric /// 21590b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction. 21600b57cec5SDimitry Andric /// 21610b57cec5SDimitry Andric /// \param __a 21620b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 21630b57cec5SDimitry Andric /// \param __b 21640b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 21650b57cec5SDimitry Andric /// \returns A 128-bit unsigned [8 x i16] vector containing the saturated sums 21660b57cec5SDimitry Andric /// of both parameters. 2167*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a, 2168*81ad6265SDimitry Andric __m128i __b) { 2169*81ad6265SDimitry Andric return (__m128i)__builtin_elementwise_add_sat((__v8hu)__a, (__v8hu)__b); 21700b57cec5SDimitry Andric } 21710b57cec5SDimitry Andric 2172480093f4SDimitry Andric /// Computes the rounded averages of corresponding elements of two 21730b57cec5SDimitry Andric /// 128-bit unsigned [16 x i8] vectors, saving each result in the 21740b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [16 x i8]. 
21750b57cec5SDimitry Andric /// 21760b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21770b57cec5SDimitry Andric /// 21780b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAVGB / PAVGB </c> instruction. 21790b57cec5SDimitry Andric /// 21800b57cec5SDimitry Andric /// \param __a 21810b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 21820b57cec5SDimitry Andric /// \param __b 21830b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 21840b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the rounded 21850b57cec5SDimitry Andric /// averages of both parameters. 2186*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, 2187*81ad6265SDimitry Andric __m128i __b) { 21880b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b); 21890b57cec5SDimitry Andric } 21900b57cec5SDimitry Andric 2191480093f4SDimitry Andric /// Computes the rounded averages of corresponding elements of two 21920b57cec5SDimitry Andric /// 128-bit unsigned [8 x i16] vectors, saving each result in the 21930b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [8 x i16]. 21940b57cec5SDimitry Andric /// 21950b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21960b57cec5SDimitry Andric /// 21970b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAVGW / PAVGW </c> instruction. 21980b57cec5SDimitry Andric /// 21990b57cec5SDimitry Andric /// \param __a 22000b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 22010b57cec5SDimitry Andric /// \param __b 22020b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 22030b57cec5SDimitry Andric /// \returns A 128-bit unsigned [8 x i16] vector containing the rounded 22040b57cec5SDimitry Andric /// averages of both parameters. 
2205*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, 2206*81ad6265SDimitry Andric __m128i __b) { 22070b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b); 22080b57cec5SDimitry Andric } 22090b57cec5SDimitry Andric 22100b57cec5SDimitry Andric /// Multiplies the corresponding elements of two 128-bit signed [8 x i16] 22110b57cec5SDimitry Andric /// vectors, producing eight intermediate 32-bit signed integer products, and 22120b57cec5SDimitry Andric /// adds the consecutive pairs of 32-bit products to form a 128-bit signed 22130b57cec5SDimitry Andric /// [4 x i32] vector. 22140b57cec5SDimitry Andric /// 22150b57cec5SDimitry Andric /// For example, bits [15:0] of both parameters are multiplied producing a 22160b57cec5SDimitry Andric /// 32-bit product, bits [31:16] of both parameters are multiplied producing 22170b57cec5SDimitry Andric /// a 32-bit product, and the sum of those two products becomes bits [31:0] 22180b57cec5SDimitry Andric /// of the result. 22190b57cec5SDimitry Andric /// 22200b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22210b57cec5SDimitry Andric /// 22220b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMADDWD / PMADDWD </c> instruction. 22230b57cec5SDimitry Andric /// 22240b57cec5SDimitry Andric /// \param __a 22250b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 22260b57cec5SDimitry Andric /// \param __b 22270b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 22280b57cec5SDimitry Andric /// \returns A 128-bit signed [4 x i32] vector containing the sums of products 22290b57cec5SDimitry Andric /// of both parameters. 
2230*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a, 2231*81ad6265SDimitry Andric __m128i __b) { 22320b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b); 22330b57cec5SDimitry Andric } 22340b57cec5SDimitry Andric 22350b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit signed [8 x i16] 22360b57cec5SDimitry Andric /// vectors, saving the greater value from each comparison in the 22370b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [8 x i16]. 22380b57cec5SDimitry Andric /// 22390b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22400b57cec5SDimitry Andric /// 22410b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMAXSW / PMAXSW </c> instruction. 22420b57cec5SDimitry Andric /// 22430b57cec5SDimitry Andric /// \param __a 22440b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 22450b57cec5SDimitry Andric /// \param __b 22460b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 22470b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the greater value of 22480b57cec5SDimitry Andric /// each comparison. 2249*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a, 2250*81ad6265SDimitry Andric __m128i __b) { 225104eeddc0SDimitry Andric return (__m128i)__builtin_elementwise_max((__v8hi)__a, (__v8hi)__b); 22520b57cec5SDimitry Andric } 22530b57cec5SDimitry Andric 22540b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit unsigned [16 x i8] 22550b57cec5SDimitry Andric /// vectors, saving the greater value from each comparison in the 22560b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [16 x i8]. 
22570b57cec5SDimitry Andric /// 22580b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22590b57cec5SDimitry Andric /// 22600b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMAXUB / PMAXUB </c> instruction. 22610b57cec5SDimitry Andric /// 22620b57cec5SDimitry Andric /// \param __a 22630b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 22640b57cec5SDimitry Andric /// \param __b 22650b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 22660b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the greater value of 22670b57cec5SDimitry Andric /// each comparison. 2268*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a, 2269*81ad6265SDimitry Andric __m128i __b) { 227004eeddc0SDimitry Andric return (__m128i)__builtin_elementwise_max((__v16qu)__a, (__v16qu)__b); 22710b57cec5SDimitry Andric } 22720b57cec5SDimitry Andric 22730b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit signed [8 x i16] 22740b57cec5SDimitry Andric /// vectors, saving the smaller value from each comparison in the 22750b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [8 x i16]. 22760b57cec5SDimitry Andric /// 22770b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22780b57cec5SDimitry Andric /// 22790b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMINSW / PMINSW </c> instruction. 22800b57cec5SDimitry Andric /// 22810b57cec5SDimitry Andric /// \param __a 22820b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 22830b57cec5SDimitry Andric /// \param __b 22840b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 22850b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the smaller value of 22860b57cec5SDimitry Andric /// each comparison. 
2287*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a, 2288*81ad6265SDimitry Andric __m128i __b) { 228904eeddc0SDimitry Andric return (__m128i)__builtin_elementwise_min((__v8hi)__a, (__v8hi)__b); 22900b57cec5SDimitry Andric } 22910b57cec5SDimitry Andric 22920b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit unsigned [16 x i8] 22930b57cec5SDimitry Andric /// vectors, saving the smaller value from each comparison in the 22940b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [16 x i8]. 22950b57cec5SDimitry Andric /// 22960b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22970b57cec5SDimitry Andric /// 22980b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMINUB / PMINUB </c> instruction. 22990b57cec5SDimitry Andric /// 23000b57cec5SDimitry Andric /// \param __a 23010b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 23020b57cec5SDimitry Andric /// \param __b 23030b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 23040b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the smaller value of 23050b57cec5SDimitry Andric /// each comparison. 2306*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, 2307*81ad6265SDimitry Andric __m128i __b) { 230804eeddc0SDimitry Andric return (__m128i)__builtin_elementwise_min((__v16qu)__a, (__v16qu)__b); 23090b57cec5SDimitry Andric } 23100b57cec5SDimitry Andric 23110b57cec5SDimitry Andric /// Multiplies the corresponding elements of two signed [8 x i16] 23120b57cec5SDimitry Andric /// vectors, saving the upper 16 bits of each 32-bit product in the 23130b57cec5SDimitry Andric /// corresponding element of a 128-bit signed [8 x i16] result vector. 
23140b57cec5SDimitry Andric /// 23150b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23160b57cec5SDimitry Andric /// 23170b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULHW / PMULHW </c> instruction. 23180b57cec5SDimitry Andric /// 23190b57cec5SDimitry Andric /// \param __a 23200b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 23210b57cec5SDimitry Andric /// \param __b 23220b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 23230b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of 23240b57cec5SDimitry Andric /// each of the eight 32-bit products. 2325*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, 2326*81ad6265SDimitry Andric __m128i __b) { 23270b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b); 23280b57cec5SDimitry Andric } 23290b57cec5SDimitry Andric 23300b57cec5SDimitry Andric /// Multiplies the corresponding elements of two unsigned [8 x i16] 23310b57cec5SDimitry Andric /// vectors, saving the upper 16 bits of each 32-bit product in the 23320b57cec5SDimitry Andric /// corresponding element of a 128-bit unsigned [8 x i16] result vector. 23330b57cec5SDimitry Andric /// 23340b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23350b57cec5SDimitry Andric /// 23360b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULHUW / PMULHUW </c> instruction. 23370b57cec5SDimitry Andric /// 23380b57cec5SDimitry Andric /// \param __a 23390b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 23400b57cec5SDimitry Andric /// \param __b 23410b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 23420b57cec5SDimitry Andric /// \returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits 23430b57cec5SDimitry Andric /// of each of the eight 32-bit products. 
2344*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, 2345*81ad6265SDimitry Andric __m128i __b) { 23460b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b); 23470b57cec5SDimitry Andric } 23480b57cec5SDimitry Andric 23490b57cec5SDimitry Andric /// Multiplies the corresponding elements of two signed [8 x i16] 23500b57cec5SDimitry Andric /// vectors, saving the lower 16 bits of each 32-bit product in the 23510b57cec5SDimitry Andric /// corresponding element of a 128-bit signed [8 x i16] result vector. 23520b57cec5SDimitry Andric /// 23530b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23540b57cec5SDimitry Andric /// 23550b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULLW / PMULLW </c> instruction. 23560b57cec5SDimitry Andric /// 23570b57cec5SDimitry Andric /// \param __a 23580b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 23590b57cec5SDimitry Andric /// \param __b 23600b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 23610b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of 23620b57cec5SDimitry Andric /// each of the eight 32-bit products. 2363*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, 2364*81ad6265SDimitry Andric __m128i __b) { 23650b57cec5SDimitry Andric return (__m128i)((__v8hu)__a * (__v8hu)__b); 23660b57cec5SDimitry Andric } 23670b57cec5SDimitry Andric 23680b57cec5SDimitry Andric /// Multiplies 32-bit unsigned integer values contained in the lower bits 23690b57cec5SDimitry Andric /// of the two 64-bit integer vectors and returns the 64-bit unsigned 23700b57cec5SDimitry Andric /// product. 23710b57cec5SDimitry Andric /// 23720b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23730b57cec5SDimitry Andric /// 23740b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PMULUDQ </c> instruction. 
23750b57cec5SDimitry Andric /// 23760b57cec5SDimitry Andric /// \param __a 23770b57cec5SDimitry Andric /// A 64-bit integer containing one of the source operands. 23780b57cec5SDimitry Andric /// \param __b 23790b57cec5SDimitry Andric /// A 64-bit integer containing one of the source operands. 23800b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the product of both operands. 2381*81ad6265SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mul_su32(__m64 __a, 2382*81ad6265SDimitry Andric __m64 __b) { 23830b57cec5SDimitry Andric return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b); 23840b57cec5SDimitry Andric } 23850b57cec5SDimitry Andric 23860b57cec5SDimitry Andric /// Multiplies 32-bit unsigned integer values contained in the lower 23870b57cec5SDimitry Andric /// bits of the corresponding elements of two [2 x i64] vectors, and returns 23880b57cec5SDimitry Andric /// the 64-bit products in the corresponding elements of a [2 x i64] vector. 23890b57cec5SDimitry Andric /// 23900b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23910b57cec5SDimitry Andric /// 23920b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULUDQ / PMULUDQ </c> instruction. 23930b57cec5SDimitry Andric /// 23940b57cec5SDimitry Andric /// \param __a 23950b57cec5SDimitry Andric /// A [2 x i64] vector containing one of the source operands. 23960b57cec5SDimitry Andric /// \param __b 23970b57cec5SDimitry Andric /// A [2 x i64] vector containing one of the source operands. 23980b57cec5SDimitry Andric /// \returns A [2 x i64] vector containing the product of both operands. 
2399*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a, 2400*81ad6265SDimitry Andric __m128i __b) { 24010b57cec5SDimitry Andric return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b); 24020b57cec5SDimitry Andric } 24030b57cec5SDimitry Andric 24040b57cec5SDimitry Andric /// Computes the absolute differences of corresponding 8-bit integer 24050b57cec5SDimitry Andric /// values in two 128-bit vectors. Sums the first 8 absolute differences, and 24060b57cec5SDimitry Andric /// separately sums the second 8 absolute differences. Packs these two 24070b57cec5SDimitry Andric /// unsigned 16-bit integer sums into the upper and lower elements of a 24080b57cec5SDimitry Andric /// [2 x i64] vector. 24090b57cec5SDimitry Andric /// 24100b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24110b57cec5SDimitry Andric /// 24120b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSADBW / PSADBW </c> instruction. 24130b57cec5SDimitry Andric /// 24140b57cec5SDimitry Andric /// \param __a 24150b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 24160b57cec5SDimitry Andric /// \param __b 24170b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 24180b57cec5SDimitry Andric /// \returns A [2 x i64] vector containing the sums of the sets of absolute 24190b57cec5SDimitry Andric /// differences between both operands. 2420*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a, 2421*81ad6265SDimitry Andric __m128i __b) { 24220b57cec5SDimitry Andric return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b); 24230b57cec5SDimitry Andric } 24240b57cec5SDimitry Andric 24250b57cec5SDimitry Andric /// Subtracts the corresponding 8-bit integer values in the operands. 
24260b57cec5SDimitry Andric /// 24270b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24280b57cec5SDimitry Andric /// 24290b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBB / PSUBB </c> instruction. 24300b57cec5SDimitry Andric /// 24310b57cec5SDimitry Andric /// \param __a 24320b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 24330b57cec5SDimitry Andric /// \param __b 24340b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 24350b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 24360b57cec5SDimitry Andric /// in the operands. 2437*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, 2438*81ad6265SDimitry Andric __m128i __b) { 24390b57cec5SDimitry Andric return (__m128i)((__v16qu)__a - (__v16qu)__b); 24400b57cec5SDimitry Andric } 24410b57cec5SDimitry Andric 24420b57cec5SDimitry Andric /// Subtracts the corresponding 16-bit integer values in the operands. 24430b57cec5SDimitry Andric /// 24440b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24450b57cec5SDimitry Andric /// 24460b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBW / PSUBW </c> instruction. 24470b57cec5SDimitry Andric /// 24480b57cec5SDimitry Andric /// \param __a 24490b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 24500b57cec5SDimitry Andric /// \param __b 24510b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 24520b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 24530b57cec5SDimitry Andric /// in the operands. 
2454*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, 2455*81ad6265SDimitry Andric __m128i __b) { 24560b57cec5SDimitry Andric return (__m128i)((__v8hu)__a - (__v8hu)__b); 24570b57cec5SDimitry Andric } 24580b57cec5SDimitry Andric 24590b57cec5SDimitry Andric /// Subtracts the corresponding 32-bit integer values in the operands. 24600b57cec5SDimitry Andric /// 24610b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24620b57cec5SDimitry Andric /// 24630b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBD / PSUBD </c> instruction. 24640b57cec5SDimitry Andric /// 24650b57cec5SDimitry Andric /// \param __a 24660b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 24670b57cec5SDimitry Andric /// \param __b 24680b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 24690b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 24700b57cec5SDimitry Andric /// in the operands. 2471*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, 2472*81ad6265SDimitry Andric __m128i __b) { 24730b57cec5SDimitry Andric return (__m128i)((__v4su)__a - (__v4su)__b); 24740b57cec5SDimitry Andric } 24750b57cec5SDimitry Andric 24760b57cec5SDimitry Andric /// Subtracts signed or unsigned 64-bit integer values and writes the 24770b57cec5SDimitry Andric /// difference to the corresponding bits in the destination. 24780b57cec5SDimitry Andric /// 24790b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24800b57cec5SDimitry Andric /// 24810b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSUBQ </c> instruction. 24820b57cec5SDimitry Andric /// 24830b57cec5SDimitry Andric /// \param __a 24840b57cec5SDimitry Andric /// A 64-bit integer vector containing the minuend. 
24850b57cec5SDimitry Andric /// \param __b 24860b57cec5SDimitry Andric /// A 64-bit integer vector containing the subtrahend. 24870b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the difference of the values in 24880b57cec5SDimitry Andric /// the operands. 2489*81ad6265SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sub_si64(__m64 __a, 2490*81ad6265SDimitry Andric __m64 __b) { 24910b57cec5SDimitry Andric return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b); 24920b57cec5SDimitry Andric } 24930b57cec5SDimitry Andric 24940b57cec5SDimitry Andric /// Subtracts the corresponding elements of two [2 x i64] vectors. 24950b57cec5SDimitry Andric /// 24960b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24970b57cec5SDimitry Andric /// 24980b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBQ / PSUBQ </c> instruction. 24990b57cec5SDimitry Andric /// 25000b57cec5SDimitry Andric /// \param __a 25010b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 25020b57cec5SDimitry Andric /// \param __b 25030b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 25040b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 25050b57cec5SDimitry Andric /// in the operands. 2506*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, 2507*81ad6265SDimitry Andric __m128i __b) { 25080b57cec5SDimitry Andric return (__m128i)((__v2du)__a - (__v2du)__b); 25090b57cec5SDimitry Andric } 25100b57cec5SDimitry Andric 25110b57cec5SDimitry Andric /// Subtracts corresponding 8-bit signed integer values in the input and 25120b57cec5SDimitry Andric /// returns the differences in the corresponding bytes in the destination. 25130b57cec5SDimitry Andric /// Differences greater than 0x7F are saturated to 0x7F, and differences less 25140b57cec5SDimitry Andric /// than 0x80 are saturated to 0x80. 
25150b57cec5SDimitry Andric /// 25160b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25170b57cec5SDimitry Andric /// 25180b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBSB / PSUBSB </c> instruction. 25190b57cec5SDimitry Andric /// 25200b57cec5SDimitry Andric /// \param __a 25210b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 25220b57cec5SDimitry Andric /// \param __b 25230b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 25240b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 25250b57cec5SDimitry Andric /// in the operands. 2526*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, 2527*81ad6265SDimitry Andric __m128i __b) { 2528*81ad6265SDimitry Andric return (__m128i)__builtin_elementwise_sub_sat((__v16qs)__a, (__v16qs)__b); 25290b57cec5SDimitry Andric } 25300b57cec5SDimitry Andric 25310b57cec5SDimitry Andric /// Subtracts corresponding 16-bit signed integer values in the input and 25320b57cec5SDimitry Andric /// returns the differences in the corresponding bytes in the destination. 25330b57cec5SDimitry Andric /// Differences greater than 0x7FFF are saturated to 0x7FFF, and values less 25340b57cec5SDimitry Andric /// than 0x8000 are saturated to 0x8000. 25350b57cec5SDimitry Andric /// 25360b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25370b57cec5SDimitry Andric /// 25380b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBSW / PSUBSW </c> instruction. 25390b57cec5SDimitry Andric /// 25400b57cec5SDimitry Andric /// \param __a 25410b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 25420b57cec5SDimitry Andric /// \param __b 25430b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 
25440b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 25450b57cec5SDimitry Andric /// in the operands. 2546*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a, 2547*81ad6265SDimitry Andric __m128i __b) { 2548*81ad6265SDimitry Andric return (__m128i)__builtin_elementwise_sub_sat((__v8hi)__a, (__v8hi)__b); 25490b57cec5SDimitry Andric } 25500b57cec5SDimitry Andric 25510b57cec5SDimitry Andric /// Subtracts corresponding 8-bit unsigned integer values in the input 25520b57cec5SDimitry Andric /// and returns the differences in the corresponding bytes in the 25530b57cec5SDimitry Andric /// destination. Differences less than 0x00 are saturated to 0x00. 25540b57cec5SDimitry Andric /// 25550b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25560b57cec5SDimitry Andric /// 25570b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBUSB / PSUBUSB </c> instruction. 25580b57cec5SDimitry Andric /// 25590b57cec5SDimitry Andric /// \param __a 25600b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 25610b57cec5SDimitry Andric /// \param __b 25620b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 25630b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the unsigned integer 25640b57cec5SDimitry Andric /// differences of the values in the operands. 2565*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a, 2566*81ad6265SDimitry Andric __m128i __b) { 2567*81ad6265SDimitry Andric return (__m128i)__builtin_elementwise_sub_sat((__v16qu)__a, (__v16qu)__b); 25680b57cec5SDimitry Andric } 25690b57cec5SDimitry Andric 25700b57cec5SDimitry Andric /// Subtracts corresponding 16-bit unsigned integer values in the input 25710b57cec5SDimitry Andric /// and returns the differences in the corresponding bytes in the 25720b57cec5SDimitry Andric /// destination. 
Differences less than 0x0000 are saturated to 0x0000. 25730b57cec5SDimitry Andric /// 25740b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25750b57cec5SDimitry Andric /// 25760b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBUSW / PSUBUSW </c> instruction. 25770b57cec5SDimitry Andric /// 25780b57cec5SDimitry Andric /// \param __a 25790b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 25800b57cec5SDimitry Andric /// \param __b 25810b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 25820b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the unsigned integer 25830b57cec5SDimitry Andric /// differences of the values in the operands. 2584*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, 2585*81ad6265SDimitry Andric __m128i __b) { 2586*81ad6265SDimitry Andric return (__m128i)__builtin_elementwise_sub_sat((__v8hu)__a, (__v8hu)__b); 25870b57cec5SDimitry Andric } 25880b57cec5SDimitry Andric 25890b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit integer vectors. 25900b57cec5SDimitry Andric /// 25910b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25920b57cec5SDimitry Andric /// 25930b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction. 25940b57cec5SDimitry Andric /// 25950b57cec5SDimitry Andric /// \param __a 25960b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 25970b57cec5SDimitry Andric /// \param __b 25980b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 25990b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise AND of the values 26000b57cec5SDimitry Andric /// in both operands. 
2601*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, 2602*81ad6265SDimitry Andric __m128i __b) { 26030b57cec5SDimitry Andric return (__m128i)((__v2du)__a & (__v2du)__b); 26040b57cec5SDimitry Andric } 26050b57cec5SDimitry Andric 26060b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit integer vectors, using the 26070b57cec5SDimitry Andric /// one's complement of the values contained in the first source operand. 26080b57cec5SDimitry Andric /// 26090b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26100b57cec5SDimitry Andric /// 26110b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction. 26120b57cec5SDimitry Andric /// 26130b57cec5SDimitry Andric /// \param __a 26140b57cec5SDimitry Andric /// A 128-bit vector containing the left source operand. The one's complement 26150b57cec5SDimitry Andric /// of this value is used in the bitwise AND. 26160b57cec5SDimitry Andric /// \param __b 26170b57cec5SDimitry Andric /// A 128-bit vector containing the right source operand. 26180b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise AND of the one's 26190b57cec5SDimitry Andric /// complement of the first operand and the values in the second operand. 2620*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, 2621*81ad6265SDimitry Andric __m128i __b) { 26220b57cec5SDimitry Andric return (__m128i)(~(__v2du)__a & (__v2du)__b); 26230b57cec5SDimitry Andric } 26240b57cec5SDimitry Andric /// Performs a bitwise OR of two 128-bit integer vectors. 26250b57cec5SDimitry Andric /// 26260b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26270b57cec5SDimitry Andric /// 26280b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPOR / POR </c> instruction. 
26290b57cec5SDimitry Andric /// 26300b57cec5SDimitry Andric /// \param __a 26310b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 26320b57cec5SDimitry Andric /// \param __b 26330b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 26340b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise OR of the values 26350b57cec5SDimitry Andric /// in both operands. 2636*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, 2637*81ad6265SDimitry Andric __m128i __b) { 26380b57cec5SDimitry Andric return (__m128i)((__v2du)__a | (__v2du)__b); 26390b57cec5SDimitry Andric } 26400b57cec5SDimitry Andric 26410b57cec5SDimitry Andric /// Performs a bitwise exclusive OR of two 128-bit integer vectors. 26420b57cec5SDimitry Andric /// 26430b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26440b57cec5SDimitry Andric /// 26450b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction. 26460b57cec5SDimitry Andric /// 26470b57cec5SDimitry Andric /// \param __a 26480b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 26490b57cec5SDimitry Andric /// \param __b 26500b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 26510b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise exclusive OR of the 26520b57cec5SDimitry Andric /// values in both operands. 2653*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, 2654*81ad6265SDimitry Andric __m128i __b) { 26550b57cec5SDimitry Andric return (__m128i)((__v2du)__a ^ (__v2du)__b); 26560b57cec5SDimitry Andric } 26570b57cec5SDimitry Andric 26580b57cec5SDimitry Andric /// Left-shifts the 128-bit integer vector operand by the specified 26590b57cec5SDimitry Andric /// number of bytes. Low-order bits are cleared. 
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_slli_si128(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPSLLDQ / PSLLDQ </c> instruction.
///
/// \param a
///    A 128-bit integer vector containing the source operand.
/// \param imm
///    An immediate value specifying the number of bytes to left-shift operand
///    \a a.
/// \returns A 128-bit integer vector containing the left-shifted value.
#define _mm_slli_si128(a, imm)                                                 \
  ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a),          \
                                                (int)(imm)))

/* Alternate spelling of _mm_slli_si128; it expands to the same byte shift. */
#define _mm_bslli_si128(a, imm) _mm_slli_si128((a), (imm))

/// Left-shifts each 16-bit value in the 128-bit integer vector operand
///    by the specified number of bits. Low-order bits are cleared.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction.
///
/// \param __a
///    A 128-bit integer vector containing the source operand.
26920b57cec5SDimitry Andric /// \param __count 26930b57cec5SDimitry Andric /// An integer value specifying the number of bits to left-shift each value 26940b57cec5SDimitry Andric /// in operand \a __a. 26950b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 2696*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, 2697*81ad6265SDimitry Andric int __count) { 26980b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count); 26990b57cec5SDimitry Andric } 27000b57cec5SDimitry Andric 27010b57cec5SDimitry Andric /// Left-shifts each 16-bit value in the 128-bit integer vector operand 27020b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 27030b57cec5SDimitry Andric /// 27040b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27050b57cec5SDimitry Andric /// 27060b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction. 27070b57cec5SDimitry Andric /// 27080b57cec5SDimitry Andric /// \param __a 27090b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 27100b57cec5SDimitry Andric /// \param __count 27110b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 27120b57cec5SDimitry Andric /// to left-shift each value in operand \a __a. 27130b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 2714*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, 2715*81ad6265SDimitry Andric __m128i __count) { 27160b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count); 27170b57cec5SDimitry Andric } 27180b57cec5SDimitry Andric 27190b57cec5SDimitry Andric /// Left-shifts each 32-bit value in the 128-bit integer vector operand 27200b57cec5SDimitry Andric /// by the specified number of bits. 
Low-order bits are cleared. 27210b57cec5SDimitry Andric /// 27220b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27230b57cec5SDimitry Andric /// 27240b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction. 27250b57cec5SDimitry Andric /// 27260b57cec5SDimitry Andric /// \param __a 27270b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 27280b57cec5SDimitry Andric /// \param __count 27290b57cec5SDimitry Andric /// An integer value specifying the number of bits to left-shift each value 27300b57cec5SDimitry Andric /// in operand \a __a. 27310b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 2732*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a, 2733*81ad6265SDimitry Andric int __count) { 27340b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count); 27350b57cec5SDimitry Andric } 27360b57cec5SDimitry Andric 27370b57cec5SDimitry Andric /// Left-shifts each 32-bit value in the 128-bit integer vector operand 27380b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 27390b57cec5SDimitry Andric /// 27400b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27410b57cec5SDimitry Andric /// 27420b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction. 27430b57cec5SDimitry Andric /// 27440b57cec5SDimitry Andric /// \param __a 27450b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 27460b57cec5SDimitry Andric /// \param __count 27470b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 27480b57cec5SDimitry Andric /// to left-shift each value in operand \a __a. 27490b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 
2750*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, 2751*81ad6265SDimitry Andric __m128i __count) { 27520b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count); 27530b57cec5SDimitry Andric } 27540b57cec5SDimitry Andric 27550b57cec5SDimitry Andric /// Left-shifts each 64-bit value in the 128-bit integer vector operand 27560b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 27570b57cec5SDimitry Andric /// 27580b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27590b57cec5SDimitry Andric /// 27600b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction. 27610b57cec5SDimitry Andric /// 27620b57cec5SDimitry Andric /// \param __a 27630b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 27640b57cec5SDimitry Andric /// \param __count 27650b57cec5SDimitry Andric /// An integer value specifying the number of bits to left-shift each value 27660b57cec5SDimitry Andric /// in operand \a __a. 27670b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 2768*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, 2769*81ad6265SDimitry Andric int __count) { 27700b57cec5SDimitry Andric return __builtin_ia32_psllqi128((__v2di)__a, __count); 27710b57cec5SDimitry Andric } 27720b57cec5SDimitry Andric 27730b57cec5SDimitry Andric /// Left-shifts each 64-bit value in the 128-bit integer vector operand 27740b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 27750b57cec5SDimitry Andric /// 27760b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27770b57cec5SDimitry Andric /// 27780b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction. 
27790b57cec5SDimitry Andric /// 27800b57cec5SDimitry Andric /// \param __a 27810b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 27820b57cec5SDimitry Andric /// \param __count 27830b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 27840b57cec5SDimitry Andric /// to left-shift each value in operand \a __a. 27850b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 2786*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, 2787*81ad6265SDimitry Andric __m128i __count) { 27880b57cec5SDimitry Andric return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count); 27890b57cec5SDimitry Andric } 27900b57cec5SDimitry Andric 27910b57cec5SDimitry Andric /// Right-shifts each 16-bit value in the 128-bit integer vector operand 27920b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 27930b57cec5SDimitry Andric /// bit of the initial value. 27940b57cec5SDimitry Andric /// 27950b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27960b57cec5SDimitry Andric /// 27970b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction. 27980b57cec5SDimitry Andric /// 27990b57cec5SDimitry Andric /// \param __a 28000b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28010b57cec5SDimitry Andric /// \param __count 28020b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 28030b57cec5SDimitry Andric /// in operand \a __a. 28040b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 
2805*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a, 2806*81ad6265SDimitry Andric int __count) { 28070b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count); 28080b57cec5SDimitry Andric } 28090b57cec5SDimitry Andric 28100b57cec5SDimitry Andric /// Right-shifts each 16-bit value in the 128-bit integer vector operand 28110b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 28120b57cec5SDimitry Andric /// bit of the initial value. 28130b57cec5SDimitry Andric /// 28140b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28150b57cec5SDimitry Andric /// 28160b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction. 28170b57cec5SDimitry Andric /// 28180b57cec5SDimitry Andric /// \param __a 28190b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28200b57cec5SDimitry Andric /// \param __count 28210b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 28220b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 28230b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 2824*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, 2825*81ad6265SDimitry Andric __m128i __count) { 28260b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count); 28270b57cec5SDimitry Andric } 28280b57cec5SDimitry Andric 28290b57cec5SDimitry Andric /// Right-shifts each 32-bit value in the 128-bit integer vector operand 28300b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 28310b57cec5SDimitry Andric /// bit of the initial value. 
28320b57cec5SDimitry Andric /// 28330b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28340b57cec5SDimitry Andric /// 28350b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction. 28360b57cec5SDimitry Andric /// 28370b57cec5SDimitry Andric /// \param __a 28380b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28390b57cec5SDimitry Andric /// \param __count 28400b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 28410b57cec5SDimitry Andric /// in operand \a __a. 28420b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 2843*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, 2844*81ad6265SDimitry Andric int __count) { 28450b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count); 28460b57cec5SDimitry Andric } 28470b57cec5SDimitry Andric 28480b57cec5SDimitry Andric /// Right-shifts each 32-bit value in the 128-bit integer vector operand 28490b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 28500b57cec5SDimitry Andric /// bit of the initial value. 28510b57cec5SDimitry Andric /// 28520b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28530b57cec5SDimitry Andric /// 28540b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction. 28550b57cec5SDimitry Andric /// 28560b57cec5SDimitry Andric /// \param __a 28570b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28580b57cec5SDimitry Andric /// \param __count 28590b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 28600b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 28610b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 
2862*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, 2863*81ad6265SDimitry Andric __m128i __count) { 28640b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count); 28650b57cec5SDimitry Andric } 28660b57cec5SDimitry Andric 28670b57cec5SDimitry Andric /// Right-shifts the 128-bit integer vector operand by the specified 28680b57cec5SDimitry Andric /// number of bytes. High-order bits are cleared. 28690b57cec5SDimitry Andric /// 28700b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28710b57cec5SDimitry Andric /// 28720b57cec5SDimitry Andric /// \code 28730b57cec5SDimitry Andric /// __m128i _mm_srli_si128(__m128i a, const int imm); 28740b57cec5SDimitry Andric /// \endcode 28750b57cec5SDimitry Andric /// 28760b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLDQ / PSRLDQ </c> instruction. 28770b57cec5SDimitry Andric /// 28780b57cec5SDimitry Andric /// \param a 28790b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28800b57cec5SDimitry Andric /// \param imm 28810b57cec5SDimitry Andric /// An immediate value specifying the number of bytes to right-shift operand 28820b57cec5SDimitry Andric /// \a a. 28830b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted value. 28840b57cec5SDimitry Andric #define _mm_srli_si128(a, imm) \ 2885*81ad6265SDimitry Andric ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), \ 2886*81ad6265SDimitry Andric (int)(imm))) 28870b57cec5SDimitry Andric 28880b57cec5SDimitry Andric #define _mm_bsrli_si128(a, imm) \ 2889*81ad6265SDimitry Andric ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), \ 2890*81ad6265SDimitry Andric (int)(imm))) 28910b57cec5SDimitry Andric 28920b57cec5SDimitry Andric /// Right-shifts each of 16-bit values in the 128-bit integer vector 28930b57cec5SDimitry Andric /// operand by the specified number of bits. 
High-order bits are cleared. 28940b57cec5SDimitry Andric /// 28950b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28960b57cec5SDimitry Andric /// 28970b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction. 28980b57cec5SDimitry Andric /// 28990b57cec5SDimitry Andric /// \param __a 29000b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29010b57cec5SDimitry Andric /// \param __count 29020b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 29030b57cec5SDimitry Andric /// in operand \a __a. 29040b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 2905*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a, 2906*81ad6265SDimitry Andric int __count) { 29070b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count); 29080b57cec5SDimitry Andric } 29090b57cec5SDimitry Andric 29100b57cec5SDimitry Andric /// Right-shifts each of 16-bit values in the 128-bit integer vector 29110b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 29120b57cec5SDimitry Andric /// 29130b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29140b57cec5SDimitry Andric /// 29150b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction. 29160b57cec5SDimitry Andric /// 29170b57cec5SDimitry Andric /// \param __a 29180b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29190b57cec5SDimitry Andric /// \param __count 29200b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 29210b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 29220b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 
2923*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, 2924*81ad6265SDimitry Andric __m128i __count) { 29250b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count); 29260b57cec5SDimitry Andric } 29270b57cec5SDimitry Andric 29280b57cec5SDimitry Andric /// Right-shifts each of 32-bit values in the 128-bit integer vector 29290b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 29300b57cec5SDimitry Andric /// 29310b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29320b57cec5SDimitry Andric /// 29330b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction. 29340b57cec5SDimitry Andric /// 29350b57cec5SDimitry Andric /// \param __a 29360b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29370b57cec5SDimitry Andric /// \param __count 29380b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 29390b57cec5SDimitry Andric /// in operand \a __a. 29400b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 2941*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, 2942*81ad6265SDimitry Andric int __count) { 29430b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count); 29440b57cec5SDimitry Andric } 29450b57cec5SDimitry Andric 29460b57cec5SDimitry Andric /// Right-shifts each of 32-bit values in the 128-bit integer vector 29470b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 29480b57cec5SDimitry Andric /// 29490b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29500b57cec5SDimitry Andric /// 29510b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction. 
29520b57cec5SDimitry Andric /// 29530b57cec5SDimitry Andric /// \param __a 29540b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29550b57cec5SDimitry Andric /// \param __count 29560b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 29570b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 29580b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 2959*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, 2960*81ad6265SDimitry Andric __m128i __count) { 29610b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count); 29620b57cec5SDimitry Andric } 29630b57cec5SDimitry Andric 29640b57cec5SDimitry Andric /// Right-shifts each of 64-bit values in the 128-bit integer vector 29650b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 29660b57cec5SDimitry Andric /// 29670b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29680b57cec5SDimitry Andric /// 29690b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction. 29700b57cec5SDimitry Andric /// 29710b57cec5SDimitry Andric /// \param __a 29720b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29730b57cec5SDimitry Andric /// \param __count 29740b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 29750b57cec5SDimitry Andric /// in operand \a __a. 29760b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 
2977*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, 2978*81ad6265SDimitry Andric int __count) { 29790b57cec5SDimitry Andric return __builtin_ia32_psrlqi128((__v2di)__a, __count); 29800b57cec5SDimitry Andric } 29810b57cec5SDimitry Andric 29820b57cec5SDimitry Andric /// Right-shifts each of 64-bit values in the 128-bit integer vector 29830b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 29840b57cec5SDimitry Andric /// 29850b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29860b57cec5SDimitry Andric /// 29870b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction. 29880b57cec5SDimitry Andric /// 29890b57cec5SDimitry Andric /// \param __a 29900b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29910b57cec5SDimitry Andric /// \param __count 29920b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 29930b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 29940b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 2995*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, 2996*81ad6265SDimitry Andric __m128i __count) { 29970b57cec5SDimitry Andric return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count); 29980b57cec5SDimitry Andric } 29990b57cec5SDimitry Andric 30000b57cec5SDimitry Andric /// Compares each of the corresponding 8-bit values of the 128-bit 30010b57cec5SDimitry Andric /// integer vectors for equality. Each comparison yields 0x0 for false, 0xFF 30020b57cec5SDimitry Andric /// for true. 30030b57cec5SDimitry Andric /// 30040b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 30050b57cec5SDimitry Andric /// 30060b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPEQB / PCMPEQB </c> instruction. 
30070b57cec5SDimitry Andric /// 30080b57cec5SDimitry Andric /// \param __a 30090b57cec5SDimitry Andric /// A 128-bit integer vector. 30100b57cec5SDimitry Andric /// \param __b 30110b57cec5SDimitry Andric /// A 128-bit integer vector. 30120b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 3013*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, 3014*81ad6265SDimitry Andric __m128i __b) { 30150b57cec5SDimitry Andric return (__m128i)((__v16qi)__a == (__v16qi)__b); 30160b57cec5SDimitry Andric } 30170b57cec5SDimitry Andric 30180b57cec5SDimitry Andric /// Compares each of the corresponding 16-bit values of the 128-bit 30190b57cec5SDimitry Andric /// integer vectors for equality. Each comparison yields 0x0 for false, 30200b57cec5SDimitry Andric /// 0xFFFF for true. 30210b57cec5SDimitry Andric /// 30220b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 30230b57cec5SDimitry Andric /// 30240b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPEQW / PCMPEQW </c> instruction. 30250b57cec5SDimitry Andric /// 30260b57cec5SDimitry Andric /// \param __a 30270b57cec5SDimitry Andric /// A 128-bit integer vector. 30280b57cec5SDimitry Andric /// \param __b 30290b57cec5SDimitry Andric /// A 128-bit integer vector. 30300b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 3031*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi16(__m128i __a, 3032*81ad6265SDimitry Andric __m128i __b) { 30330b57cec5SDimitry Andric return (__m128i)((__v8hi)__a == (__v8hi)__b); 30340b57cec5SDimitry Andric } 30350b57cec5SDimitry Andric 30360b57cec5SDimitry Andric /// Compares each of the corresponding 32-bit values of the 128-bit 30370b57cec5SDimitry Andric /// integer vectors for equality. Each comparison yields 0x0 for false, 30380b57cec5SDimitry Andric /// 0xFFFFFFFF for true. 
30390b57cec5SDimitry Andric /// 30400b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 30410b57cec5SDimitry Andric /// 30420b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPEQD / PCMPEQD </c> instruction. 30430b57cec5SDimitry Andric /// 30440b57cec5SDimitry Andric /// \param __a 30450b57cec5SDimitry Andric /// A 128-bit integer vector. 30460b57cec5SDimitry Andric /// \param __b 30470b57cec5SDimitry Andric /// A 128-bit integer vector. 30480b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 3049*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi32(__m128i __a, 3050*81ad6265SDimitry Andric __m128i __b) { 30510b57cec5SDimitry Andric return (__m128i)((__v4si)__a == (__v4si)__b); 30520b57cec5SDimitry Andric } 30530b57cec5SDimitry Andric 30540b57cec5SDimitry Andric /// Compares each of the corresponding signed 8-bit values of the 128-bit 30550b57cec5SDimitry Andric /// integer vectors to determine if the values in the first operand are 30560b57cec5SDimitry Andric /// greater than those in the second operand. Each comparison yields 0x0 for 30570b57cec5SDimitry Andric /// false, 0xFF for true. 30580b57cec5SDimitry Andric /// 30590b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 30600b57cec5SDimitry Andric /// 30610b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction. 30620b57cec5SDimitry Andric /// 30630b57cec5SDimitry Andric /// \param __a 30640b57cec5SDimitry Andric /// A 128-bit integer vector. 30650b57cec5SDimitry Andric /// \param __b 30660b57cec5SDimitry Andric /// A 128-bit integer vector. 30670b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 
3068*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi8(__m128i __a, 3069*81ad6265SDimitry Andric __m128i __b) { 30700b57cec5SDimitry Andric /* This function always performs a signed comparison, but __v16qi is a char 30710b57cec5SDimitry Andric which may be signed or unsigned, so use __v16qs. */ 30720b57cec5SDimitry Andric return (__m128i)((__v16qs)__a > (__v16qs)__b); 30730b57cec5SDimitry Andric } 30740b57cec5SDimitry Andric 30750b57cec5SDimitry Andric /// Compares each of the corresponding signed 16-bit values of the 30760b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 30770b57cec5SDimitry Andric /// are greater than those in the second operand. 30780b57cec5SDimitry Andric /// 30790b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFF for true. 30800b57cec5SDimitry Andric /// 30810b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 30820b57cec5SDimitry Andric /// 30830b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction. 30840b57cec5SDimitry Andric /// 30850b57cec5SDimitry Andric /// \param __a 30860b57cec5SDimitry Andric /// A 128-bit integer vector. 30870b57cec5SDimitry Andric /// \param __b 30880b57cec5SDimitry Andric /// A 128-bit integer vector. 30890b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 3090*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi16(__m128i __a, 3091*81ad6265SDimitry Andric __m128i __b) { 30920b57cec5SDimitry Andric return (__m128i)((__v8hi)__a > (__v8hi)__b); 30930b57cec5SDimitry Andric } 30940b57cec5SDimitry Andric 30950b57cec5SDimitry Andric /// Compares each of the corresponding signed 32-bit values of the 30960b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 30970b57cec5SDimitry Andric /// are greater than those in the second operand. 
30980b57cec5SDimitry Andric /// 30990b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. 31000b57cec5SDimitry Andric /// 31010b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31020b57cec5SDimitry Andric /// 31030b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction. 31040b57cec5SDimitry Andric /// 31050b57cec5SDimitry Andric /// \param __a 31060b57cec5SDimitry Andric /// A 128-bit integer vector. 31070b57cec5SDimitry Andric /// \param __b 31080b57cec5SDimitry Andric /// A 128-bit integer vector. 31090b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 3110*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi32(__m128i __a, 3111*81ad6265SDimitry Andric __m128i __b) { 31120b57cec5SDimitry Andric return (__m128i)((__v4si)__a > (__v4si)__b); 31130b57cec5SDimitry Andric } 31140b57cec5SDimitry Andric 31150b57cec5SDimitry Andric /// Compares each of the corresponding signed 8-bit values of the 128-bit 31160b57cec5SDimitry Andric /// integer vectors to determine if the values in the first operand are less 31170b57cec5SDimitry Andric /// than those in the second operand. 31180b57cec5SDimitry Andric /// 31190b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFF for true. 31200b57cec5SDimitry Andric /// 31210b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31220b57cec5SDimitry Andric /// 31230b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction. 31240b57cec5SDimitry Andric /// 31250b57cec5SDimitry Andric /// \param __a 31260b57cec5SDimitry Andric /// A 128-bit integer vector. 31270b57cec5SDimitry Andric /// \param __b 31280b57cec5SDimitry Andric /// A 128-bit integer vector. 31290b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 
3130*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi8(__m128i __a, 3131*81ad6265SDimitry Andric __m128i __b) { 31320b57cec5SDimitry Andric return _mm_cmpgt_epi8(__b, __a); 31330b57cec5SDimitry Andric } 31340b57cec5SDimitry Andric 31350b57cec5SDimitry Andric /// Compares each of the corresponding signed 16-bit values of the 31360b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 31370b57cec5SDimitry Andric /// are less than those in the second operand. 31380b57cec5SDimitry Andric /// 31390b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFF for true. 31400b57cec5SDimitry Andric /// 31410b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31420b57cec5SDimitry Andric /// 31430b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction. 31440b57cec5SDimitry Andric /// 31450b57cec5SDimitry Andric /// \param __a 31460b57cec5SDimitry Andric /// A 128-bit integer vector. 31470b57cec5SDimitry Andric /// \param __b 31480b57cec5SDimitry Andric /// A 128-bit integer vector. 31490b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 3150*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi16(__m128i __a, 3151*81ad6265SDimitry Andric __m128i __b) { 31520b57cec5SDimitry Andric return _mm_cmpgt_epi16(__b, __a); 31530b57cec5SDimitry Andric } 31540b57cec5SDimitry Andric 31550b57cec5SDimitry Andric /// Compares each of the corresponding signed 32-bit values of the 31560b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 31570b57cec5SDimitry Andric /// are less than those in the second operand. 31580b57cec5SDimitry Andric /// 31590b57cec5SDimitry Andric /// Each comparison yields 0x0 for false, 0xFFFFFFFF for true. 
31600b57cec5SDimitry Andric /// 31610b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31620b57cec5SDimitry Andric /// 31630b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction. 31640b57cec5SDimitry Andric /// 31650b57cec5SDimitry Andric /// \param __a 31660b57cec5SDimitry Andric /// A 128-bit integer vector. 31670b57cec5SDimitry Andric /// \param __b 31680b57cec5SDimitry Andric /// A 128-bit integer vector. 31690b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 3170*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi32(__m128i __a, 3171*81ad6265SDimitry Andric __m128i __b) { 31720b57cec5SDimitry Andric return _mm_cmpgt_epi32(__b, __a); 31730b57cec5SDimitry Andric } 31740b57cec5SDimitry Andric 31750b57cec5SDimitry Andric #ifdef __x86_64__ 31760b57cec5SDimitry Andric /// Converts a 64-bit signed integer value from the second operand into a 31770b57cec5SDimitry Andric /// double-precision value and returns it in the lower element of a [2 x 31780b57cec5SDimitry Andric /// double] vector; the upper element of the returned vector is copied from 31790b57cec5SDimitry Andric /// the upper element of the first operand. 31800b57cec5SDimitry Andric /// 31810b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31820b57cec5SDimitry Andric /// 31830b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction. 31840b57cec5SDimitry Andric /// 31850b57cec5SDimitry Andric /// \param __a 31860b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits of this operand are 31870b57cec5SDimitry Andric /// copied to the upper 64 bits of the destination. 31880b57cec5SDimitry Andric /// \param __b 31890b57cec5SDimitry Andric /// A 64-bit signed integer operand containing the value to be converted. 
31900b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 31910b57cec5SDimitry Andric /// converted value of the second operand. The upper 64 bits are copied from 31920b57cec5SDimitry Andric /// the upper 64 bits of the first operand. 3193*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi64_sd(__m128d __a, 3194*81ad6265SDimitry Andric long long __b) { 31950b57cec5SDimitry Andric __a[0] = __b; 31960b57cec5SDimitry Andric return __a; 31970b57cec5SDimitry Andric } 31980b57cec5SDimitry Andric 31990b57cec5SDimitry Andric /// Converts the first (lower) element of a vector of [2 x double] into a 32000b57cec5SDimitry Andric /// 64-bit signed integer value, according to the current rounding mode. 32010b57cec5SDimitry Andric /// 32020b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32030b57cec5SDimitry Andric /// 32040b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction. 32050b57cec5SDimitry Andric /// 32060b57cec5SDimitry Andric /// \param __a 32070b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 32080b57cec5SDimitry Andric /// conversion. 32090b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the converted value. 3210*81ad6265SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtsd_si64(__m128d __a) { 32110b57cec5SDimitry Andric return __builtin_ia32_cvtsd2si64((__v2df)__a); 32120b57cec5SDimitry Andric } 32130b57cec5SDimitry Andric 32140b57cec5SDimitry Andric /// Converts the first (lower) element of a vector of [2 x double] into a 32150b57cec5SDimitry Andric /// 64-bit signed integer value, truncating the result when it is inexact. 
32160b57cec5SDimitry Andric /// 32170b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32180b57cec5SDimitry Andric /// 32190b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c> 32200b57cec5SDimitry Andric /// instruction. 32210b57cec5SDimitry Andric /// 32220b57cec5SDimitry Andric /// \param __a 32230b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 32240b57cec5SDimitry Andric /// conversion. 32250b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the converted value. 3226*81ad6265SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvttsd_si64(__m128d __a) { 32270b57cec5SDimitry Andric return __builtin_ia32_cvttsd2si64((__v2df)__a); 32280b57cec5SDimitry Andric } 32290b57cec5SDimitry Andric #endif 32300b57cec5SDimitry Andric 32310b57cec5SDimitry Andric /// Converts a vector of [4 x i32] into a vector of [4 x float]. 32320b57cec5SDimitry Andric /// 32330b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32340b57cec5SDimitry Andric /// 32350b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTDQ2PS / CVTDQ2PS </c> instruction. 32360b57cec5SDimitry Andric /// 32370b57cec5SDimitry Andric /// \param __a 32380b57cec5SDimitry Andric /// A 128-bit integer vector. 32390b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the converted values. 3240*81ad6265SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepi32_ps(__m128i __a) { 32410b57cec5SDimitry Andric return (__m128) __builtin_convertvector((__v4si)__a, __v4sf); 32420b57cec5SDimitry Andric } 32430b57cec5SDimitry Andric 32440b57cec5SDimitry Andric /// Converts a vector of [4 x float] into a vector of [4 x i32]. 32450b57cec5SDimitry Andric /// 32460b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32470b57cec5SDimitry Andric /// 32480b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPS2DQ / CVTPS2DQ </c> instruction. 
32490b57cec5SDimitry Andric /// 32500b57cec5SDimitry Andric /// \param __a 32510b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. 32520b57cec5SDimitry Andric /// \returns A 128-bit integer vector of [4 x i32] containing the converted 32530b57cec5SDimitry Andric /// values. 3254*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a) { 32550b57cec5SDimitry Andric return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a); 32560b57cec5SDimitry Andric } 32570b57cec5SDimitry Andric 32580b57cec5SDimitry Andric /// Converts a vector of [4 x float] into a vector of [4 x i32], 32590b57cec5SDimitry Andric /// truncating the result when it is inexact. 32600b57cec5SDimitry Andric /// 32610b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32620b57cec5SDimitry Andric /// 32630b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTPS2DQ / CVTTPS2DQ </c> 32640b57cec5SDimitry Andric /// instruction. 32650b57cec5SDimitry Andric /// 32660b57cec5SDimitry Andric /// \param __a 32670b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. 32680b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the converted values. 3269*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a) { 32700b57cec5SDimitry Andric return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a); 32710b57cec5SDimitry Andric } 32720b57cec5SDimitry Andric 32730b57cec5SDimitry Andric /// Returns a vector of [4 x i32] where the lowest element is the input 32740b57cec5SDimitry Andric /// operand and the remaining elements are zero. 32750b57cec5SDimitry Andric /// 32760b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32770b57cec5SDimitry Andric /// 32780b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 32790b57cec5SDimitry Andric /// 32800b57cec5SDimitry Andric /// \param __a 32810b57cec5SDimitry Andric /// A 32-bit signed integer operand. 
32820b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32]. 3283*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi32_si128(int __a) { 32840b57cec5SDimitry Andric return __extension__(__m128i)(__v4si){__a, 0, 0, 0}; 32850b57cec5SDimitry Andric } 32860b57cec5SDimitry Andric 32870b57cec5SDimitry Andric /// Returns a vector of [2 x i64] where the lower element is the input 32880b57cec5SDimitry Andric /// operand and the upper element is zero. 32890b57cec5SDimitry Andric /// 32900b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32910b57cec5SDimitry Andric /// 3292*81ad6265SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction 3293*81ad6265SDimitry Andric /// in 64-bit mode. 32940b57cec5SDimitry Andric /// 32950b57cec5SDimitry Andric /// \param __a 32960b57cec5SDimitry Andric /// A 64-bit signed integer operand containing the value to be converted. 32970b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the converted value. 3298*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi64_si128(long long __a) { 32990b57cec5SDimitry Andric return __extension__(__m128i)(__v2di){__a, 0}; 33000b57cec5SDimitry Andric } 33010b57cec5SDimitry Andric 33020b57cec5SDimitry Andric /// Moves the least significant 32 bits of a vector of [4 x i32] to a 33030b57cec5SDimitry Andric /// 32-bit signed integer value. 33040b57cec5SDimitry Andric /// 33050b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33060b57cec5SDimitry Andric /// 33070b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 33080b57cec5SDimitry Andric /// 33090b57cec5SDimitry Andric /// \param __a 33100b57cec5SDimitry Andric /// A vector of [4 x i32]. The least significant 32 bits are moved to the 33110b57cec5SDimitry Andric /// destination. 33120b57cec5SDimitry Andric /// \returns A 32-bit signed integer containing the moved value. 
3313*81ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi128_si32(__m128i __a) { 33140b57cec5SDimitry Andric __v4si __b = (__v4si)__a; 33150b57cec5SDimitry Andric return __b[0]; 33160b57cec5SDimitry Andric } 33170b57cec5SDimitry Andric 33180b57cec5SDimitry Andric /// Moves the least significant 64 bits of a vector of [2 x i64] to a 33190b57cec5SDimitry Andric /// 64-bit signed integer value. 33200b57cec5SDimitry Andric /// 33210b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33220b57cec5SDimitry Andric /// 33230b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 33240b57cec5SDimitry Andric /// 33250b57cec5SDimitry Andric /// \param __a 33260b57cec5SDimitry Andric /// A vector of [2 x i64]. The least significant 64 bits are moved to the 33270b57cec5SDimitry Andric /// destination. 33280b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the moved value. 3329*81ad6265SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtsi128_si64(__m128i __a) { 33300b57cec5SDimitry Andric return __a[0]; 33310b57cec5SDimitry Andric } 33320b57cec5SDimitry Andric 33330b57cec5SDimitry Andric /// Moves packed integer values from an aligned 128-bit memory location 33340b57cec5SDimitry Andric /// to elements in a 128-bit integer vector. 33350b57cec5SDimitry Andric /// 33360b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33370b57cec5SDimitry Andric /// 33380b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDQA / MOVDQA </c> instruction. 33390b57cec5SDimitry Andric /// 33400b57cec5SDimitry Andric /// \param __p 33410b57cec5SDimitry Andric /// An aligned pointer to a memory location containing integer values. 33420b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the moved values. 
33430b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3344*81ad6265SDimitry Andric _mm_load_si128(__m128i const *__p) { 33450b57cec5SDimitry Andric return *__p; 33460b57cec5SDimitry Andric } 33470b57cec5SDimitry Andric 33480b57cec5SDimitry Andric /// Moves packed integer values from an unaligned 128-bit memory location 33490b57cec5SDimitry Andric /// to elements in a 128-bit integer vector. 33500b57cec5SDimitry Andric /// 33510b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33520b57cec5SDimitry Andric /// 33530b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDQU / MOVDQU </c> instruction. 33540b57cec5SDimitry Andric /// 33550b57cec5SDimitry Andric /// \param __p 33560b57cec5SDimitry Andric /// A pointer to a memory location containing integer values. 33570b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the moved values. 33580b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3359*81ad6265SDimitry Andric _mm_loadu_si128(__m128i_u const *__p) { 33600b57cec5SDimitry Andric struct __loadu_si128 { 33610b57cec5SDimitry Andric __m128i_u __v; 33620b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 3363480093f4SDimitry Andric return ((const struct __loadu_si128 *)__p)->__v; 33640b57cec5SDimitry Andric } 33650b57cec5SDimitry Andric 33660b57cec5SDimitry Andric /// Returns a vector of [2 x i64] where the lower element is taken from 33670b57cec5SDimitry Andric /// the lower element of the operand, and the upper element is zero. 33680b57cec5SDimitry Andric /// 33690b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33700b57cec5SDimitry Andric /// 33710b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 33720b57cec5SDimitry Andric /// 33730b57cec5SDimitry Andric /// \param __p 33740b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. Bits [63:0] are written to bits [63:0] of 33750b57cec5SDimitry Andric /// the destination. 
33760b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64]. The lower order bits contain the 33770b57cec5SDimitry Andric /// moved value. The higher order bits are cleared. 33780b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3379*81ad6265SDimitry Andric _mm_loadl_epi64(__m128i_u const *__p) { 33800b57cec5SDimitry Andric struct __mm_loadl_epi64_struct { 33810b57cec5SDimitry Andric long long __u; 33820b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 3383*81ad6265SDimitry Andric return __extension__(__m128i){ 3384*81ad6265SDimitry Andric ((const struct __mm_loadl_epi64_struct *)__p)->__u, 0}; 33850b57cec5SDimitry Andric } 33860b57cec5SDimitry Andric 33870b57cec5SDimitry Andric /// Generates a 128-bit vector of [4 x i32] with unspecified content. 33880b57cec5SDimitry Andric /// This could be used as an argument to another intrinsic function where the 33890b57cec5SDimitry Andric /// argument is required but the value is not actually used. 33900b57cec5SDimitry Andric /// 33910b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33920b57cec5SDimitry Andric /// 33930b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 33940b57cec5SDimitry Andric /// 33950b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] with unspecified content. 3396*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void) { 33970b57cec5SDimitry Andric return (__m128i)__builtin_ia32_undef128(); 33980b57cec5SDimitry Andric } 33990b57cec5SDimitry Andric 34000b57cec5SDimitry Andric /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with 34010b57cec5SDimitry Andric /// the specified 64-bit integer values. 
34020b57cec5SDimitry Andric /// 34030b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34040b57cec5SDimitry Andric /// 34050b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 34060b57cec5SDimitry Andric /// instruction. 34070b57cec5SDimitry Andric /// 34080b57cec5SDimitry Andric /// \param __q1 34090b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the upper 64 bits of the 34100b57cec5SDimitry Andric /// destination vector of [2 x i64]. 34110b57cec5SDimitry Andric /// \param __q0 34120b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the lower 64 bits of the 34130b57cec5SDimitry Andric /// destination vector of [2 x i64]. 34140b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [2 x i64] containing the values 34150b57cec5SDimitry Andric /// provided in the operands. 3416*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1, 3417*81ad6265SDimitry Andric long long __q0) { 34180b57cec5SDimitry Andric return __extension__(__m128i)(__v2di){__q0, __q1}; 34190b57cec5SDimitry Andric } 34200b57cec5SDimitry Andric 34210b57cec5SDimitry Andric /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with 34220b57cec5SDimitry Andric /// the specified 64-bit integer values. 34230b57cec5SDimitry Andric /// 34240b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34250b57cec5SDimitry Andric /// 34260b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 34270b57cec5SDimitry Andric /// instruction. 34280b57cec5SDimitry Andric /// 34290b57cec5SDimitry Andric /// \param __q1 34300b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the upper 64 bits of the 34310b57cec5SDimitry Andric /// destination vector of [2 x i64]. 
34320b57cec5SDimitry Andric /// \param __q0 34330b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the lower 64 bits of the 34340b57cec5SDimitry Andric /// destination vector of [2 x i64]. 34350b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [2 x i64] containing the values 34360b57cec5SDimitry Andric /// provided in the operands. 3437*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1, 3438*81ad6265SDimitry Andric __m64 __q0) { 34390b57cec5SDimitry Andric return _mm_set_epi64x((long long)__q1, (long long)__q0); 34400b57cec5SDimitry Andric } 34410b57cec5SDimitry Andric 34420b57cec5SDimitry Andric /// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with 34430b57cec5SDimitry Andric /// the specified 32-bit integer values. 34440b57cec5SDimitry Andric /// 34450b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34460b57cec5SDimitry Andric /// 34470b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 34480b57cec5SDimitry Andric /// instruction. 34490b57cec5SDimitry Andric /// 34500b57cec5SDimitry Andric /// \param __i3 34510b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [127:96] of the 34520b57cec5SDimitry Andric /// destination vector. 34530b57cec5SDimitry Andric /// \param __i2 34540b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [95:64] of the destination 34550b57cec5SDimitry Andric /// vector. 34560b57cec5SDimitry Andric /// \param __i1 34570b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [63:32] of the destination 34580b57cec5SDimitry Andric /// vector. 34590b57cec5SDimitry Andric /// \param __i0 34600b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [31:0] of the destination 34610b57cec5SDimitry Andric /// vector. 
34620b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [4 x i32] containing the values 34630b57cec5SDimitry Andric /// provided in the operands. 3464*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2, 3465*81ad6265SDimitry Andric int __i1, int __i0) { 34660b57cec5SDimitry Andric return __extension__(__m128i)(__v4si){__i0, __i1, __i2, __i3}; 34670b57cec5SDimitry Andric } 34680b57cec5SDimitry Andric 34690b57cec5SDimitry Andric /// Initializes the 16-bit values in a 128-bit vector of [8 x i16] with 34700b57cec5SDimitry Andric /// the specified 16-bit integer values. 34710b57cec5SDimitry Andric /// 34720b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34730b57cec5SDimitry Andric /// 34740b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 34750b57cec5SDimitry Andric /// instruction. 34760b57cec5SDimitry Andric /// 34770b57cec5SDimitry Andric /// \param __w7 34780b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [127:112] of the 34790b57cec5SDimitry Andric /// destination vector. 34800b57cec5SDimitry Andric /// \param __w6 34810b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [111:96] of the 34820b57cec5SDimitry Andric /// destination vector. 34830b57cec5SDimitry Andric /// \param __w5 34840b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [95:80] of the destination 34850b57cec5SDimitry Andric /// vector. 34860b57cec5SDimitry Andric /// \param __w4 34870b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [79:64] of the destination 34880b57cec5SDimitry Andric /// vector. 34890b57cec5SDimitry Andric /// \param __w3 34900b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [63:48] of the destination 34910b57cec5SDimitry Andric /// vector. 
34920b57cec5SDimitry Andric /// \param __w2 34930b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [47:32] of the destination 34940b57cec5SDimitry Andric /// vector. 34950b57cec5SDimitry Andric /// \param __w1 34960b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [31:16] of the destination 34970b57cec5SDimitry Andric /// vector. 34980b57cec5SDimitry Andric /// \param __w0 34990b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [15:0] of the destination 35000b57cec5SDimitry Andric /// vector. 35010b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [8 x i16] containing the values 35020b57cec5SDimitry Andric /// provided in the operands. 35030b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3504*81ad6265SDimitry Andric _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, 3505*81ad6265SDimitry Andric short __w2, short __w1, short __w0) { 3506*81ad6265SDimitry Andric return __extension__(__m128i)(__v8hi){__w0, __w1, __w2, __w3, 3507*81ad6265SDimitry Andric __w4, __w5, __w6, __w7}; 35080b57cec5SDimitry Andric } 35090b57cec5SDimitry Andric 35100b57cec5SDimitry Andric /// Initializes the 8-bit values in a 128-bit vector of [16 x i8] with 35110b57cec5SDimitry Andric /// the specified 8-bit integer values. 35120b57cec5SDimitry Andric /// 35130b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 35140b57cec5SDimitry Andric /// 35150b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 35160b57cec5SDimitry Andric /// instruction. 35170b57cec5SDimitry Andric /// 35180b57cec5SDimitry Andric /// \param __b15 35190b57cec5SDimitry Andric /// Initializes bits [127:120] of the destination vector. 35200b57cec5SDimitry Andric /// \param __b14 35210b57cec5SDimitry Andric /// Initializes bits [119:112] of the destination vector. 
/// \param __b13
///    Initializes bits [111:104] of the destination vector.
/// \param __b12
///    Initializes bits [103:96] of the destination vector.
/// \param __b11
///    Initializes bits [95:88] of the destination vector.
/// \param __b10
///    Initializes bits [87:80] of the destination vector.
/// \param __b9
///    Initializes bits [79:72] of the destination vector.
/// \param __b8
///    Initializes bits [71:64] of the destination vector.
/// \param __b7
///    Initializes bits [63:56] of the destination vector.
/// \param __b6
///    Initializes bits [55:48] of the destination vector.
/// \param __b5
///    Initializes bits [47:40] of the destination vector.
/// \param __b4
///    Initializes bits [39:32] of the destination vector.
/// \param __b3
///    Initializes bits [31:24] of the destination vector.
/// \param __b2
///    Initializes bits [23:16] of the destination vector.
/// \param __b1
///    Initializes bits [15:8] of the destination vector.
/// \param __b0
///    Initializes bits [7:0] of the destination vector.
/// \returns An initialized 128-bit vector of [16 x i8] containing the values
///    provided in the operands.
35520b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3553*81ad6265SDimitry Andric _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, 3554*81ad6265SDimitry Andric char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, 3555*81ad6265SDimitry Andric char __b4, char __b3, char __b2, char __b1, char __b0) { 3556*81ad6265SDimitry Andric return __extension__(__m128i)(__v16qi){ 3557*81ad6265SDimitry Andric __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, 3558*81ad6265SDimitry Andric __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15}; 35590b57cec5SDimitry Andric } 35600b57cec5SDimitry Andric 35610b57cec5SDimitry Andric /// Initializes both values in a 128-bit integer vector with the 35620b57cec5SDimitry Andric /// specified 64-bit integer value. 35630b57cec5SDimitry Andric /// 35640b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 35650b57cec5SDimitry Andric /// 35660b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 35670b57cec5SDimitry Andric /// instruction. 35680b57cec5SDimitry Andric /// 35690b57cec5SDimitry Andric /// \param __q 35700b57cec5SDimitry Andric /// Integer value used to initialize the elements of the destination integer 35710b57cec5SDimitry Andric /// vector. 35720b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector of [2 x i64] with both 35730b57cec5SDimitry Andric /// elements containing the value provided in the operand. 3574*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q) { 35750b57cec5SDimitry Andric return _mm_set_epi64x(__q, __q); 35760b57cec5SDimitry Andric } 35770b57cec5SDimitry Andric 35780b57cec5SDimitry Andric /// Initializes both values in a 128-bit vector of [2 x i64] with the 35790b57cec5SDimitry Andric /// specified 64-bit value. 
35800b57cec5SDimitry Andric /// 35810b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 35820b57cec5SDimitry Andric /// 35830b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 35840b57cec5SDimitry Andric /// instruction. 35850b57cec5SDimitry Andric /// 35860b57cec5SDimitry Andric /// \param __q 35870b57cec5SDimitry Andric /// A 64-bit value used to initialize the elements of the destination integer 35880b57cec5SDimitry Andric /// vector. 35890b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [2 x i64] with all elements 35900b57cec5SDimitry Andric /// containing the value provided in the operand. 3591*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q) { 35920b57cec5SDimitry Andric return _mm_set_epi64(__q, __q); 35930b57cec5SDimitry Andric } 35940b57cec5SDimitry Andric 35950b57cec5SDimitry Andric /// Initializes all values in a 128-bit vector of [4 x i32] with the 35960b57cec5SDimitry Andric /// specified 32-bit value. 35970b57cec5SDimitry Andric /// 35980b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 35990b57cec5SDimitry Andric /// 36000b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 36010b57cec5SDimitry Andric /// instruction. 36020b57cec5SDimitry Andric /// 36030b57cec5SDimitry Andric /// \param __i 36040b57cec5SDimitry Andric /// A 32-bit value used to initialize the elements of the destination integer 36050b57cec5SDimitry Andric /// vector. 36060b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [4 x i32] with all elements 36070b57cec5SDimitry Andric /// containing the value provided in the operand. 
3608*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i) { 36090b57cec5SDimitry Andric return _mm_set_epi32(__i, __i, __i, __i); 36100b57cec5SDimitry Andric } 36110b57cec5SDimitry Andric 36120b57cec5SDimitry Andric /// Initializes all values in a 128-bit vector of [8 x i16] with the 36130b57cec5SDimitry Andric /// specified 16-bit value. 36140b57cec5SDimitry Andric /// 36150b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 36160b57cec5SDimitry Andric /// 36170b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 36180b57cec5SDimitry Andric /// instruction. 36190b57cec5SDimitry Andric /// 36200b57cec5SDimitry Andric /// \param __w 36210b57cec5SDimitry Andric /// A 16-bit value used to initialize the elements of the destination integer 36220b57cec5SDimitry Andric /// vector. 36230b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [8 x i16] with all elements 36240b57cec5SDimitry Andric /// containing the value provided in the operand. 3625*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w) { 36260b57cec5SDimitry Andric return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w); 36270b57cec5SDimitry Andric } 36280b57cec5SDimitry Andric 36290b57cec5SDimitry Andric /// Initializes all values in a 128-bit vector of [16 x i8] with the 36300b57cec5SDimitry Andric /// specified 8-bit value. 36310b57cec5SDimitry Andric /// 36320b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 36330b57cec5SDimitry Andric /// 36340b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 36350b57cec5SDimitry Andric /// instruction. 36360b57cec5SDimitry Andric /// 36370b57cec5SDimitry Andric /// \param __b 36380b57cec5SDimitry Andric /// An 8-bit value used to initialize the elements of the destination integer 36390b57cec5SDimitry Andric /// vector. 
36400b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [16 x i8] with all elements 36410b57cec5SDimitry Andric /// containing the value provided in the operand. 3642*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b) { 3643*81ad6265SDimitry Andric return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, 3644*81ad6265SDimitry Andric __b, __b, __b, __b, __b); 36450b57cec5SDimitry Andric } 36460b57cec5SDimitry Andric 36470b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 36480b57cec5SDimitry Andric /// with the specified 64-bit integral values. 36490b57cec5SDimitry Andric /// 36500b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 36510b57cec5SDimitry Andric /// 36520b57cec5SDimitry Andric /// This intrinsic does not correspond to a specific instruction. 36530b57cec5SDimitry Andric /// 36540b57cec5SDimitry Andric /// \param __q0 36550b57cec5SDimitry Andric /// A 64-bit integral value used to initialize the lower 64 bits of the 36560b57cec5SDimitry Andric /// result. 36570b57cec5SDimitry Andric /// \param __q1 36580b57cec5SDimitry Andric /// A 64-bit integral value used to initialize the upper 64 bits of the 36590b57cec5SDimitry Andric /// result. 36600b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 3661*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0, 3662*81ad6265SDimitry Andric __m64 __q1) { 36630b57cec5SDimitry Andric return _mm_set_epi64(__q1, __q0); 36640b57cec5SDimitry Andric } 36650b57cec5SDimitry Andric 36660b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 36670b57cec5SDimitry Andric /// with the specified 32-bit integral values. 
36680b57cec5SDimitry Andric /// 36690b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 36700b57cec5SDimitry Andric /// 36710b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 36720b57cec5SDimitry Andric /// instruction. 36730b57cec5SDimitry Andric /// 36740b57cec5SDimitry Andric /// \param __i0 36750b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [31:0] of the result. 36760b57cec5SDimitry Andric /// \param __i1 36770b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [63:32] of the result. 36780b57cec5SDimitry Andric /// \param __i2 36790b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [95:64] of the result. 36800b57cec5SDimitry Andric /// \param __i3 36810b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [127:96] of the result. 36820b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 3683*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1, 3684*81ad6265SDimitry Andric int __i2, 3685*81ad6265SDimitry Andric int __i3) { 36860b57cec5SDimitry Andric return _mm_set_epi32(__i3, __i2, __i1, __i0); 36870b57cec5SDimitry Andric } 36880b57cec5SDimitry Andric 36890b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 36900b57cec5SDimitry Andric /// with the specified 16-bit integral values. 36910b57cec5SDimitry Andric /// 36920b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 36930b57cec5SDimitry Andric /// 36940b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 36950b57cec5SDimitry Andric /// instruction. 36960b57cec5SDimitry Andric /// 36970b57cec5SDimitry Andric /// \param __w0 36980b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [15:0] of the result. 
36990b57cec5SDimitry Andric /// \param __w1 37000b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [31:16] of the result. 37010b57cec5SDimitry Andric /// \param __w2 37020b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [47:32] of the result. 37030b57cec5SDimitry Andric /// \param __w3 37040b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [63:48] of the result. 37050b57cec5SDimitry Andric /// \param __w4 37060b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [79:64] of the result. 37070b57cec5SDimitry Andric /// \param __w5 37080b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [95:80] of the result. 37090b57cec5SDimitry Andric /// \param __w6 37100b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [111:96] of the result. 37110b57cec5SDimitry Andric /// \param __w7 37120b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [127:112] of the result. 37130b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 37140b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3715*81ad6265SDimitry Andric _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, 3716*81ad6265SDimitry Andric short __w5, short __w6, short __w7) { 37170b57cec5SDimitry Andric return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0); 37180b57cec5SDimitry Andric } 37190b57cec5SDimitry Andric 37200b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 37210b57cec5SDimitry Andric /// with the specified 8-bit integral values. 37220b57cec5SDimitry Andric /// 37230b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 37240b57cec5SDimitry Andric /// 37250b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 37260b57cec5SDimitry Andric /// instruction. 
///
/// \param __b0
///    An 8-bit integral value used to initialize bits [7:0] of the result.
/// \param __b1
///    An 8-bit integral value used to initialize bits [15:8] of the result.
/// \param __b2
///    An 8-bit integral value used to initialize bits [23:16] of the result.
/// \param __b3
///    An 8-bit integral value used to initialize bits [31:24] of the result.
/// \param __b4
///    An 8-bit integral value used to initialize bits [39:32] of the result.
/// \param __b5
///    An 8-bit integral value used to initialize bits [47:40] of the result.
/// \param __b6
///    An 8-bit integral value used to initialize bits [55:48] of the result.
/// \param __b7
///    An 8-bit integral value used to initialize bits [63:56] of the result.
/// \param __b8
///    An 8-bit integral value used to initialize bits [71:64] of the result.
/// \param __b9
///    An 8-bit integral value used to initialize bits [79:72] of the result.
/// \param __b10
///    An 8-bit integral value used to initialize bits [87:80] of the result.
/// \param __b11
///    An 8-bit integral value used to initialize bits [95:88] of the result.
/// \param __b12
///    An 8-bit integral value used to initialize bits [103:96] of the result.
37540b57cec5SDimitry Andric /// \param __b13 37550b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [111:104] of the result. 37560b57cec5SDimitry Andric /// \param __b14 37570b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [119:112] of the result. 37580b57cec5SDimitry Andric /// \param __b15 37590b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [127:120] of the result. 37600b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 37610b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3762*81ad6265SDimitry Andric _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, 3763*81ad6265SDimitry Andric char __b6, char __b7, char __b8, char __b9, char __b10, 3764*81ad6265SDimitry Andric char __b11, char __b12, char __b13, char __b14, char __b15) { 3765*81ad6265SDimitry Andric return _mm_set_epi8(__b15, __b14, __b13, __b12, __b11, __b10, __b9, __b8, 3766*81ad6265SDimitry Andric __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); 37670b57cec5SDimitry Andric } 37680b57cec5SDimitry Andric 37690b57cec5SDimitry Andric /// Creates a 128-bit integer vector initialized to zero. 37700b57cec5SDimitry Andric /// 37710b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 37720b57cec5SDimitry Andric /// 37730b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction. 37740b57cec5SDimitry Andric /// 37750b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector with all elements set to 37760b57cec5SDimitry Andric /// zero. 3777*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void) { 37780b57cec5SDimitry Andric return __extension__(__m128i)(__v2di){0LL, 0LL}; 37790b57cec5SDimitry Andric } 37800b57cec5SDimitry Andric 37810b57cec5SDimitry Andric /// Stores a 128-bit integer vector to a memory location aligned on a 37820b57cec5SDimitry Andric /// 128-bit boundary. 
37830b57cec5SDimitry Andric /// 37840b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 37850b57cec5SDimitry Andric /// 37860b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction. 37870b57cec5SDimitry Andric /// 37880b57cec5SDimitry Andric /// \param __p 37890b57cec5SDimitry Andric /// A pointer to an aligned memory location that will receive the integer 37900b57cec5SDimitry Andric /// values. 37910b57cec5SDimitry Andric /// \param __b 37920b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be moved. 3793*81ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_store_si128(__m128i *__p, 3794*81ad6265SDimitry Andric __m128i __b) { 37950b57cec5SDimitry Andric *__p = __b; 37960b57cec5SDimitry Andric } 37970b57cec5SDimitry Andric 37980b57cec5SDimitry Andric /// Stores a 128-bit integer vector to an unaligned memory location. 37990b57cec5SDimitry Andric /// 38000b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38010b57cec5SDimitry Andric /// 38020b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction. 38030b57cec5SDimitry Andric /// 38040b57cec5SDimitry Andric /// \param __p 38050b57cec5SDimitry Andric /// A pointer to a memory location that will receive the integer values. 38060b57cec5SDimitry Andric /// \param __b 38070b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be moved. 
3808*81ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si128(__m128i_u *__p, 3809*81ad6265SDimitry Andric __m128i __b) { 38100b57cec5SDimitry Andric struct __storeu_si128 { 38110b57cec5SDimitry Andric __m128i_u __v; 38120b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 38130b57cec5SDimitry Andric ((struct __storeu_si128 *)__p)->__v = __b; 38140b57cec5SDimitry Andric } 38150b57cec5SDimitry Andric 38160b57cec5SDimitry Andric /// Stores a 64-bit integer value from the low element of a 128-bit integer 38170b57cec5SDimitry Andric /// vector. 38180b57cec5SDimitry Andric /// 38190b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38200b57cec5SDimitry Andric /// 38210b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 38220b57cec5SDimitry Andric /// 38230b57cec5SDimitry Andric /// \param __p 38240b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. The address of the memory 3825e8d8bef9SDimitry Andric /// location does not have to be aligned. 38260b57cec5SDimitry Andric /// \param __b 38270b57cec5SDimitry Andric /// A 128-bit integer vector containing the value to be stored. 3828*81ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si64(void *__p, 3829*81ad6265SDimitry Andric __m128i __b) { 38300b57cec5SDimitry Andric struct __storeu_si64 { 38310b57cec5SDimitry Andric long long __v; 38320b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 38330b57cec5SDimitry Andric ((struct __storeu_si64 *)__p)->__v = ((__v2di)__b)[0]; 38340b57cec5SDimitry Andric } 38350b57cec5SDimitry Andric 38360b57cec5SDimitry Andric /// Stores a 32-bit integer value from the low element of a 128-bit integer 38370b57cec5SDimitry Andric /// vector. 
38380b57cec5SDimitry Andric /// 38390b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38400b57cec5SDimitry Andric /// 38410b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 38420b57cec5SDimitry Andric /// 38430b57cec5SDimitry Andric /// \param __p 38440b57cec5SDimitry Andric /// A pointer to a 32-bit memory location. The address of the memory 38450b57cec5SDimitry Andric /// location does not have to be aligned. 38460b57cec5SDimitry Andric /// \param __b 38470b57cec5SDimitry Andric /// A 128-bit integer vector containing the value to be stored. 3848*81ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si32(void *__p, 3849*81ad6265SDimitry Andric __m128i __b) { 38500b57cec5SDimitry Andric struct __storeu_si32 { 38510b57cec5SDimitry Andric int __v; 38520b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 38530b57cec5SDimitry Andric ((struct __storeu_si32 *)__p)->__v = ((__v4si)__b)[0]; 38540b57cec5SDimitry Andric } 38550b57cec5SDimitry Andric 38560b57cec5SDimitry Andric /// Stores a 16-bit integer value from the low element of a 128-bit integer 38570b57cec5SDimitry Andric /// vector. 38580b57cec5SDimitry Andric /// 38590b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38600b57cec5SDimitry Andric /// 38610b57cec5SDimitry Andric /// This intrinsic does not correspond to a specific instruction. 38620b57cec5SDimitry Andric /// 38630b57cec5SDimitry Andric /// \param __p 38640b57cec5SDimitry Andric /// A pointer to a 16-bit memory location. The address of the memory 38650b57cec5SDimitry Andric /// location does not have to be aligned. 38660b57cec5SDimitry Andric /// \param __b 38670b57cec5SDimitry Andric /// A 128-bit integer vector containing the value to be stored. 
3868*81ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si16(void *__p, 3869*81ad6265SDimitry Andric __m128i __b) { 38700b57cec5SDimitry Andric struct __storeu_si16 { 38710b57cec5SDimitry Andric short __v; 38720b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 38730b57cec5SDimitry Andric ((struct __storeu_si16 *)__p)->__v = ((__v8hi)__b)[0]; 38740b57cec5SDimitry Andric } 38750b57cec5SDimitry Andric 38760b57cec5SDimitry Andric /// Moves bytes selected by the mask from the first operand to the 38770b57cec5SDimitry Andric /// specified unaligned memory location. When a mask bit is 1, the 38780b57cec5SDimitry Andric /// corresponding byte is written, otherwise it is not written. 38790b57cec5SDimitry Andric /// 38800b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 38810b57cec5SDimitry Andric /// used again soon). Exception and trap behavior for elements not selected 38820b57cec5SDimitry Andric /// for storage to memory are implementation dependent. 38830b57cec5SDimitry Andric /// 38840b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38850b57cec5SDimitry Andric /// 38860b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMASKMOVDQU / MASKMOVDQU </c> 38870b57cec5SDimitry Andric /// instruction. 38880b57cec5SDimitry Andric /// 38890b57cec5SDimitry Andric /// \param __d 38900b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be moved. 38910b57cec5SDimitry Andric /// \param __n 38920b57cec5SDimitry Andric /// A 128-bit integer vector containing the mask. The most significant bit of 38930b57cec5SDimitry Andric /// each byte represents the mask bits. 38940b57cec5SDimitry Andric /// \param __p 38950b57cec5SDimitry Andric /// A pointer to an unaligned 128-bit memory location where the specified 38960b57cec5SDimitry Andric /// values are moved. 
3897*81ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_maskmoveu_si128(__m128i __d, 3898*81ad6265SDimitry Andric __m128i __n, 3899*81ad6265SDimitry Andric char *__p) { 39000b57cec5SDimitry Andric __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p); 39010b57cec5SDimitry Andric } 39020b57cec5SDimitry Andric 39030b57cec5SDimitry Andric /// Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to 39040b57cec5SDimitry Andric /// a memory location. 39050b57cec5SDimitry Andric /// 39060b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 39070b57cec5SDimitry Andric /// 39080b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVLPS / MOVLPS </c> instruction. 39090b57cec5SDimitry Andric /// 39100b57cec5SDimitry Andric /// \param __p 39110b57cec5SDimitry Andric /// A pointer to a 64-bit memory location that will receive the lower 64 bits 39120b57cec5SDimitry Andric /// of the integer vector parameter. 39130b57cec5SDimitry Andric /// \param __a 39140b57cec5SDimitry Andric /// A 128-bit integer vector of [2 x i64]. The lower 64 bits contain the 39150b57cec5SDimitry Andric /// value to be stored. 3916*81ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p, 3917*81ad6265SDimitry Andric __m128i __a) { 39180b57cec5SDimitry Andric struct __mm_storel_epi64_struct { 39190b57cec5SDimitry Andric long long __u; 39200b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 39210b57cec5SDimitry Andric ((struct __mm_storel_epi64_struct *)__p)->__u = __a[0]; 39220b57cec5SDimitry Andric } 39230b57cec5SDimitry Andric 39240b57cec5SDimitry Andric /// Stores a 128-bit floating point vector of [2 x double] to a 128-bit 39250b57cec5SDimitry Andric /// aligned memory location. 39260b57cec5SDimitry Andric /// 39270b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 39280b57cec5SDimitry Andric /// used again soon). 
39290b57cec5SDimitry Andric /// 39300b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 39310b57cec5SDimitry Andric /// 39320b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction. 39330b57cec5SDimitry Andric /// 39340b57cec5SDimitry Andric /// \param __p 39350b57cec5SDimitry Andric /// A pointer to the 128-bit aligned memory location used to store the value. 39360b57cec5SDimitry Andric /// \param __a 39370b57cec5SDimitry Andric /// A vector of [2 x double] containing the 64-bit values to be stored. 3938*81ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, 3939*81ad6265SDimitry Andric __m128d __a) { 39400b57cec5SDimitry Andric __builtin_nontemporal_store((__v2df)__a, (__v2df *)__p); 39410b57cec5SDimitry Andric } 39420b57cec5SDimitry Andric 39430b57cec5SDimitry Andric /// Stores a 128-bit integer vector to a 128-bit aligned memory location. 39440b57cec5SDimitry Andric /// 39450b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 39460b57cec5SDimitry Andric /// used again soon). 39470b57cec5SDimitry Andric /// 39480b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 39490b57cec5SDimitry Andric /// 39500b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction. 39510b57cec5SDimitry Andric /// 39520b57cec5SDimitry Andric /// \param __p 39530b57cec5SDimitry Andric /// A pointer to the 128-bit aligned memory location used to store the value. 39540b57cec5SDimitry Andric /// \param __a 39550b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be stored. 
3956*81ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, 3957*81ad6265SDimitry Andric __m128i __a) { 39580b57cec5SDimitry Andric __builtin_nontemporal_store((__v2di)__a, (__v2di *)__p); 39590b57cec5SDimitry Andric } 39600b57cec5SDimitry Andric 39610b57cec5SDimitry Andric /// Stores a 32-bit integer value in the specified memory location. 39620b57cec5SDimitry Andric /// 39630b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 39640b57cec5SDimitry Andric /// used again soon). 39650b57cec5SDimitry Andric /// 39660b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 39670b57cec5SDimitry Andric /// 39680b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVNTI </c> instruction. 39690b57cec5SDimitry Andric /// 39700b57cec5SDimitry Andric /// \param __p 39710b57cec5SDimitry Andric /// A pointer to the 32-bit memory location used to store the value. 39720b57cec5SDimitry Andric /// \param __a 39730b57cec5SDimitry Andric /// A 32-bit integer containing the value to be stored. 3974*81ad6265SDimitry Andric static __inline__ void 3975*81ad6265SDimitry Andric __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) 3976*81ad6265SDimitry Andric _mm_stream_si32(int *__p, int __a) { 39770b57cec5SDimitry Andric __builtin_ia32_movnti(__p, __a); 39780b57cec5SDimitry Andric } 39790b57cec5SDimitry Andric 39800b57cec5SDimitry Andric #ifdef __x86_64__ 39810b57cec5SDimitry Andric /// Stores a 64-bit integer value in the specified memory location. 39820b57cec5SDimitry Andric /// 39830b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 39840b57cec5SDimitry Andric /// used again soon). 39850b57cec5SDimitry Andric /// 39860b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 39870b57cec5SDimitry Andric /// 39880b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVNTIQ </c> instruction. 
39890b57cec5SDimitry Andric /// 39900b57cec5SDimitry Andric /// \param __p 39910b57cec5SDimitry Andric /// A pointer to the 64-bit memory location used to store the value. 39920b57cec5SDimitry Andric /// \param __a 39930b57cec5SDimitry Andric /// A 64-bit integer containing the value to be stored. 3994*81ad6265SDimitry Andric static __inline__ void 3995*81ad6265SDimitry Andric __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) 3996*81ad6265SDimitry Andric _mm_stream_si64(long long *__p, long long __a) { 39970b57cec5SDimitry Andric __builtin_ia32_movnti64(__p, __a); 39980b57cec5SDimitry Andric } 39990b57cec5SDimitry Andric #endif 40000b57cec5SDimitry Andric 40010b57cec5SDimitry Andric #if defined(__cplusplus) 40020b57cec5SDimitry Andric extern "C" { 40030b57cec5SDimitry Andric #endif 40040b57cec5SDimitry Andric 40050b57cec5SDimitry Andric /// The cache line containing \a __p is flushed and invalidated from all 40060b57cec5SDimitry Andric /// caches in the coherency domain. 40070b57cec5SDimitry Andric /// 40080b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40090b57cec5SDimitry Andric /// 40100b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CLFLUSH </c> instruction. 40110b57cec5SDimitry Andric /// 40120b57cec5SDimitry Andric /// \param __p 40130b57cec5SDimitry Andric /// A pointer to the memory location used to identify the cache line to be 40140b57cec5SDimitry Andric /// flushed. 40150b57cec5SDimitry Andric void _mm_clflush(void const *__p); 40160b57cec5SDimitry Andric 40170b57cec5SDimitry Andric /// Forces strong memory ordering (serialization) between load 40180b57cec5SDimitry Andric /// instructions preceding this instruction and load instructions following 40190b57cec5SDimitry Andric /// this instruction, ensuring the system completes all previous loads before 40200b57cec5SDimitry Andric /// executing subsequent loads. 
40210b57cec5SDimitry Andric /// 40220b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40230b57cec5SDimitry Andric /// 40240b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> LFENCE </c> instruction. 40250b57cec5SDimitry Andric /// 40260b57cec5SDimitry Andric void _mm_lfence(void); 40270b57cec5SDimitry Andric 40280b57cec5SDimitry Andric /// Forces strong memory ordering (serialization) between load and store 40290b57cec5SDimitry Andric /// instructions preceding this instruction and load and store instructions 40300b57cec5SDimitry Andric /// following this instruction, ensuring that the system completes all 40310b57cec5SDimitry Andric /// previous memory accesses before executing subsequent memory accesses. 40320b57cec5SDimitry Andric /// 40330b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40340b57cec5SDimitry Andric /// 40350b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MFENCE </c> instruction. 40360b57cec5SDimitry Andric /// 40370b57cec5SDimitry Andric void _mm_mfence(void); 40380b57cec5SDimitry Andric 40390b57cec5SDimitry Andric #if defined(__cplusplus) 40400b57cec5SDimitry Andric } // extern "C" 40410b57cec5SDimitry Andric #endif 40420b57cec5SDimitry Andric 40430b57cec5SDimitry Andric /// Converts 16-bit signed integers from both 128-bit integer vector 40440b57cec5SDimitry Andric /// operands into 8-bit signed integers, and packs the results into the 40450b57cec5SDimitry Andric /// destination. Positive values greater than 0x7F are saturated to 0x7F. 40460b57cec5SDimitry Andric /// Negative values less than 0x80 are saturated to 0x80. 40470b57cec5SDimitry Andric /// 40480b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40490b57cec5SDimitry Andric /// 40500b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPACKSSWB / PACKSSWB </c> instruction. 40510b57cec5SDimitry Andric /// 40520b57cec5SDimitry Andric /// \param __a 40530b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. 
Each 16-bit element is treated as 40540b57cec5SDimitry Andric /// a signed integer and is converted to a 8-bit signed integer with 40550b57cec5SDimitry Andric /// saturation. Values greater than 0x7F are saturated to 0x7F. Values less 40560b57cec5SDimitry Andric /// than 0x80 are saturated to 0x80. The converted [8 x i8] values are 40570b57cec5SDimitry Andric /// written to the lower 64 bits of the result. 40580b57cec5SDimitry Andric /// \param __b 40590b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as 40600b57cec5SDimitry Andric /// a signed integer and is converted to a 8-bit signed integer with 40610b57cec5SDimitry Andric /// saturation. Values greater than 0x7F are saturated to 0x7F. Values less 40620b57cec5SDimitry Andric /// than 0x80 are saturated to 0x80. The converted [8 x i8] values are 40630b57cec5SDimitry Andric /// written to the higher 64 bits of the result. 40640b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the converted values. 4065*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a, 4066*81ad6265SDimitry Andric __m128i __b) { 40670b57cec5SDimitry Andric return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b); 40680b57cec5SDimitry Andric } 40690b57cec5SDimitry Andric 40700b57cec5SDimitry Andric /// Converts 32-bit signed integers from both 128-bit integer vector 40710b57cec5SDimitry Andric /// operands into 16-bit signed integers, and packs the results into the 40720b57cec5SDimitry Andric /// destination. Positive values greater than 0x7FFF are saturated to 0x7FFF. 40730b57cec5SDimitry Andric /// Negative values less than 0x8000 are saturated to 0x8000. 40740b57cec5SDimitry Andric /// 40750b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40760b57cec5SDimitry Andric /// 40770b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPACKSSDW / PACKSSDW </c> instruction. 
40780b57cec5SDimitry Andric /// 40790b57cec5SDimitry Andric /// \param __a 40800b57cec5SDimitry Andric /// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as 40810b57cec5SDimitry Andric /// a signed integer and is converted to a 16-bit signed integer with 40820b57cec5SDimitry Andric /// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values 40830b57cec5SDimitry Andric /// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values 40840b57cec5SDimitry Andric /// are written to the lower 64 bits of the result. 40850b57cec5SDimitry Andric /// \param __b 40860b57cec5SDimitry Andric /// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as 40870b57cec5SDimitry Andric /// a signed integer and is converted to a 16-bit signed integer with 40880b57cec5SDimitry Andric /// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values 40890b57cec5SDimitry Andric /// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values 40900b57cec5SDimitry Andric /// are written to the higher 64 bits of the result. 40910b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the converted values. 4092*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a, 4093*81ad6265SDimitry Andric __m128i __b) { 40940b57cec5SDimitry Andric return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b); 40950b57cec5SDimitry Andric } 40960b57cec5SDimitry Andric 40970b57cec5SDimitry Andric /// Converts 16-bit signed integers from both 128-bit integer vector 40980b57cec5SDimitry Andric /// operands into 8-bit unsigned integers, and packs the results into the 40990b57cec5SDimitry Andric /// destination. Values greater than 0xFF are saturated to 0xFF. Values less 41000b57cec5SDimitry Andric /// than 0x00 are saturated to 0x00. 
41010b57cec5SDimitry Andric /// 41020b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 41030b57cec5SDimitry Andric /// 41040b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPACKUSWB / PACKUSWB </c> instruction. 41050b57cec5SDimitry Andric /// 41060b57cec5SDimitry Andric /// \param __a 41070b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as 41080b57cec5SDimitry Andric /// a signed integer and is converted to an 8-bit unsigned integer with 41090b57cec5SDimitry Andric /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 41100b57cec5SDimitry Andric /// than 0x00 are saturated to 0x00. The converted [8 x i8] values are 41110b57cec5SDimitry Andric /// written to the lower 64 bits of the result. 41120b57cec5SDimitry Andric /// \param __b 41130b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as 41140b57cec5SDimitry Andric /// a signed integer and is converted to an 8-bit unsigned integer with 41150b57cec5SDimitry Andric /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 41160b57cec5SDimitry Andric /// than 0x00 are saturated to 0x00. The converted [8 x i8] values are 41170b57cec5SDimitry Andric /// written to the higher 64 bits of the result. 41180b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the converted values. 4119*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, 4120*81ad6265SDimitry Andric __m128i __b) { 41210b57cec5SDimitry Andric return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b); 41220b57cec5SDimitry Andric } 41230b57cec5SDimitry Andric 41240b57cec5SDimitry Andric /// Extracts 16 bits from a 128-bit integer vector of [8 x i16], using 41250b57cec5SDimitry Andric /// the immediate-value parameter as a selector. 
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_extract_epi16(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction.
///
/// \param a
///    A 128-bit integer vector.
/// \param imm
///    An immediate value. Bits [2:0] select the value from \a a to be assigned
///    to bits [15:0] of the result. \n
///    000: assign values from bits [15:0] of \a a. \n
///    001: assign values from bits [31:16] of \a a. \n
///    010: assign values from bits [47:32] of \a a. \n
///    011: assign values from bits [63:48] of \a a. \n
///    100: assign values from bits [79:64] of \a a. \n
///    101: assign values from bits [95:80] of \a a. \n
///    110: assign values from bits [111:96] of \a a. \n
///    111: assign values from bits [127:112] of \a a.
/// \returns An integer, whose lower 16 bits are selected from the 128-bit
///    integer vector parameter and the remaining bits are assigned zeros.
#define _mm_extract_epi16(a, imm)                                              \
  ((int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a),      \
                                                    (int)(imm)))

/// Constructs a 128-bit integer vector by first making a copy of the
///    128-bit integer vector parameter, and then inserting the lower 16 bits
///    of an integer parameter into an offset specified by the immediate-value
///    parameter.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_insert_epi16(__m128i a, int b, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPINSRW / PINSRW </c> instruction.
///
/// \param a
///    A 128-bit integer vector of [8 x i16]. This vector is copied to the
///    result and then one of the eight elements in the result is replaced by
///    the lower 16 bits of \a b.
/// \param b
///    An integer. The lower 16 bits of this parameter are written to the
///    result at the element selected by \a imm.
/// \param imm
///    An immediate value specifying which of the eight 16-bit elements of the
///    result is replaced by the lower 16 bits of \a b (an element index, not a
///    bit offset).
/// \returns A 128-bit integer vector containing the constructed values.
41780b57cec5SDimitry Andric #define _mm_insert_epi16(a, b, imm) \ 4179349cc55cSDimitry Andric ((__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \ 4180349cc55cSDimitry Andric (int)(imm))) 41810b57cec5SDimitry Andric 41820b57cec5SDimitry Andric /// Copies the values of the most significant bits from each 8-bit 41830b57cec5SDimitry Andric /// element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask 41840b57cec5SDimitry Andric /// value, zero-extends the value, and writes it to the destination. 41850b57cec5SDimitry Andric /// 41860b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 41870b57cec5SDimitry Andric /// 41880b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMOVMSKB / PMOVMSKB </c> instruction. 41890b57cec5SDimitry Andric /// 41900b57cec5SDimitry Andric /// \param __a 41910b57cec5SDimitry Andric /// A 128-bit integer vector containing the values with bits to be extracted. 41920b57cec5SDimitry Andric /// \returns The most significant bits from each 8-bit element in \a __a, 41930b57cec5SDimitry Andric /// written to bits [15:0]. The other bits are assigned zeros. 4194*81ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) { 41950b57cec5SDimitry Andric return __builtin_ia32_pmovmskb128((__v16qi)__a); 41960b57cec5SDimitry Andric } 41970b57cec5SDimitry Andric 41980b57cec5SDimitry Andric /// Constructs a 128-bit integer vector by shuffling four 32-bit 41990b57cec5SDimitry Andric /// elements of a 128-bit integer vector parameter, using the immediate-value 42000b57cec5SDimitry Andric /// parameter as a specifier. 
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_shuffle_epi32(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPSHUFD / PSHUFD </c> instruction.
///
/// \param a
///    A 128-bit integer vector containing the values to be copied.
/// \param imm
///    An immediate value containing an 8-bit value specifying which elements to
///    copy from \a a. The destinations within the 128-bit destination are
///    assigned values as follows: \n
///    Bits [1:0] are used to assign values to bits [31:0] of the result. \n
///    Bits [3:2] are used to assign values to bits [63:32] of the result. \n
///    Bits [5:4] are used to assign values to bits [95:64] of the result. \n
///    Bits [7:6] are used to assign values to bits [127:96] of the result. \n
///    Bit value assignments: \n
///    00: assign values from bits [31:0] of \a a. \n
///    01: assign values from bits [63:32] of \a a. \n
///    10: assign values from bits [95:64] of \a a. \n
///    11: assign values from bits [127:96] of \a a. \n
///    Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
///    <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
///    <c>[b6, b4, b2, b0]</c>.
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shuffle_epi32(a, imm)                                              \
  ((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a),                        \
                                  (int)(imm)))

/// Constructs a 128-bit integer vector by shuffling four lower 16-bit
///    elements of a 128-bit integer vector of [8 x i16], using the immediate
///    value parameter as a specifier.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_shufflelo_epi16(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPSHUFLW / PSHUFLW </c> instruction.
///
/// \param a
///    A 128-bit integer vector of [8 x i16]. Bits [127:64] are copied to bits
///    [127:64] of the result.
/// \param imm
///    An 8-bit immediate value specifying which elements to copy from \a a. \n
///    Bits[1:0] are used to assign values to bits [15:0] of the result. \n
///    Bits[3:2] are used to assign values to bits [31:16] of the result. \n
///    Bits[5:4] are used to assign values to bits [47:32] of the result. \n
///    Bits[7:6] are used to assign values to bits [63:48] of the result. \n
///    Bit value assignments: \n
///    00: assign values from bits [15:0] of \a a. \n
///    01: assign values from bits [31:16] of \a a. \n
///    10: assign values from bits [47:32] of \a a. \n
///    11: assign values from bits [63:48] of \a a. \n
///    Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
///    <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
///    <c>[b6, b4, b2, b0]</c>.
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shufflelo_epi16(a, imm)                                            \
  ((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a),                       \
                                   (int)(imm)))

/// Constructs a 128-bit integer vector by shuffling four upper 16-bit
///    elements of a 128-bit integer vector of [8 x i16], using the immediate
///    value parameter as a specifier.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_shufflehi_epi16(__m128i a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPSHUFHW / PSHUFHW </c> instruction.
///
/// \param a
///    A 128-bit integer vector of [8 x i16]. Bits [63:0] are copied to bits
///    [63:0] of the result.
/// \param imm
///    An 8-bit immediate value specifying which elements to copy from \a a. \n
///    Bits[1:0] are used to assign values to bits [79:64] of the result. \n
///    Bits[3:2] are used to assign values to bits [95:80] of the result. \n
///    Bits[5:4] are used to assign values to bits [111:96] of the result. \n
///    Bits[7:6] are used to assign values to bits [127:112] of the result. \n
///    Bit value assignments: \n
///    00: assign values from bits [79:64] of \a a. \n
///    01: assign values from bits [95:80] of \a a. \n
///    10: assign values from bits [111:96] of \a a. \n
///    11: assign values from bits [127:112] of \a a. \n
///    Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
///    <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
///    <c>[b6, b4, b2, b0]</c>.
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shufflehi_epi16(a, imm)                                            \
  ((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a),                       \
                                   (int)(imm)))

/// Unpacks the high-order (index 8-15) values from two 128-bit vectors
///    of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPUNPCKHBW / PUNPCKHBW </c>
///   instruction.
///
/// \param __a
///    A 128-bit vector of [16 x i8]. \n
///    Bits [71:64] are written to bits [7:0] of the result. \n
///    Bits [79:72] are written to bits [23:16] of the result. \n
///    Bits [87:80] are written to bits [39:32] of the result. \n
///    Bits [95:88] are written to bits [55:48] of the result. \n
///    Bits [103:96] are written to bits [71:64] of the result. \n
///    Bits [111:104] are written to bits [87:80] of the result. \n
///    Bits [119:112] are written to bits [103:96] of the result. \n
///    Bits [127:120] are written to bits [119:112] of the result.
/// \param __b
///    A 128-bit vector of [16 x i8]. \n
///    Bits [71:64] are written to bits [15:8] of the result. \n
///    Bits [79:72] are written to bits [31:24] of the result. \n
///    Bits [87:80] are written to bits [47:40] of the result. \n
///    Bits [95:88] are written to bits [63:56] of the result. \n
///    Bits [103:96] are written to bits [79:72] of the result. \n
///    Bits [111:104] are written to bits [95:88] of the result. \n
///    Bits [119:112] are written to bits [111:104] of the result. \n
///    Bits [127:120] are written to bits [127:120] of the result.
/// \returns A 128-bit vector of [16 x i8] containing the interleaved values.
4327*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, 4328*81ad6265SDimitry Andric __m128i __b) { 4329*81ad6265SDimitry Andric return (__m128i)__builtin_shufflevector( 4330*81ad6265SDimitry Andric (__v16qi)__a, (__v16qi)__b, 8, 16 + 8, 9, 16 + 9, 10, 16 + 10, 11, 4331*81ad6265SDimitry Andric 16 + 11, 12, 16 + 12, 13, 16 + 13, 14, 16 + 14, 15, 16 + 15); 43320b57cec5SDimitry Andric } 43330b57cec5SDimitry Andric 43340b57cec5SDimitry Andric /// Unpacks the high-order (index 4-7) values from two 128-bit vectors of 43350b57cec5SDimitry Andric /// [8 x i16] and interleaves them into a 128-bit vector of [8 x i16]. 43360b57cec5SDimitry Andric /// 43370b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 43380b57cec5SDimitry Andric /// 43390b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHWD / PUNPCKHWD </c> 43400b57cec5SDimitry Andric /// instruction. 43410b57cec5SDimitry Andric /// 43420b57cec5SDimitry Andric /// \param __a 43430b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 43440b57cec5SDimitry Andric /// Bits [79:64] are written to bits [15:0] of the result. \n 43450b57cec5SDimitry Andric /// Bits [95:80] are written to bits [47:32] of the result. \n 43460b57cec5SDimitry Andric /// Bits [111:96] are written to bits [79:64] of the result. \n 43470b57cec5SDimitry Andric /// Bits [127:112] are written to bits [111:96] of the result. 43480b57cec5SDimitry Andric /// \param __b 43490b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 43500b57cec5SDimitry Andric /// Bits [79:64] are written to bits [31:16] of the result. \n 43510b57cec5SDimitry Andric /// Bits [95:80] are written to bits [63:48] of the result. \n 43520b57cec5SDimitry Andric /// Bits [111:96] are written to bits [95:80] of the result. \n 43530b57cec5SDimitry Andric /// Bits [127:112] are written to bits [127:112] of the result. 
43540b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the interleaved values. 4355*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a, 4356*81ad6265SDimitry Andric __m128i __b) { 4357*81ad6265SDimitry Andric return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8 + 4, 5, 4358*81ad6265SDimitry Andric 8 + 5, 6, 8 + 6, 7, 8 + 7); 43590b57cec5SDimitry Andric } 43600b57cec5SDimitry Andric 43610b57cec5SDimitry Andric /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of 43620b57cec5SDimitry Andric /// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32]. 43630b57cec5SDimitry Andric /// 43640b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 43650b57cec5SDimitry Andric /// 43660b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHDQ / PUNPCKHDQ </c> 43670b57cec5SDimitry Andric /// instruction. 43680b57cec5SDimitry Andric /// 43690b57cec5SDimitry Andric /// \param __a 43700b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 43710b57cec5SDimitry Andric /// Bits [95:64] are written to bits [31:0] of the destination. \n 43720b57cec5SDimitry Andric /// Bits [127:96] are written to bits [95:64] of the destination. 43730b57cec5SDimitry Andric /// \param __b 43740b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 43750b57cec5SDimitry Andric /// Bits [95:64] are written to bits [64:32] of the destination. \n 43760b57cec5SDimitry Andric /// Bits [127:96] are written to bits [127:96] of the destination. 43770b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the interleaved values. 
4378*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, 4379*81ad6265SDimitry Andric __m128i __b) { 4380*81ad6265SDimitry Andric return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4 + 2, 3, 4381*81ad6265SDimitry Andric 4 + 3); 43820b57cec5SDimitry Andric } 43830b57cec5SDimitry Andric 43840b57cec5SDimitry Andric /// Unpacks the high-order 64-bit elements from two 128-bit vectors of 43850b57cec5SDimitry Andric /// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64]. 43860b57cec5SDimitry Andric /// 43870b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 43880b57cec5SDimitry Andric /// 43890b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHQDQ / PUNPCKHQDQ </c> 43900b57cec5SDimitry Andric /// instruction. 43910b57cec5SDimitry Andric /// 43920b57cec5SDimitry Andric /// \param __a 43930b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. \n 43940b57cec5SDimitry Andric /// Bits [127:64] are written to bits [63:0] of the destination. 43950b57cec5SDimitry Andric /// \param __b 43960b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. \n 43970b57cec5SDimitry Andric /// Bits [127:64] are written to bits [127:64] of the destination. 43980b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the interleaved values. 4399*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, 4400*81ad6265SDimitry Andric __m128i __b) { 44010b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2 + 1); 44020b57cec5SDimitry Andric } 44030b57cec5SDimitry Andric 44040b57cec5SDimitry Andric /// Unpacks the low-order (index 0-7) values from two 128-bit vectors of 44050b57cec5SDimitry Andric /// [16 x i8] and interleaves them into a 128-bit vector of [16 x i8]. 
44060b57cec5SDimitry Andric /// 44070b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 44080b57cec5SDimitry Andric /// 44090b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLBW / PUNPCKLBW </c> 44100b57cec5SDimitry Andric /// instruction. 44110b57cec5SDimitry Andric /// 44120b57cec5SDimitry Andric /// \param __a 44130b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. \n 44140b57cec5SDimitry Andric /// Bits [7:0] are written to bits [7:0] of the result. \n 44150b57cec5SDimitry Andric /// Bits [15:8] are written to bits [23:16] of the result. \n 44160b57cec5SDimitry Andric /// Bits [23:16] are written to bits [39:32] of the result. \n 44170b57cec5SDimitry Andric /// Bits [31:24] are written to bits [55:48] of the result. \n 44180b57cec5SDimitry Andric /// Bits [39:32] are written to bits [71:64] of the result. \n 44190b57cec5SDimitry Andric /// Bits [47:40] are written to bits [87:80] of the result. \n 44200b57cec5SDimitry Andric /// Bits [55:48] are written to bits [103:96] of the result. \n 44210b57cec5SDimitry Andric /// Bits [63:56] are written to bits [119:112] of the result. 44220b57cec5SDimitry Andric /// \param __b 44230b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. 44240b57cec5SDimitry Andric /// Bits [7:0] are written to bits [15:8] of the result. \n 44250b57cec5SDimitry Andric /// Bits [15:8] are written to bits [31:24] of the result. \n 44260b57cec5SDimitry Andric /// Bits [23:16] are written to bits [47:40] of the result. \n 44270b57cec5SDimitry Andric /// Bits [31:24] are written to bits [63:56] of the result. \n 44280b57cec5SDimitry Andric /// Bits [39:32] are written to bits [79:72] of the result. \n 44290b57cec5SDimitry Andric /// Bits [47:40] are written to bits [95:88] of the result. \n 44300b57cec5SDimitry Andric /// Bits [55:48] are written to bits [111:104] of the result. \n 44310b57cec5SDimitry Andric /// Bits [63:56] are written to bits [127:120] of the result. 
44320b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the interleaved values. 4433*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a, 4434*81ad6265SDimitry Andric __m128i __b) { 4435*81ad6265SDimitry Andric return (__m128i)__builtin_shufflevector( 4436*81ad6265SDimitry Andric (__v16qi)__a, (__v16qi)__b, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4, 4437*81ad6265SDimitry Andric 16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7); 44380b57cec5SDimitry Andric } 44390b57cec5SDimitry Andric 44400b57cec5SDimitry Andric /// Unpacks the low-order (index 0-3) values from each of the two 128-bit 44410b57cec5SDimitry Andric /// vectors of [8 x i16] and interleaves them into a 128-bit vector of 44420b57cec5SDimitry Andric /// [8 x i16]. 44430b57cec5SDimitry Andric /// 44440b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 44450b57cec5SDimitry Andric /// 44460b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLWD / PUNPCKLWD </c> 44470b57cec5SDimitry Andric /// instruction. 44480b57cec5SDimitry Andric /// 44490b57cec5SDimitry Andric /// \param __a 44500b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 44510b57cec5SDimitry Andric /// Bits [15:0] are written to bits [15:0] of the result. \n 44520b57cec5SDimitry Andric /// Bits [31:16] are written to bits [47:32] of the result. \n 44530b57cec5SDimitry Andric /// Bits [47:32] are written to bits [79:64] of the result. \n 44540b57cec5SDimitry Andric /// Bits [63:48] are written to bits [111:96] of the result. 44550b57cec5SDimitry Andric /// \param __b 44560b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 44570b57cec5SDimitry Andric /// Bits [15:0] are written to bits [31:16] of the result. \n 44580b57cec5SDimitry Andric /// Bits [31:16] are written to bits [63:48] of the result. \n 44590b57cec5SDimitry Andric /// Bits [47:32] are written to bits [95:80] of the result. 
\n 44600b57cec5SDimitry Andric /// Bits [63:48] are written to bits [127:112] of the result. 44610b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the interleaved values. 4462*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a, 4463*81ad6265SDimitry Andric __m128i __b) { 4464*81ad6265SDimitry Andric return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8 + 0, 1, 4465*81ad6265SDimitry Andric 8 + 1, 2, 8 + 2, 3, 8 + 3); 44660b57cec5SDimitry Andric } 44670b57cec5SDimitry Andric 44680b57cec5SDimitry Andric /// Unpacks the low-order (index 0,1) values from two 128-bit vectors of 44690b57cec5SDimitry Andric /// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32]. 44700b57cec5SDimitry Andric /// 44710b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 44720b57cec5SDimitry Andric /// 44730b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLDQ / PUNPCKLDQ </c> 44740b57cec5SDimitry Andric /// instruction. 44750b57cec5SDimitry Andric /// 44760b57cec5SDimitry Andric /// \param __a 44770b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 44780b57cec5SDimitry Andric /// Bits [31:0] are written to bits [31:0] of the destination. \n 44790b57cec5SDimitry Andric /// Bits [63:32] are written to bits [95:64] of the destination. 44800b57cec5SDimitry Andric /// \param __b 44810b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 44820b57cec5SDimitry Andric /// Bits [31:0] are written to bits [64:32] of the destination. \n 44830b57cec5SDimitry Andric /// Bits [63:32] are written to bits [127:96] of the destination. 44840b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the interleaved values. 
4485*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, 4486*81ad6265SDimitry Andric __m128i __b) { 4487*81ad6265SDimitry Andric return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4 + 0, 1, 4488*81ad6265SDimitry Andric 4 + 1); 44890b57cec5SDimitry Andric } 44900b57cec5SDimitry Andric 44910b57cec5SDimitry Andric /// Unpacks the low-order 64-bit elements from two 128-bit vectors of 44920b57cec5SDimitry Andric /// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64]. 44930b57cec5SDimitry Andric /// 44940b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 44950b57cec5SDimitry Andric /// 44960b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c> 44970b57cec5SDimitry Andric /// instruction. 44980b57cec5SDimitry Andric /// 44990b57cec5SDimitry Andric /// \param __a 45000b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. \n 45010b57cec5SDimitry Andric /// Bits [63:0] are written to bits [63:0] of the destination. \n 45020b57cec5SDimitry Andric /// \param __b 45030b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. \n 45040b57cec5SDimitry Andric /// Bits [63:0] are written to bits [127:64] of the destination. \n 45050b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the interleaved values. 4506*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, 4507*81ad6265SDimitry Andric __m128i __b) { 45080b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2 + 0); 45090b57cec5SDimitry Andric } 45100b57cec5SDimitry Andric 45110b57cec5SDimitry Andric /// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit 45120b57cec5SDimitry Andric /// integer. 
45130b57cec5SDimitry Andric /// 45140b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45150b57cec5SDimitry Andric /// 45160b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVDQ2Q </c> instruction. 45170b57cec5SDimitry Andric /// 45180b57cec5SDimitry Andric /// \param __a 45190b57cec5SDimitry Andric /// A 128-bit integer vector operand. The lower 64 bits are moved to the 45200b57cec5SDimitry Andric /// destination. 45210b57cec5SDimitry Andric /// \returns A 64-bit integer containing the lower 64 bits of the parameter. 4522*81ad6265SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_movepi64_pi64(__m128i __a) { 45230b57cec5SDimitry Andric return (__m64)__a[0]; 45240b57cec5SDimitry Andric } 45250b57cec5SDimitry Andric 45260b57cec5SDimitry Andric /// Moves the 64-bit operand to a 128-bit integer vector, zeroing the 45270b57cec5SDimitry Andric /// upper bits. 45280b57cec5SDimitry Andric /// 45290b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45300b57cec5SDimitry Andric /// 45310b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVD+VMOVQ </c> instruction. 45320b57cec5SDimitry Andric /// 45330b57cec5SDimitry Andric /// \param __a 45340b57cec5SDimitry Andric /// A 64-bit value. 45350b57cec5SDimitry Andric /// \returns A 128-bit integer vector. The lower 64 bits contain the value from 45360b57cec5SDimitry Andric /// the operand. The upper 64 bits are assigned zeros. 4537*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movpi64_epi64(__m64 __a) { 45380b57cec5SDimitry Andric return __extension__(__m128i)(__v2di){(long long)__a, 0}; 45390b57cec5SDimitry Andric } 45400b57cec5SDimitry Andric 45410b57cec5SDimitry Andric /// Moves the lower 64 bits of a 128-bit integer vector to a 128-bit 45420b57cec5SDimitry Andric /// integer vector, zeroing the upper bits. 
45430b57cec5SDimitry Andric /// 45440b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45450b57cec5SDimitry Andric /// 45460b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 45470b57cec5SDimitry Andric /// 45480b57cec5SDimitry Andric /// \param __a 45490b57cec5SDimitry Andric /// A 128-bit integer vector operand. The lower 64 bits are moved to the 45500b57cec5SDimitry Andric /// destination. 45510b57cec5SDimitry Andric /// \returns A 128-bit integer vector. The lower 64 bits contain the value from 45520b57cec5SDimitry Andric /// the operand. The upper 64 bits are assigned zeros. 4553*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_move_epi64(__m128i __a) { 45540b57cec5SDimitry Andric return __builtin_shufflevector((__v2di)__a, _mm_setzero_si128(), 0, 2); 45550b57cec5SDimitry Andric } 45560b57cec5SDimitry Andric 45570b57cec5SDimitry Andric /// Unpacks the high-order 64-bit elements from two 128-bit vectors of 45580b57cec5SDimitry Andric /// [2 x double] and interleaves them into a 128-bit vector of [2 x 45590b57cec5SDimitry Andric /// double]. 45600b57cec5SDimitry Andric /// 45610b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45620b57cec5SDimitry Andric /// 45630b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction. 45640b57cec5SDimitry Andric /// 45650b57cec5SDimitry Andric /// \param __a 45660b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 45670b57cec5SDimitry Andric /// Bits [127:64] are written to bits [63:0] of the destination. 45680b57cec5SDimitry Andric /// \param __b 45690b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 45700b57cec5SDimitry Andric /// Bits [127:64] are written to bits [127:64] of the destination. 45710b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the interleaved values. 
4572*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, 4573*81ad6265SDimitry Andric __m128d __b) { 45740b57cec5SDimitry Andric return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2 + 1); 45750b57cec5SDimitry Andric } 45760b57cec5SDimitry Andric 45770b57cec5SDimitry Andric /// Unpacks the low-order 64-bit elements from two 128-bit vectors 45780b57cec5SDimitry Andric /// of [2 x double] and interleaves them into a 128-bit vector of [2 x 45790b57cec5SDimitry Andric /// double]. 45800b57cec5SDimitry Andric /// 45810b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45820b57cec5SDimitry Andric /// 45830b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction. 45840b57cec5SDimitry Andric /// 45850b57cec5SDimitry Andric /// \param __a 45860b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 45870b57cec5SDimitry Andric /// Bits [63:0] are written to bits [63:0] of the destination. 45880b57cec5SDimitry Andric /// \param __b 45890b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 45900b57cec5SDimitry Andric /// Bits [63:0] are written to bits [127:64] of the destination. 45910b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the interleaved values. 4592*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpacklo_pd(__m128d __a, 4593*81ad6265SDimitry Andric __m128d __b) { 45940b57cec5SDimitry Andric return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2 + 0); 45950b57cec5SDimitry Andric } 45960b57cec5SDimitry Andric 45970b57cec5SDimitry Andric /// Extracts the sign bits of the double-precision values in the 128-bit 45980b57cec5SDimitry Andric /// vector of [2 x double], zero-extends the value, and writes it to the 45990b57cec5SDimitry Andric /// low-order bits of the destination. 
46000b57cec5SDimitry Andric /// 46010b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46020b57cec5SDimitry Andric /// 46030b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVMSKPD / MOVMSKPD </c> instruction. 46040b57cec5SDimitry Andric /// 46050b57cec5SDimitry Andric /// \param __a 46060b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the values with sign bits to 46070b57cec5SDimitry Andric /// be extracted. 46080b57cec5SDimitry Andric /// \returns The sign bits from each of the double-precision elements in \a __a, 46090b57cec5SDimitry Andric /// written to bits [1:0]. The remaining bits are assigned values of zero. 4610*81ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a) { 46110b57cec5SDimitry Andric return __builtin_ia32_movmskpd((__v2df)__a); 46120b57cec5SDimitry Andric } 46130b57cec5SDimitry Andric 46140b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] from two 46150b57cec5SDimitry Andric /// 128-bit vector parameters of [2 x double], using the immediate-value 46160b57cec5SDimitry Andric /// parameter as a specifier. 46170b57cec5SDimitry Andric /// 46180b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46190b57cec5SDimitry Andric /// 46200b57cec5SDimitry Andric /// \code 46210b57cec5SDimitry Andric /// __m128d _mm_shuffle_pd(__m128d a, __m128d b, const int i); 46220b57cec5SDimitry Andric /// \endcode 46230b57cec5SDimitry Andric /// 46240b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSHUFPD / SHUFPD </c> instruction. 46250b57cec5SDimitry Andric /// 46260b57cec5SDimitry Andric /// \param a 46270b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 46280b57cec5SDimitry Andric /// \param b 46290b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 46300b57cec5SDimitry Andric /// \param i 46310b57cec5SDimitry Andric /// An 8-bit immediate value. 
The least significant two bits specify which 46320b57cec5SDimitry Andric /// elements to copy from \a a and \a b: \n 46330b57cec5SDimitry Andric /// Bit[0] = 0: lower element of \a a copied to lower element of result. \n 46340b57cec5SDimitry Andric /// Bit[0] = 1: upper element of \a a copied to lower element of result. \n 46350b57cec5SDimitry Andric /// Bit[1] = 0: lower element of \a b copied to upper element of result. \n 46360b57cec5SDimitry Andric /// Bit[1] = 1: upper element of \a b copied to upper element of result. \n 4637*81ad6265SDimitry Andric /// Note: To generate a mask, you can use the \c _MM_SHUFFLE2 macro. 4638*81ad6265SDimitry Andric /// <c>_MM_SHUFFLE2(b1, b0)</c> can create a 2-bit mask of the form 4639*81ad6265SDimitry Andric /// <c>[b1, b0]</c>. 46400b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the shuffled values. 46410b57cec5SDimitry Andric #define _mm_shuffle_pd(a, b, i) \ 4642349cc55cSDimitry Andric ((__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ 4643349cc55cSDimitry Andric (int)(i))) 46440b57cec5SDimitry Andric 46450b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit 46460b57cec5SDimitry Andric /// floating-point vector of [4 x float]. 46470b57cec5SDimitry Andric /// 46480b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46490b57cec5SDimitry Andric /// 46500b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 46510b57cec5SDimitry Andric /// 46520b57cec5SDimitry Andric /// \param __a 46530b57cec5SDimitry Andric /// A 128-bit floating-point vector of [2 x double]. 46540b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [4 x float] containing the same 46550b57cec5SDimitry Andric /// bitwise pattern as the parameter. 
4656*81ad6265SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castpd_ps(__m128d __a) { 46570b57cec5SDimitry Andric return (__m128)__a; 46580b57cec5SDimitry Andric } 46590b57cec5SDimitry Andric 46600b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit 46610b57cec5SDimitry Andric /// integer vector. 46620b57cec5SDimitry Andric /// 46630b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46640b57cec5SDimitry Andric /// 46650b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 46660b57cec5SDimitry Andric /// 46670b57cec5SDimitry Andric /// \param __a 46680b57cec5SDimitry Andric /// A 128-bit floating-point vector of [2 x double]. 46690b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the same bitwise pattern as the 46700b57cec5SDimitry Andric /// parameter. 4671*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castpd_si128(__m128d __a) { 46720b57cec5SDimitry Andric return (__m128i)__a; 46730b57cec5SDimitry Andric } 46740b57cec5SDimitry Andric 46750b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit 46760b57cec5SDimitry Andric /// floating-point vector of [2 x double]. 46770b57cec5SDimitry Andric /// 46780b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46790b57cec5SDimitry Andric /// 46800b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 46810b57cec5SDimitry Andric /// 46820b57cec5SDimitry Andric /// \param __a 46830b57cec5SDimitry Andric /// A 128-bit floating-point vector of [4 x float]. 46840b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [2 x double] containing the same 46850b57cec5SDimitry Andric /// bitwise pattern as the parameter. 
4686*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castps_pd(__m128 __a) { 46870b57cec5SDimitry Andric return (__m128d)__a; 46880b57cec5SDimitry Andric } 46890b57cec5SDimitry Andric 46900b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit 46910b57cec5SDimitry Andric /// integer vector. 46920b57cec5SDimitry Andric /// 46930b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46940b57cec5SDimitry Andric /// 46950b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 46960b57cec5SDimitry Andric /// 46970b57cec5SDimitry Andric /// \param __a 46980b57cec5SDimitry Andric /// A 128-bit floating-point vector of [4 x float]. 46990b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the same bitwise pattern as the 47000b57cec5SDimitry Andric /// parameter. 4701*81ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castps_si128(__m128 __a) { 47020b57cec5SDimitry Andric return (__m128i)__a; 47030b57cec5SDimitry Andric } 47040b57cec5SDimitry Andric 47050b57cec5SDimitry Andric /// Casts a 128-bit integer vector into a 128-bit floating-point vector 47060b57cec5SDimitry Andric /// of [4 x float]. 47070b57cec5SDimitry Andric /// 47080b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47090b57cec5SDimitry Andric /// 47100b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 47110b57cec5SDimitry Andric /// 47120b57cec5SDimitry Andric /// \param __a 47130b57cec5SDimitry Andric /// A 128-bit integer vector. 47140b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [4 x float] containing the same 47150b57cec5SDimitry Andric /// bitwise pattern as the parameter. 
4716*81ad6265SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castsi128_ps(__m128i __a) { 47170b57cec5SDimitry Andric return (__m128)__a; 47180b57cec5SDimitry Andric } 47190b57cec5SDimitry Andric 47200b57cec5SDimitry Andric /// Casts a 128-bit integer vector into a 128-bit floating-point vector 47210b57cec5SDimitry Andric /// of [2 x double]. 47220b57cec5SDimitry Andric /// 47230b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47240b57cec5SDimitry Andric /// 47250b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 47260b57cec5SDimitry Andric /// 47270b57cec5SDimitry Andric /// \param __a 47280b57cec5SDimitry Andric /// A 128-bit integer vector. 47290b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [2 x double] containing the same 47300b57cec5SDimitry Andric /// bitwise pattern as the parameter. 4731*81ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a) { 47320b57cec5SDimitry Andric return (__m128d)__a; 47330b57cec5SDimitry Andric } 47340b57cec5SDimitry Andric 47350b57cec5SDimitry Andric #if defined(__cplusplus) 47360b57cec5SDimitry Andric extern "C" { 47370b57cec5SDimitry Andric #endif 47380b57cec5SDimitry Andric 47390b57cec5SDimitry Andric /// Indicates that a spin loop is being executed for the purposes of 47400b57cec5SDimitry Andric /// optimizing power consumption during the loop. 47410b57cec5SDimitry Andric /// 47420b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47430b57cec5SDimitry Andric /// 47440b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PAUSE </c> instruction. 
47450b57cec5SDimitry Andric /// 47460b57cec5SDimitry Andric void _mm_pause(void); 47470b57cec5SDimitry Andric 47480b57cec5SDimitry Andric #if defined(__cplusplus) 47490b57cec5SDimitry Andric } // extern "C" 47500b57cec5SDimitry Andric #endif 47510b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS 47520b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS_MMX 47530b57cec5SDimitry Andric 47540b57cec5SDimitry Andric #define _MM_SHUFFLE2(x, y) (((x) << 1) | (y)) 47550b57cec5SDimitry Andric 47565ffd83dbSDimitry Andric #define _MM_DENORMALS_ZERO_ON (0x0040U) 47575ffd83dbSDimitry Andric #define _MM_DENORMALS_ZERO_OFF (0x0000U) 47580b57cec5SDimitry Andric 47595ffd83dbSDimitry Andric #define _MM_DENORMALS_ZERO_MASK (0x0040U) 47600b57cec5SDimitry Andric 47610b57cec5SDimitry Andric #define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK) 4762*81ad6265SDimitry Andric #define _MM_SET_DENORMALS_ZERO_MODE(x) \ 4763*81ad6265SDimitry Andric (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x))) 47640b57cec5SDimitry Andric 47650b57cec5SDimitry Andric #endif /* __EMMINTRIN_H */ 4766